hgext: replace references to hashlib.sha1 with hashutil.sha1...
Augie Fackler
r44519:2d49482d default
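This changeset swaps the direct hashlib.sha1 call in the fastannotate code below for Mercurial's hashutil.sha1 wrapper. A minimal sketch of the pattern, assuming hashutil.sha1 (from mercurial.utils.hashutil) is a drop-in replacement exposing the usual digest interface; the helper name digest6 is hypothetical and only for illustration:

from mercurial import node
from mercurial.utils import hashutil


def digest6(data):
    # same shape as hashdiffopts() in the diff below: a short hex prefix of
    # the SHA-1 digest of a byte string
    return node.hex(hashutil.sha1(data).digest())[:6]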
@@ -1,856 +1,858 b''
1 # Copyright 2016-present Facebook. All Rights Reserved.
1 # Copyright 2016-present Facebook. All Rights Reserved.
2 #
2 #
3 # context: context needed to annotate a file
3 # context: context needed to annotate a file
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import contextlib
11 import contextlib
12 import hashlib
13 import os
12 import os
14
13
15 from mercurial.i18n import _
14 from mercurial.i18n import _
16 from mercurial.pycompat import (
15 from mercurial.pycompat import (
17 getattr,
16 getattr,
18 open,
17 open,
19 setattr,
18 setattr,
20 )
19 )
21 from mercurial import (
20 from mercurial import (
22 error,
21 error,
23 linelog as linelogmod,
22 linelog as linelogmod,
24 lock as lockmod,
23 lock as lockmod,
25 mdiff,
24 mdiff,
26 node,
25 node,
27 pycompat,
26 pycompat,
28 scmutil,
27 scmutil,
29 util,
28 util,
30 )
29 )
31 from mercurial.utils import stringutil
30 from mercurial.utils import (
31 hashutil,
32 stringutil,
33 )
32
34
33 from . import (
35 from . import (
34 error as faerror,
36 error as faerror,
35 revmap as revmapmod,
37 revmap as revmapmod,
36 )
38 )
37
39
38 # given path, get filelog, cached
40 # given path, get filelog, cached
39 @util.lrucachefunc
41 @util.lrucachefunc
40 def _getflog(repo, path):
42 def _getflog(repo, path):
41 return repo.file(path)
43 return repo.file(path)
42
44
43
45
44 # extracted from mercurial.context.basefilectx.annotate
46 # extracted from mercurial.context.basefilectx.annotate
45 def _parents(f, follow=True):
47 def _parents(f, follow=True):
46 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
48 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
47 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
49 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
48 # from the topmost introrev (= srcrev) down to p.linkrev() if it
50 # from the topmost introrev (= srcrev) down to p.linkrev() if it
49 # isn't an ancestor of the srcrev.
51 # isn't an ancestor of the srcrev.
50 f._changeid
52 f._changeid
51 pl = f.parents()
53 pl = f.parents()
52
54
53 # Don't return renamed parents if we aren't following.
55 # Don't return renamed parents if we aren't following.
54 if not follow:
56 if not follow:
55 pl = [p for p in pl if p.path() == f.path()]
57 pl = [p for p in pl if p.path() == f.path()]
56
58
57 # renamed filectx won't have a filelog yet, so set it
59 # renamed filectx won't have a filelog yet, so set it
58 # from the cache to save time
60 # from the cache to save time
59 for p in pl:
61 for p in pl:
60 if not '_filelog' in p.__dict__:
62 if not '_filelog' in p.__dict__:
61 p._filelog = _getflog(f._repo, p.path())
63 p._filelog = _getflog(f._repo, p.path())
62
64
63 return pl
65 return pl
64
66
65
67
66 # extracted from mercurial.context.basefilectx.annotate. slightly modified
68 # extracted from mercurial.context.basefilectx.annotate. slightly modified
67 # so it takes a fctx instead of a pair of text and fctx.
69 # so it takes a fctx instead of a pair of text and fctx.
68 def _decorate(fctx):
70 def _decorate(fctx):
69 text = fctx.data()
71 text = fctx.data()
70 linecount = text.count(b'\n')
72 linecount = text.count(b'\n')
71 if text and not text.endswith(b'\n'):
73 if text and not text.endswith(b'\n'):
72 linecount += 1
74 linecount += 1
73 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
75 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
74
76
75
77
76 # extracted from mercurial.context.basefilectx.annotate. slightly modified
78 # extracted from mercurial.context.basefilectx.annotate. slightly modified
77 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
79 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
78 # calculating diff here.
80 # calculating diff here.
79 def _pair(parent, child, blocks):
81 def _pair(parent, child, blocks):
80 for (a1, a2, b1, b2), t in blocks:
82 for (a1, a2, b1, b2), t in blocks:
81 # Changed blocks ('!') or blocks made only of blank lines ('~')
83 # Changed blocks ('!') or blocks made only of blank lines ('~')
82 # belong to the child.
84 # belong to the child.
83 if t == b'=':
85 if t == b'=':
84 child[0][b1:b2] = parent[0][a1:a2]
86 child[0][b1:b2] = parent[0][a1:a2]
85 return child
87 return child
86
88
87
89
88 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
90 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
89 # could be reused
91 # could be reused
90 _revsingle = util.lrucachefunc(scmutil.revsingle)
92 _revsingle = util.lrucachefunc(scmutil.revsingle)
91
93
92
94
93 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
95 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
94 """(repo, str, str) -> fctx
96 """(repo, str, str) -> fctx
95
97
96 get the filectx object from repo, rev, path, in an efficient way.
98 get the filectx object from repo, rev, path, in an efficient way.
97
99
98 if resolverev is True, "rev" is a revision specified by the revset
100 if resolverev is True, "rev" is a revision specified by the revset
99 language, otherwise "rev" is a nodeid, or a revision number that can
101 language, otherwise "rev" is a nodeid, or a revision number that can
100 be consumed by repo.__getitem__.
102 be consumed by repo.__getitem__.
101
103
102 if adjustctx is not None, the returned fctx will point to a changeset
104 if adjustctx is not None, the returned fctx will point to a changeset
103 that introduces the change (last modified the file). if adjustctx
105 that introduces the change (last modified the file). if adjustctx
104 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
106 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
105 faster for big repos but is incorrect for some cases.
107 faster for big repos but is incorrect for some cases.
106 """
108 """
107 if resolverev and not isinstance(rev, int) and rev is not None:
109 if resolverev and not isinstance(rev, int) and rev is not None:
108 ctx = _revsingle(repo, rev)
110 ctx = _revsingle(repo, rev)
109 else:
111 else:
110 ctx = repo[rev]
112 ctx = repo[rev]
111
113
112 # If we don't need to adjust the linkrev, create the filectx using the
114 # If we don't need to adjust the linkrev, create the filectx using the
113 # changectx instead of using ctx[path]. This means it already has the
115 # changectx instead of using ctx[path]. This means it already has the
114 # changectx information, so blame -u will be able to look directly at the
116 # changectx information, so blame -u will be able to look directly at the
115 # commitctx object instead of having to resolve it by going through the
117 # commitctx object instead of having to resolve it by going through the
116 # manifest. In a lazy-manifest world this can prevent us from downloading a
118 # manifest. In a lazy-manifest world this can prevent us from downloading a
117 # lot of data.
119 # lot of data.
118 if adjustctx is None:
120 if adjustctx is None:
119 # ctx.rev() is None means it's the working copy, which is a special
121 # ctx.rev() is None means it's the working copy, which is a special
120 # case.
122 # case.
121 if ctx.rev() is None:
123 if ctx.rev() is None:
122 fctx = ctx[path]
124 fctx = ctx[path]
123 else:
125 else:
124 fctx = repo.filectx(path, changeid=ctx.rev())
126 fctx = repo.filectx(path, changeid=ctx.rev())
125 else:
127 else:
126 fctx = ctx[path]
128 fctx = ctx[path]
127 if adjustctx == b'linkrev':
129 if adjustctx == b'linkrev':
128 introrev = fctx.linkrev()
130 introrev = fctx.linkrev()
129 else:
131 else:
130 introrev = fctx.introrev()
132 introrev = fctx.introrev()
131 if introrev != ctx.rev():
133 if introrev != ctx.rev():
132 fctx._changeid = introrev
134 fctx._changeid = introrev
133 fctx._changectx = repo[introrev]
135 fctx._changectx = repo[introrev]
134 return fctx
136 return fctx
135
137
136
138
137 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
139 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
138 def encodedir(path):
140 def encodedir(path):
139 return (
141 return (
140 path.replace(b'.hg/', b'.hg.hg/')
142 path.replace(b'.hg/', b'.hg.hg/')
141 .replace(b'.l/', b'.l.hg/')
143 .replace(b'.l/', b'.l.hg/')
142 .replace(b'.m/', b'.m.hg/')
144 .replace(b'.m/', b'.m.hg/')
143 .replace(b'.lock/', b'.lock.hg/')
145 .replace(b'.lock/', b'.lock.hg/')
144 )
146 )
145
147
146
148
147 def hashdiffopts(diffopts):
149 def hashdiffopts(diffopts):
148 diffoptstr = stringutil.pprint(
150 diffoptstr = stringutil.pprint(
149 sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults)
151 sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults)
150 )
152 )
151 return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
153 return node.hex(hashutil.sha1(diffoptstr).digest())[:6]
152
154
153
155
154 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
156 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
155
157
156
158
157 class annotateopts(object):
159 class annotateopts(object):
158 """like mercurial.mdiff.diffopts, but is for annotate
160 """like mercurial.mdiff.diffopts, but is for annotate
159
161
160 followrename: follow renames, like "hg annotate -f"
162 followrename: follow renames, like "hg annotate -f"
161 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
163 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
162 """
164 """
163
165
164 defaults = {
166 defaults = {
165 b'diffopts': None,
167 b'diffopts': None,
166 b'followrename': True,
168 b'followrename': True,
167 b'followmerge': True,
169 b'followmerge': True,
168 }
170 }
169
171
170 def __init__(self, **opts):
172 def __init__(self, **opts):
171 opts = pycompat.byteskwargs(opts)
173 opts = pycompat.byteskwargs(opts)
172 for k, v in pycompat.iteritems(self.defaults):
174 for k, v in pycompat.iteritems(self.defaults):
173 setattr(self, k, opts.get(k, v))
175 setattr(self, k, opts.get(k, v))
174
176
175 @util.propertycache
177 @util.propertycache
176 def shortstr(self):
178 def shortstr(self):
177 """represent opts in a short string, suitable for a directory name"""
179 """represent opts in a short string, suitable for a directory name"""
178 result = b''
180 result = b''
179 if not self.followrename:
181 if not self.followrename:
180 result += b'r0'
182 result += b'r0'
181 if not self.followmerge:
183 if not self.followmerge:
182 result += b'm0'
184 result += b'm0'
183 if self.diffopts is not None:
185 if self.diffopts is not None:
184 assert isinstance(self.diffopts, mdiff.diffopts)
186 assert isinstance(self.diffopts, mdiff.diffopts)
185 diffopthash = hashdiffopts(self.diffopts)
187 diffopthash = hashdiffopts(self.diffopts)
186 if diffopthash != _defaultdiffopthash:
188 if diffopthash != _defaultdiffopthash:
187 result += b'i' + diffopthash
189 result += b'i' + diffopthash
188 return result or b'default'
190 return result or b'default'
189
191
190
192
191 defaultopts = annotateopts()
193 defaultopts = annotateopts()
192
194
193
195
194 class _annotatecontext(object):
196 class _annotatecontext(object):
195 """do not use this class directly as it does not use lock to protect
197 """do not use this class directly as it does not use lock to protect
196 writes. use "with annotatecontext(...)" instead.
198 writes. use "with annotatecontext(...)" instead.
197 """
199 """
198
200
199 def __init__(self, repo, path, linelogpath, revmappath, opts):
201 def __init__(self, repo, path, linelogpath, revmappath, opts):
200 self.repo = repo
202 self.repo = repo
201 self.ui = repo.ui
203 self.ui = repo.ui
202 self.path = path
204 self.path = path
203 self.opts = opts
205 self.opts = opts
204 self.linelogpath = linelogpath
206 self.linelogpath = linelogpath
205 self.revmappath = revmappath
207 self.revmappath = revmappath
206 self._linelog = None
208 self._linelog = None
207 self._revmap = None
209 self._revmap = None
208 self._node2path = {} # {str: str}
210 self._node2path = {} # {str: str}
209
211
210 @property
212 @property
211 def linelog(self):
213 def linelog(self):
212 if self._linelog is None:
214 if self._linelog is None:
213 if os.path.exists(self.linelogpath):
215 if os.path.exists(self.linelogpath):
214 with open(self.linelogpath, b'rb') as f:
216 with open(self.linelogpath, b'rb') as f:
215 try:
217 try:
216 self._linelog = linelogmod.linelog.fromdata(f.read())
218 self._linelog = linelogmod.linelog.fromdata(f.read())
217 except linelogmod.LineLogError:
219 except linelogmod.LineLogError:
218 self._linelog = linelogmod.linelog()
220 self._linelog = linelogmod.linelog()
219 else:
221 else:
220 self._linelog = linelogmod.linelog()
222 self._linelog = linelogmod.linelog()
221 return self._linelog
223 return self._linelog
222
224
223 @property
225 @property
224 def revmap(self):
226 def revmap(self):
225 if self._revmap is None:
227 if self._revmap is None:
226 self._revmap = revmapmod.revmap(self.revmappath)
228 self._revmap = revmapmod.revmap(self.revmappath)
227 return self._revmap
229 return self._revmap
228
230
229 def close(self):
231 def close(self):
230 if self._revmap is not None:
232 if self._revmap is not None:
231 self._revmap.flush()
233 self._revmap.flush()
232 self._revmap = None
234 self._revmap = None
233 if self._linelog is not None:
235 if self._linelog is not None:
234 with open(self.linelogpath, b'wb') as f:
236 with open(self.linelogpath, b'wb') as f:
235 f.write(self._linelog.encode())
237 f.write(self._linelog.encode())
236 self._linelog = None
238 self._linelog = None
237
239
238 __del__ = close
240 __del__ = close
239
241
240 def rebuild(self):
242 def rebuild(self):
241 """delete linelog and revmap, useful for rebuilding"""
243 """delete linelog and revmap, useful for rebuilding"""
242 self.close()
244 self.close()
243 self._node2path.clear()
245 self._node2path.clear()
244 _unlinkpaths([self.revmappath, self.linelogpath])
246 _unlinkpaths([self.revmappath, self.linelogpath])
245
247
246 @property
248 @property
247 def lastnode(self):
249 def lastnode(self):
248 """return last node in revmap, or None if revmap is empty"""
250 """return last node in revmap, or None if revmap is empty"""
249 if self._revmap is None:
251 if self._revmap is None:
250 # fast path, read revmap without loading its full content
252 # fast path, read revmap without loading its full content
251 return revmapmod.getlastnode(self.revmappath)
253 return revmapmod.getlastnode(self.revmappath)
252 else:
254 else:
253 return self._revmap.rev2hsh(self._revmap.maxrev)
255 return self._revmap.rev2hsh(self._revmap.maxrev)
254
256
255 def isuptodate(self, master, strict=True):
257 def isuptodate(self, master, strict=True):
256 """return True if the revmap / linelog is up-to-date, or the file
258 """return True if the revmap / linelog is up-to-date, or the file
257 does not exist in the master revision. False otherwise.
259 does not exist in the master revision. False otherwise.
258
260
259 it tries to be fast and could return false negatives, because of the
261 it tries to be fast and could return false negatives, because of the
260 use of linkrev instead of introrev.
262 use of linkrev instead of introrev.
261
263
262 useful for both server and client to decide whether to update
264 useful for both server and client to decide whether to update
263 fastannotate cache or not.
265 fastannotate cache or not.
264
266
265 if strict is True, even if fctx exists in the revmap, but is not the
267 if strict is True, even if fctx exists in the revmap, but is not the
266 last node, isuptodate will return False. it's good for performance - no
268 last node, isuptodate will return False. it's good for performance - no
267 expensive check was done.
269 expensive check was done.
268
270
269 if strict is False, if fctx exists in the revmap, this function may
271 if strict is False, if fctx exists in the revmap, this function may
270 return True. this is useful for the client to skip downloading the
272 return True. this is useful for the client to skip downloading the
271 cache if the client's master is behind the server's.
273 cache if the client's master is behind the server's.
272 """
274 """
273 lastnode = self.lastnode
275 lastnode = self.lastnode
274 try:
276 try:
275 f = self._resolvefctx(master, resolverev=True)
277 f = self._resolvefctx(master, resolverev=True)
276 # choose linkrev instead of introrev as the check is meant to be
278 # choose linkrev instead of introrev as the check is meant to be
277 # *fast*.
279 # *fast*.
278 linknode = self.repo.changelog.node(f.linkrev())
280 linknode = self.repo.changelog.node(f.linkrev())
279 if not strict and lastnode and linknode != lastnode:
281 if not strict and lastnode and linknode != lastnode:
280 # check if f.node() is in the revmap. note: this loads the
282 # check if f.node() is in the revmap. note: this loads the
281 # revmap and can be slow.
283 # revmap and can be slow.
282 return self.revmap.hsh2rev(linknode) is not None
284 return self.revmap.hsh2rev(linknode) is not None
283 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
285 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
284 # false negatives are acceptable in this case.
286 # false negatives are acceptable in this case.
285 return linknode == lastnode
287 return linknode == lastnode
286 except LookupError:
288 except LookupError:
287 # master does not have the file, or the revmap is ahead
289 # master does not have the file, or the revmap is ahead
288 return True
290 return True
289
291
290 def annotate(self, rev, master=None, showpath=False, showlines=False):
292 def annotate(self, rev, master=None, showpath=False, showlines=False):
291 """incrementally update the cache so it includes revisions in the main
293 """incrementally update the cache so it includes revisions in the main
292 branch till 'master'. and run annotate on 'rev', which may or may not be
294 branch till 'master'. and run annotate on 'rev', which may or may not be
293 included in the main branch.
295 included in the main branch.
294
296
295 if master is None, do not update linelog.
297 if master is None, do not update linelog.
296
298
297 the first value returned is the annotate result, it is [(node, linenum)]
299 the first value returned is the annotate result, it is [(node, linenum)]
298 by default. [(node, linenum, path)] if showpath is True.
300 by default. [(node, linenum, path)] if showpath is True.
299
301
300 if showlines is True, a second value will be returned, it is a list of
302 if showlines is True, a second value will be returned, it is a list of
301 corresponding line contents.
303 corresponding line contents.
302 """
304 """
303
305
304 # the fast path test requires commit hash, convert rev number to hash,
306 # the fast path test requires commit hash, convert rev number to hash,
305 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
307 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
306 # command could give us a revision number even if the user passes a
308 # command could give us a revision number even if the user passes a
307 # commit hash.
309 # commit hash.
308 if isinstance(rev, int):
310 if isinstance(rev, int):
309 rev = node.hex(self.repo.changelog.node(rev))
311 rev = node.hex(self.repo.changelog.node(rev))
310
312
311 # fast path: if rev is in the main branch already
313 # fast path: if rev is in the main branch already
312 directly, revfctx = self.canannotatedirectly(rev)
314 directly, revfctx = self.canannotatedirectly(rev)
313 if directly:
315 if directly:
314 if self.ui.debugflag:
316 if self.ui.debugflag:
315 self.ui.debug(
317 self.ui.debug(
316 b'fastannotate: %s: using fast path '
318 b'fastannotate: %s: using fast path '
317 b'(resolved fctx: %s)\n'
319 b'(resolved fctx: %s)\n'
318 % (
320 % (
319 self.path,
321 self.path,
320 stringutil.pprint(util.safehasattr(revfctx, b'node')),
322 stringutil.pprint(util.safehasattr(revfctx, b'node')),
321 )
323 )
322 )
324 )
323 return self.annotatedirectly(revfctx, showpath, showlines)
325 return self.annotatedirectly(revfctx, showpath, showlines)
324
326
325 # resolve master
327 # resolve master
326 masterfctx = None
328 masterfctx = None
327 if master:
329 if master:
328 try:
330 try:
329 masterfctx = self._resolvefctx(
331 masterfctx = self._resolvefctx(
330 master, resolverev=True, adjustctx=True
332 master, resolverev=True, adjustctx=True
331 )
333 )
332 except LookupError: # master does not have the file
334 except LookupError: # master does not have the file
333 pass
335 pass
334 else:
336 else:
335 if masterfctx in self.revmap: # no need to update linelog
337 if masterfctx in self.revmap: # no need to update linelog
336 masterfctx = None
338 masterfctx = None
337
339
338 # ... - @ <- rev (can be an arbitrary changeset,
340 # ... - @ <- rev (can be an arbitrary changeset,
339 # / not necessarily a descendant
341 # / not necessarily a descendant
340 # master -> o of master)
342 # master -> o of master)
341 # |
343 # |
342 # a merge -> o 'o': new changesets in the main branch
344 # a merge -> o 'o': new changesets in the main branch
343 # |\ '#': revisions in the main branch that
345 # |\ '#': revisions in the main branch that
344 # o * exist in linelog / revmap
346 # o * exist in linelog / revmap
345 # | . '*': changesets in side branches, or
347 # | . '*': changesets in side branches, or
346 # last master -> # . descendants of master
348 # last master -> # . descendants of master
347 # | .
349 # | .
348 # # * joint: '#', and is a parent of a '*'
350 # # * joint: '#', and is a parent of a '*'
349 # |/
351 # |/
350 # a joint -> # ^^^^ --- side branches
352 # a joint -> # ^^^^ --- side branches
351 # |
353 # |
352 # ^ --- main branch (in linelog)
354 # ^ --- main branch (in linelog)
353
355
354 # these DFSes are similar to the traditional annotate algorithm.
356 # these DFSes are similar to the traditional annotate algorithm.
355 # we cannot really reuse the code for perf reason.
357 # we cannot really reuse the code for perf reason.
356
358
357 # 1st DFS calculates merges, joint points, and needed.
359 # 1st DFS calculates merges, joint points, and needed.
358 # "needed" is a simple reference counting dict to free items in
360 # "needed" is a simple reference counting dict to free items in
359 # "hist", reducing its memory usage otherwise could be huge.
361 # "hist", reducing its memory usage otherwise could be huge.
360 initvisit = [revfctx]
362 initvisit = [revfctx]
361 if masterfctx:
363 if masterfctx:
362 if masterfctx.rev() is None:
364 if masterfctx.rev() is None:
363 raise error.Abort(
365 raise error.Abort(
364 _(b'cannot update linelog to wdir()'),
366 _(b'cannot update linelog to wdir()'),
365 hint=_(b'set fastannotate.mainbranch'),
367 hint=_(b'set fastannotate.mainbranch'),
366 )
368 )
367 initvisit.append(masterfctx)
369 initvisit.append(masterfctx)
368 visit = initvisit[:]
370 visit = initvisit[:]
369 pcache = {}
371 pcache = {}
370 needed = {revfctx: 1}
372 needed = {revfctx: 1}
371 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
373 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
372 while visit:
374 while visit:
373 f = visit.pop()
375 f = visit.pop()
374 if f in pcache or f in hist:
376 if f in pcache or f in hist:
375 continue
377 continue
376 if f in self.revmap: # in the old main branch, it's a joint
378 if f in self.revmap: # in the old main branch, it's a joint
377 llrev = self.revmap.hsh2rev(f.node())
379 llrev = self.revmap.hsh2rev(f.node())
378 self.linelog.annotate(llrev)
380 self.linelog.annotate(llrev)
379 result = self.linelog.annotateresult
381 result = self.linelog.annotateresult
380 hist[f] = (result, f.data())
382 hist[f] = (result, f.data())
381 continue
383 continue
382 pl = self._parentfunc(f)
384 pl = self._parentfunc(f)
383 pcache[f] = pl
385 pcache[f] = pl
384 for p in pl:
386 for p in pl:
385 needed[p] = needed.get(p, 0) + 1
387 needed[p] = needed.get(p, 0) + 1
386 if p not in pcache:
388 if p not in pcache:
387 visit.append(p)
389 visit.append(p)
388
390
389 # 2nd (simple) DFS calculates new changesets in the main branch
391 # 2nd (simple) DFS calculates new changesets in the main branch
390 # ('o' nodes in # the above graph), so we know when to update linelog.
392 # ('o' nodes in # the above graph), so we know when to update linelog.
391 newmainbranch = set()
393 newmainbranch = set()
392 f = masterfctx
394 f = masterfctx
393 while f and f not in self.revmap:
395 while f and f not in self.revmap:
394 newmainbranch.add(f)
396 newmainbranch.add(f)
395 pl = pcache[f]
397 pl = pcache[f]
396 if pl:
398 if pl:
397 f = pl[0]
399 f = pl[0]
398 else:
400 else:
399 f = None
401 f = None
400 break
402 break
401
403
402 # f, if present, is the position where the last build stopped at, and
404 # f, if present, is the position where the last build stopped at, and
403 # should be the "master" last time. check to see if we can continue
405 # should be the "master" last time. check to see if we can continue
404 # building the linelog incrementally. (we cannot if diverged)
406 # building the linelog incrementally. (we cannot if diverged)
405 if masterfctx is not None:
407 if masterfctx is not None:
406 self._checklastmasterhead(f)
408 self._checklastmasterhead(f)
407
409
408 if self.ui.debugflag:
410 if self.ui.debugflag:
409 if newmainbranch:
411 if newmainbranch:
410 self.ui.debug(
412 self.ui.debug(
411 b'fastannotate: %s: %d new changesets in the main'
413 b'fastannotate: %s: %d new changesets in the main'
412 b' branch\n' % (self.path, len(newmainbranch))
414 b' branch\n' % (self.path, len(newmainbranch))
413 )
415 )
414 elif not hist: # no joints, no updates
416 elif not hist: # no joints, no updates
415 self.ui.debug(
417 self.ui.debug(
416 b'fastannotate: %s: linelog cannot help in '
418 b'fastannotate: %s: linelog cannot help in '
417 b'annotating this revision\n' % self.path
419 b'annotating this revision\n' % self.path
418 )
420 )
419
421
420 # prepare annotateresult so we can update linelog incrementally
422 # prepare annotateresult so we can update linelog incrementally
421 self.linelog.annotate(self.linelog.maxrev)
423 self.linelog.annotate(self.linelog.maxrev)
422
424
423 # 3rd DFS does the actual annotate
425 # 3rd DFS does the actual annotate
424 visit = initvisit[:]
426 visit = initvisit[:]
425 progress = self.ui.makeprogress(
427 progress = self.ui.makeprogress(
426 b'building cache', total=len(newmainbranch)
428 b'building cache', total=len(newmainbranch)
427 )
429 )
428 while visit:
430 while visit:
429 f = visit[-1]
431 f = visit[-1]
430 if f in hist:
432 if f in hist:
431 visit.pop()
433 visit.pop()
432 continue
434 continue
433
435
434 ready = True
436 ready = True
435 pl = pcache[f]
437 pl = pcache[f]
436 for p in pl:
438 for p in pl:
437 if p not in hist:
439 if p not in hist:
438 ready = False
440 ready = False
439 visit.append(p)
441 visit.append(p)
440 if not ready:
442 if not ready:
441 continue
443 continue
442
444
443 visit.pop()
445 visit.pop()
444 blocks = None # mdiff blocks, used for appending linelog
446 blocks = None # mdiff blocks, used for appending linelog
445 ismainbranch = f in newmainbranch
447 ismainbranch = f in newmainbranch
446 # curr is the same as the traditional annotate algorithm,
448 # curr is the same as the traditional annotate algorithm,
447 # if we only care about linear history (do not follow merge),
449 # if we only care about linear history (do not follow merge),
448 # then curr is not actually used.
450 # then curr is not actually used.
449 assert f not in hist
451 assert f not in hist
450 curr = _decorate(f)
452 curr = _decorate(f)
451 for i, p in enumerate(pl):
453 for i, p in enumerate(pl):
452 bs = list(self._diffblocks(hist[p][1], curr[1]))
454 bs = list(self._diffblocks(hist[p][1], curr[1]))
453 if i == 0 and ismainbranch:
455 if i == 0 and ismainbranch:
454 blocks = bs
456 blocks = bs
455 curr = _pair(hist[p], curr, bs)
457 curr = _pair(hist[p], curr, bs)
456 if needed[p] == 1:
458 if needed[p] == 1:
457 del hist[p]
459 del hist[p]
458 del needed[p]
460 del needed[p]
459 else:
461 else:
460 needed[p] -= 1
462 needed[p] -= 1
461
463
462 hist[f] = curr
464 hist[f] = curr
463 del pcache[f]
465 del pcache[f]
464
466
465 if ismainbranch: # need to write to linelog
467 if ismainbranch: # need to write to linelog
466 progress.increment()
468 progress.increment()
467 bannotated = None
469 bannotated = None
468 if len(pl) == 2 and self.opts.followmerge: # merge
470 if len(pl) == 2 and self.opts.followmerge: # merge
469 bannotated = curr[0]
471 bannotated = curr[0]
470 if blocks is None: # no parents, add an empty one
472 if blocks is None: # no parents, add an empty one
471 blocks = list(self._diffblocks(b'', curr[1]))
473 blocks = list(self._diffblocks(b'', curr[1]))
472 self._appendrev(f, blocks, bannotated)
474 self._appendrev(f, blocks, bannotated)
473 elif showpath: # not append linelog, but we need to record path
475 elif showpath: # not append linelog, but we need to record path
474 self._node2path[f.node()] = f.path()
476 self._node2path[f.node()] = f.path()
475
477
476 progress.complete()
478 progress.complete()
477
479
478 result = [
480 result = [
479 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
481 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
480 for fr, l in hist[revfctx][0]
482 for fr, l in hist[revfctx][0]
481 ] # [(node, linenumber)]
483 ] # [(node, linenumber)]
482 return self._refineannotateresult(result, revfctx, showpath, showlines)
484 return self._refineannotateresult(result, revfctx, showpath, showlines)
483
485
484 def canannotatedirectly(self, rev):
486 def canannotatedirectly(self, rev):
485 """(str) -> bool, fctx or node.
487 """(str) -> bool, fctx or node.
486 return (True, f) if we can annotate without updating the linelog, pass
488 return (True, f) if we can annotate without updating the linelog, pass
487 f to annotatedirectly.
489 f to annotatedirectly.
488 return (False, f) if we need extra calculation. f is the fctx resolved
490 return (False, f) if we need extra calculation. f is the fctx resolved
489 from rev.
491 from rev.
490 """
492 """
491 result = True
493 result = True
492 f = None
494 f = None
493 if not isinstance(rev, int) and rev is not None:
495 if not isinstance(rev, int) and rev is not None:
494 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
496 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
495 if hsh is not None and (hsh, self.path) in self.revmap:
497 if hsh is not None and (hsh, self.path) in self.revmap:
496 f = hsh
498 f = hsh
497 if f is None:
499 if f is None:
498 adjustctx = b'linkrev' if self._perfhack else True
500 adjustctx = b'linkrev' if self._perfhack else True
499 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
501 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
500 result = f in self.revmap
502 result = f in self.revmap
501 if not result and self._perfhack:
503 if not result and self._perfhack:
502 # redo the resolution without perfhack - as we are going to
504 # redo the resolution without perfhack - as we are going to
503 # do write operations, we need a correct fctx.
505 # do write operations, we need a correct fctx.
504 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
506 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
505 return result, f
507 return result, f
506
508
507 def annotatealllines(self, rev, showpath=False, showlines=False):
509 def annotatealllines(self, rev, showpath=False, showlines=False):
508 """(rev : str) -> [(node : str, linenum : int, path : str)]
510 """(rev : str) -> [(node : str, linenum : int, path : str)]
509
511
510 the result has the same format with annotate, but include all (including
512 the result has the same format with annotate, but include all (including
511 deleted) lines up to rev. call this after calling annotate(rev, ...) for
513 deleted) lines up to rev. call this after calling annotate(rev, ...) for
512 better performance and accuracy.
514 better performance and accuracy.
513 """
515 """
514 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
516 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
515
517
516 # find a chain from rev to anything in the mainbranch
518 # find a chain from rev to anything in the mainbranch
517 if revfctx not in self.revmap:
519 if revfctx not in self.revmap:
518 chain = [revfctx]
520 chain = [revfctx]
519 a = b''
521 a = b''
520 while True:
522 while True:
521 f = chain[-1]
523 f = chain[-1]
522 pl = self._parentfunc(f)
524 pl = self._parentfunc(f)
523 if not pl:
525 if not pl:
524 break
526 break
525 if pl[0] in self.revmap:
527 if pl[0] in self.revmap:
526 a = pl[0].data()
528 a = pl[0].data()
527 break
529 break
528 chain.append(pl[0])
530 chain.append(pl[0])
529
531
530 # both self.linelog and self.revmap is backed by filesystem. now
532 # both self.linelog and self.revmap is backed by filesystem. now
531 # we want to modify them but do not want to write changes back to
533 # we want to modify them but do not want to write changes back to
532 # files. so we create in-memory objects and copy them. it's like
534 # files. so we create in-memory objects and copy them. it's like
533 # a "fork".
535 # a "fork".
534 linelog = linelogmod.linelog()
536 linelog = linelogmod.linelog()
535 linelog.copyfrom(self.linelog)
537 linelog.copyfrom(self.linelog)
536 linelog.annotate(linelog.maxrev)
538 linelog.annotate(linelog.maxrev)
537 revmap = revmapmod.revmap()
539 revmap = revmapmod.revmap()
538 revmap.copyfrom(self.revmap)
540 revmap.copyfrom(self.revmap)
539
541
540 for f in reversed(chain):
542 for f in reversed(chain):
541 b = f.data()
543 b = f.data()
542 blocks = list(self._diffblocks(a, b))
544 blocks = list(self._diffblocks(a, b))
543 self._doappendrev(linelog, revmap, f, blocks)
545 self._doappendrev(linelog, revmap, f, blocks)
544 a = b
546 a = b
545 else:
547 else:
546 # fastpath: use existing linelog, revmap as we don't write to them
548 # fastpath: use existing linelog, revmap as we don't write to them
547 linelog = self.linelog
549 linelog = self.linelog
548 revmap = self.revmap
550 revmap = self.revmap
549
551
550 lines = linelog.getalllines()
552 lines = linelog.getalllines()
551 hsh = revfctx.node()
553 hsh = revfctx.node()
552 llrev = revmap.hsh2rev(hsh)
554 llrev = revmap.hsh2rev(hsh)
553 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
555 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
554 # cannot use _refineannotateresult since we need custom logic for
556 # cannot use _refineannotateresult since we need custom logic for
555 # resolving line contents
557 # resolving line contents
556 if showpath:
558 if showpath:
557 result = self._addpathtoresult(result, revmap)
559 result = self._addpathtoresult(result, revmap)
558 if showlines:
560 if showlines:
559 linecontents = self._resolvelines(result, revmap, linelog)
561 linecontents = self._resolvelines(result, revmap, linelog)
560 result = (result, linecontents)
562 result = (result, linecontents)
561 return result
563 return result
562
564
563 def _resolvelines(self, annotateresult, revmap, linelog):
565 def _resolvelines(self, annotateresult, revmap, linelog):
564 """(annotateresult) -> [line]. designed for annotatealllines.
566 """(annotateresult) -> [line]. designed for annotatealllines.
565 this is probably the most inefficient code in the whole fastannotate
567 this is probably the most inefficient code in the whole fastannotate
566 directory. but we have made a decision that the linelog does not
568 directory. but we have made a decision that the linelog does not
567 store line contents. so getting them requires random accesses to
569 store line contents. so getting them requires random accesses to
568 the revlog data, since they can be many, it can be very slow.
570 the revlog data, since they can be many, it can be very slow.
569 """
571 """
570 # [llrev]
572 # [llrev]
571 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
573 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
572 result = [None] * len(annotateresult)
574 result = [None] * len(annotateresult)
573 # {(rev, linenum): [lineindex]}
575 # {(rev, linenum): [lineindex]}
574 key2idxs = collections.defaultdict(list)
576 key2idxs = collections.defaultdict(list)
575 for i in pycompat.xrange(len(result)):
577 for i in pycompat.xrange(len(result)):
576 key2idxs[(revs[i], annotateresult[i][1])].append(i)
578 key2idxs[(revs[i], annotateresult[i][1])].append(i)
577 while key2idxs:
579 while key2idxs:
578 # find an unresolved line and its linelog rev to annotate
580 # find an unresolved line and its linelog rev to annotate
579 hsh = None
581 hsh = None
580 try:
582 try:
581 for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
583 for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
582 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
584 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
583 continue
585 continue
584 hsh = annotateresult[idxs[0]][0]
586 hsh = annotateresult[idxs[0]][0]
585 break
587 break
586 except StopIteration: # no more unresolved lines
588 except StopIteration: # no more unresolved lines
587 return result
589 return result
588 if hsh is None:
590 if hsh is None:
589 # the remaining key2idxs are not in main branch, resolving them
591 # the remaining key2idxs are not in main branch, resolving them
590 # using the hard way...
592 # using the hard way...
591 revlines = {}
593 revlines = {}
592 for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
594 for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
593 if rev not in revlines:
595 if rev not in revlines:
594 hsh = annotateresult[idxs[0]][0]
596 hsh = annotateresult[idxs[0]][0]
595 if self.ui.debugflag:
597 if self.ui.debugflag:
596 self.ui.debug(
598 self.ui.debug(
597 b'fastannotate: reading %s line #%d '
599 b'fastannotate: reading %s line #%d '
598 b'to resolve lines %r\n'
600 b'to resolve lines %r\n'
599 % (node.short(hsh), linenum, idxs)
601 % (node.short(hsh), linenum, idxs)
600 )
602 )
601 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
603 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
602 lines = mdiff.splitnewlines(fctx.data())
604 lines = mdiff.splitnewlines(fctx.data())
603 revlines[rev] = lines
605 revlines[rev] = lines
604 for idx in idxs:
606 for idx in idxs:
605 result[idx] = revlines[rev][linenum]
607 result[idx] = revlines[rev][linenum]
606 assert all(x is not None for x in result)
608 assert all(x is not None for x in result)
607 return result
609 return result
608
610
609 # run the annotate and the lines should match to the file content
611 # run the annotate and the lines should match to the file content
610 self.ui.debug(
612 self.ui.debug(
611 b'fastannotate: annotate %s to resolve lines\n'
613 b'fastannotate: annotate %s to resolve lines\n'
612 % node.short(hsh)
614 % node.short(hsh)
613 )
615 )
614 linelog.annotate(rev)
616 linelog.annotate(rev)
615 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
617 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
616 annotated = linelog.annotateresult
618 annotated = linelog.annotateresult
617 lines = mdiff.splitnewlines(fctx.data())
619 lines = mdiff.splitnewlines(fctx.data())
618 if len(lines) != len(annotated):
620 if len(lines) != len(annotated):
619 raise faerror.CorruptedFileError(b'unexpected annotated lines')
621 raise faerror.CorruptedFileError(b'unexpected annotated lines')
620 # resolve lines from the annotate result
622 # resolve lines from the annotate result
621 for i, line in enumerate(lines):
623 for i, line in enumerate(lines):
622 k = annotated[i]
624 k = annotated[i]
623 if k in key2idxs:
625 if k in key2idxs:
624 for idx in key2idxs[k]:
626 for idx in key2idxs[k]:
625 result[idx] = line
627 result[idx] = line
626 del key2idxs[k]
628 del key2idxs[k]
627 return result
629 return result
628
630
629 def annotatedirectly(self, f, showpath, showlines):
631 def annotatedirectly(self, f, showpath, showlines):
630 """like annotate, but when we know that f is in linelog.
632 """like annotate, but when we know that f is in linelog.
631 f can be either a 20-char str (node) or a fctx. this is for perf - in
633 f can be either a 20-char str (node) or a fctx. this is for perf - in
632 the best case, the user provides a node and we don't need to read the
634 the best case, the user provides a node and we don't need to read the
633 filelog or construct any filecontext.
635 filelog or construct any filecontext.
634 """
636 """
635 if isinstance(f, bytes):
637 if isinstance(f, bytes):
636 hsh = f
638 hsh = f
637 else:
639 else:
638 hsh = f.node()
640 hsh = f.node()
639 llrev = self.revmap.hsh2rev(hsh)
641 llrev = self.revmap.hsh2rev(hsh)
640 if not llrev:
642 if not llrev:
641 raise faerror.CorruptedFileError(
643 raise faerror.CorruptedFileError(
642 b'%s is not in revmap' % node.hex(hsh)
644 b'%s is not in revmap' % node.hex(hsh)
643 )
645 )
644 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
646 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
645 raise faerror.CorruptedFileError(
647 raise faerror.CorruptedFileError(
646 b'%s is not in revmap mainbranch' % node.hex(hsh)
648 b'%s is not in revmap mainbranch' % node.hex(hsh)
647 )
649 )
648 self.linelog.annotate(llrev)
650 self.linelog.annotate(llrev)
649 result = [
651 result = [
650 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
652 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
651 ]
653 ]
652 return self._refineannotateresult(result, f, showpath, showlines)
654 return self._refineannotateresult(result, f, showpath, showlines)
653
655
654 def _refineannotateresult(self, result, f, showpath, showlines):
656 def _refineannotateresult(self, result, f, showpath, showlines):
655 """add the missing path or line contents, they can be expensive.
657 """add the missing path or line contents, they can be expensive.
656 f could be either node or fctx.
658 f could be either node or fctx.
657 """
659 """
658 if showpath:
660 if showpath:
659 result = self._addpathtoresult(result)
661 result = self._addpathtoresult(result)
660 if showlines:
662 if showlines:
661 if isinstance(f, bytes): # f: node or fctx
663 if isinstance(f, bytes): # f: node or fctx
662 llrev = self.revmap.hsh2rev(f)
664 llrev = self.revmap.hsh2rev(f)
663 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
665 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
664 else:
666 else:
665 fctx = f
667 fctx = f
666 lines = mdiff.splitnewlines(fctx.data())
668 lines = mdiff.splitnewlines(fctx.data())
667 if len(lines) != len(result): # linelog is probably corrupted
669 if len(lines) != len(result): # linelog is probably corrupted
668 raise faerror.CorruptedFileError()
670 raise faerror.CorruptedFileError()
669 result = (result, lines)
671 result = (result, lines)
670 return result
672 return result
671
673
672 def _appendrev(self, fctx, blocks, bannotated=None):
674 def _appendrev(self, fctx, blocks, bannotated=None):
673 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
675 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
674
676
675 def _diffblocks(self, a, b):
677 def _diffblocks(self, a, b):
676 return mdiff.allblocks(a, b, self.opts.diffopts)
678 return mdiff.allblocks(a, b, self.opts.diffopts)
677
679
678 @staticmethod
680 @staticmethod
679 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
681 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
680 """append a revision to linelog and revmap"""
682 """append a revision to linelog and revmap"""
681
683
682 def getllrev(f):
684 def getllrev(f):
683 """(fctx) -> int"""
685 """(fctx) -> int"""
684 # f should not be a linelog revision
686 # f should not be a linelog revision
685 if isinstance(f, int):
687 if isinstance(f, int):
686 raise error.ProgrammingError(b'f should not be an int')
688 raise error.ProgrammingError(b'f should not be an int')
687 # f is a fctx, allocate linelog rev on demand
689 # f is a fctx, allocate linelog rev on demand
688 hsh = f.node()
690 hsh = f.node()
689 rev = revmap.hsh2rev(hsh)
691 rev = revmap.hsh2rev(hsh)
690 if rev is None:
692 if rev is None:
691 rev = revmap.append(hsh, sidebranch=True, path=f.path())
693 rev = revmap.append(hsh, sidebranch=True, path=f.path())
692 return rev
694 return rev
693
695
694 # append sidebranch revisions to revmap
696 # append sidebranch revisions to revmap
695 siderevs = []
697 siderevs = []
696 siderevmap = {} # node: int
698 siderevmap = {} # node: int
697 if bannotated is not None:
699 if bannotated is not None:
698 for (a1, a2, b1, b2), op in blocks:
700 for (a1, a2, b1, b2), op in blocks:
699 if op != b'=':
701 if op != b'=':
700 # f could be either linelong rev, or fctx.
702 # f could be either linelong rev, or fctx.
701 siderevs += [
703 siderevs += [
702 f
704 f
703 for f, l in bannotated[b1:b2]
705 for f, l in bannotated[b1:b2]
704 if not isinstance(f, int)
706 if not isinstance(f, int)
705 ]
707 ]
706 siderevs = set(siderevs)
708 siderevs = set(siderevs)
707 if fctx in siderevs: # mainnode must be appended seperately
709 if fctx in siderevs: # mainnode must be appended seperately
708 siderevs.remove(fctx)
710 siderevs.remove(fctx)
709 for f in siderevs:
711 for f in siderevs:
710 siderevmap[f] = getllrev(f)
712 siderevmap[f] = getllrev(f)
711
713
712 # the changeset in the main branch, could be a merge
714 # the changeset in the main branch, could be a merge
713 llrev = revmap.append(fctx.node(), path=fctx.path())
715 llrev = revmap.append(fctx.node(), path=fctx.path())
714 siderevmap[fctx] = llrev
716 siderevmap[fctx] = llrev
715
717
716 for (a1, a2, b1, b2), op in reversed(blocks):
718 for (a1, a2, b1, b2), op in reversed(blocks):
717 if op == b'=':
719 if op == b'=':
718 continue
720 continue
719 if bannotated is None:
721 if bannotated is None:
720 linelog.replacelines(llrev, a1, a2, b1, b2)
722 linelog.replacelines(llrev, a1, a2, b1, b2)
721 else:
723 else:
722 blines = [
724 blines = [
723 ((r if isinstance(r, int) else siderevmap[r]), l)
725 ((r if isinstance(r, int) else siderevmap[r]), l)
724 for r, l in bannotated[b1:b2]
726 for r, l in bannotated[b1:b2]
725 ]
727 ]
726 linelog.replacelines_vec(llrev, a1, a2, blines)
728 linelog.replacelines_vec(llrev, a1, a2, blines)
727
729
728 def _addpathtoresult(self, annotateresult, revmap=None):
730 def _addpathtoresult(self, annotateresult, revmap=None):
729 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
731 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
730 if revmap is None:
732 if revmap is None:
731 revmap = self.revmap
733 revmap = self.revmap
732
734
733 def _getpath(nodeid):
735 def _getpath(nodeid):
734 path = self._node2path.get(nodeid)
736 path = self._node2path.get(nodeid)
735 if path is None:
737 if path is None:
736 path = revmap.rev2path(revmap.hsh2rev(nodeid))
738 path = revmap.rev2path(revmap.hsh2rev(nodeid))
737 self._node2path[nodeid] = path
739 self._node2path[nodeid] = path
738 return path
740 return path
739
741
740 return [(n, l, _getpath(n)) for n, l in annotateresult]
742 return [(n, l, _getpath(n)) for n, l in annotateresult]
741
743
742 def _checklastmasterhead(self, fctx):
744 def _checklastmasterhead(self, fctx):
743 """check if fctx is the master's head last time, raise if not"""
745 """check if fctx is the master's head last time, raise if not"""
744 if fctx is None:
746 if fctx is None:
745 llrev = 0
747 llrev = 0
746 else:
748 else:
747 llrev = self.revmap.hsh2rev(fctx.node())
749 llrev = self.revmap.hsh2rev(fctx.node())
748 if not llrev:
750 if not llrev:
749 raise faerror.CannotReuseError()
751 raise faerror.CannotReuseError()
750 if self.linelog.maxrev != llrev:
752 if self.linelog.maxrev != llrev:
751 raise faerror.CannotReuseError()
753 raise faerror.CannotReuseError()
752
754
753 @util.propertycache
755 @util.propertycache
754 def _parentfunc(self):
756 def _parentfunc(self):
755 """-> (fctx) -> [fctx]"""
757 """-> (fctx) -> [fctx]"""
756 followrename = self.opts.followrename
758 followrename = self.opts.followrename
757 followmerge = self.opts.followmerge
759 followmerge = self.opts.followmerge
758
760
759 def parents(f):
761 def parents(f):
760 pl = _parents(f, follow=followrename)
762 pl = _parents(f, follow=followrename)
761 if not followmerge:
763 if not followmerge:
762 pl = pl[:1]
764 pl = pl[:1]
763 return pl
765 return pl
764
766
765 return parents
767 return parents
766
768
767 @util.propertycache
769 @util.propertycache
768 def _perfhack(self):
770 def _perfhack(self):
769 return self.ui.configbool(b'fastannotate', b'perfhack')
771 return self.ui.configbool(b'fastannotate', b'perfhack')
770
772
771 def _resolvefctx(self, rev, path=None, **kwds):
773 def _resolvefctx(self, rev, path=None, **kwds):
772 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
774 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
773
775
774
776
775 def _unlinkpaths(paths):
777 def _unlinkpaths(paths):
776 """silent, best-effort unlink"""
778 """silent, best-effort unlink"""
777 for path in paths:
779 for path in paths:
778 try:
780 try:
779 util.unlink(path)
781 util.unlink(path)
780 except OSError:
782 except OSError:
781 pass
783 pass
782
784
783
785
784 class pathhelper(object):
786 class pathhelper(object):
785 """helper for getting paths for lockfile, linelog and revmap"""
787 """helper for getting paths for lockfile, linelog and revmap"""
786
788
787 def __init__(self, repo, path, opts=defaultopts):
789 def __init__(self, repo, path, opts=defaultopts):
788 # different options use different directories
790 # different options use different directories
789 self._vfspath = os.path.join(
791 self._vfspath = os.path.join(
790 b'fastannotate', opts.shortstr, encodedir(path)
792 b'fastannotate', opts.shortstr, encodedir(path)
791 )
793 )
792 self._repo = repo
794 self._repo = repo
793
795
794 @property
796 @property
795 def dirname(self):
797 def dirname(self):
796 return os.path.dirname(self._repo.vfs.join(self._vfspath))
798 return os.path.dirname(self._repo.vfs.join(self._vfspath))
797
799
798 @property
800 @property
799 def linelogpath(self):
801 def linelogpath(self):
800 return self._repo.vfs.join(self._vfspath + b'.l')
802 return self._repo.vfs.join(self._vfspath + b'.l')
801
803
802 def lock(self):
804 def lock(self):
803 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
805 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
804
806
805 @property
807 @property
806 def revmappath(self):
808 def revmappath(self):
807 return self._repo.vfs.join(self._vfspath + b'.m')
809 return self._repo.vfs.join(self._vfspath + b'.m')
808
810
809
811
810 @contextlib.contextmanager
812 @contextlib.contextmanager
811 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
813 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
812 """context needed to perform (fast) annotate on a file
814 """context needed to perform (fast) annotate on a file
813
815
814 an annotatecontext of a single file consists of two structures: the
816 an annotatecontext of a single file consists of two structures: the
815 linelog and the revmap. this function takes care of locking. only 1
817 linelog and the revmap. this function takes care of locking. only 1
816 process is allowed to write that file's linelog and revmap at a time.
818 process is allowed to write that file's linelog and revmap at a time.
817
819
818 when something goes wrong, this function will assume the linelog and the
820 when something goes wrong, this function will assume the linelog and the
819 revmap are in a bad state, and remove them from disk.
821 revmap are in a bad state, and remove them from disk.
820
822
821 use this function in the following way:
823 use this function in the following way:
822
824
823 with annotatecontext(...) as actx:
825 with annotatecontext(...) as actx:
824 actx. ....
826 actx. ....
825 """
827 """
826 helper = pathhelper(repo, path, opts)
828 helper = pathhelper(repo, path, opts)
827 util.makedirs(helper.dirname)
829 util.makedirs(helper.dirname)
828 revmappath = helper.revmappath
830 revmappath = helper.revmappath
829 linelogpath = helper.linelogpath
831 linelogpath = helper.linelogpath
830 actx = None
832 actx = None
831 try:
833 try:
832 with helper.lock():
834 with helper.lock():
833 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
835 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
834 if rebuild:
836 if rebuild:
835 actx.rebuild()
837 actx.rebuild()
836 yield actx
838 yield actx
837 except Exception:
839 except Exception:
838 if actx is not None:
840 if actx is not None:
839 actx.rebuild()
841 actx.rebuild()
840 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
842 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
841 raise
843 raise
842 finally:
844 finally:
843 if actx is not None:
845 if actx is not None:
844 actx.close()
846 actx.close()
845
847
846
848
847 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
849 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
848 """like annotatecontext but get the context from a fctx. convenient when
850 """like annotatecontext but get the context from a fctx. convenient when
849 used in fctx.annotate
851 used in fctx.annotate
850 """
852 """
851 repo = fctx._repo
853 repo = fctx._repo
852 path = fctx._path
854 path = fctx._path
853 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
855 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
854 follow = True
856 follow = True
855 aopts = annotateopts(diffopts=diffopts, followrename=follow)
857 aopts = annotateopts(diffopts=diffopts, followrename=follow)
856 return annotatecontext(repo, path, aopts, rebuild)
858 return annotatecontext(repo, path, aopts, rebuild)
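A hypothetical usage sketch pieced together from the docstrings above (annotatecontext and annotate); it assumes the code above is fastannotate's context module (hgext.fastannotate.context), and the repository path, file name and revisions are illustrative only:

from mercurial import hg, ui as uimod
from hgext.fastannotate import context as facontext

repo = hg.repository(uimod.ui.load(), b'.')  # an existing local repository
with facontext.annotatecontext(repo, b'README') as actx:
    annotations, lines = actx.annotate(
        b'.', master=b'default', showpath=True, showlines=True
    )
    # annotations: [(node, linenumber, path)]; lines: matching line contents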
@@ -1,988 +1,990 b''
1 # __init__.py - fsmonitor initialization and overrides
1 # __init__.py - fsmonitor initialization and overrides
2 #
2 #
3 # Copyright 2013-2016 Facebook, Inc.
3 # Copyright 2013-2016 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9
9
10 Integrates the file-watching program Watchman with Mercurial to produce faster
10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 status results.
11 status results.
12
12
13 On a particular Linux system, for a real-world repository with over 400,000
13 On a particular Linux system, for a real-world repository with over 400,000
14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 system, with fsmonitor it takes about 0.3 seconds.
15 system, with fsmonitor it takes about 0.3 seconds.
16
16
17 fsmonitor requires no configuration -- it will tell Watchman about your
17 fsmonitor requires no configuration -- it will tell Watchman about your
18 repository as necessary. You'll need to install Watchman from
18 repository as necessary. You'll need to install Watchman from
19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20
20
21 fsmonitor is incompatible with the largefiles and eol extensions, and
21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 will disable itself if any of those are active.
22 will disable itself if any of those are active.
23
23
24 The following configuration options exist:
24 The following configuration options exist:
25
25
26 ::
26 ::
27
27
28 [fsmonitor]
28 [fsmonitor]
29 mode = {off, on, paranoid}
29 mode = {off, on, paranoid}
30
30
31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 and ensure that the results are consistent.
34 and ensure that the results are consistent.
35
35
36 ::
36 ::
37
37
38 [fsmonitor]
38 [fsmonitor]
39 timeout = (float)
39 timeout = (float)
40
40
41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 to return results. Defaults to `2.0`.
42 to return results. Defaults to `2.0`.
43
43
44 ::
44 ::
45
45
46 [fsmonitor]
46 [fsmonitor]
47 blacklistusers = (list of userids)
47 blacklistusers = (list of userids)
48
48
49 A list of usernames for which fsmonitor will disable itself altogether.
49 A list of usernames for which fsmonitor will disable itself altogether.
50
50
51 ::
51 ::
52
52
53 [fsmonitor]
53 [fsmonitor]
54 walk_on_invalidate = (boolean)
54 walk_on_invalidate = (boolean)
55
55
56 Whether or not to walk the whole repo ourselves when our cached state has been
56 Whether or not to walk the whole repo ourselves when our cached state has been
57 invalidated, for example when Watchman has been restarted or .hgignore rules
57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 have been changed. Walking the repo in that case can result in competing for
58 have been changed. Walking the repo in that case can result in competing for
59 I/O with Watchman. For large repos it is recommended to set this value to
59 I/O with Watchman. For large repos it is recommended to set this value to
60 false. You may wish to set this to true if you have a very fast filesystem
60 false. You may wish to set this to true if you have a very fast filesystem
61 that can outpace the IPC overhead of getting the result data for the full repo
61 that can outpace the IPC overhead of getting the result data for the full repo
62 from Watchman. Defaults to false.
62 from Watchman. Defaults to false.
63
63
64 ::
64 ::
65
65
66 [fsmonitor]
66 [fsmonitor]
67 warn_when_unused = (boolean)
67 warn_when_unused = (boolean)
68
68
69 Whether to print a warning during certain operations when fsmonitor would be
69 Whether to print a warning during certain operations when fsmonitor would be
70 beneficial to performance but isn't enabled.
70 beneficial to performance but isn't enabled.
71
71
72 ::
72 ::
73
73
74 [fsmonitor]
74 [fsmonitor]
75 warn_update_file_count = (integer)
75 warn_update_file_count = (integer)
76
76
77 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
77 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
78 be printed during working directory updates if this many files will be
78 be printed during working directory updates if this many files will be
79 created.
79 created.
80 '''
80 '''
81
81
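Taken together, the options above live in a single ``[fsmonitor]`` section of an hgrc file. The snippet below is purely illustrative: it restates the documented defaults for ``mode`` and ``timeout``, keeps ``walk_on_invalidate`` at the value recommended for large repositories, and uses made-up userids for ``blacklistusers``::

    [fsmonitor]
    mode = on
    timeout = 2.0
    walk_on_invalidate = false
    blacklistusers = build-bot, ci-runner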
82 # Platforms Supported
82 # Platforms Supported
83 # ===================
83 # ===================
84 #
84 #
85 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
85 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
86 # even under severe loads.
86 # even under severe loads.
87 #
87 #
88 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
88 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
89 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
89 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
90 # user testing under normal loads.
90 # user testing under normal loads.
91 #
91 #
92 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
92 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
93 # very little testing has been done.
93 # very little testing has been done.
94 #
94 #
95 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
95 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
96 #
96 #
97 # Known Issues
97 # Known Issues
98 # ============
98 # ============
99 #
99 #
100 # * fsmonitor will disable itself if any of the following extensions are
100 # * fsmonitor will disable itself if any of the following extensions are
101 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
101 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
102 # * fsmonitor will produce incorrect results if nested repos that are not
102 # * fsmonitor will produce incorrect results if nested repos that are not
103 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
103 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
104 #
104 #
105 # The issues related to nested repos and subrepos are probably not fundamental
105 # The issues related to nested repos and subrepos are probably not fundamental
106 # ones. Patches to fix them are welcome.
106 # ones. Patches to fix them are welcome.
107
107
108 from __future__ import absolute_import
108 from __future__ import absolute_import
109
109
110 import codecs
110 import codecs
111 import hashlib
112 import os
111 import os
113 import stat
112 import stat
114 import sys
113 import sys
115 import tempfile
114 import tempfile
116 import weakref
115 import weakref
117
116
118 from mercurial.i18n import _
117 from mercurial.i18n import _
119 from mercurial.node import hex
118 from mercurial.node import hex
120 from mercurial.pycompat import open
119 from mercurial.pycompat import open
121 from mercurial import (
120 from mercurial import (
122 context,
121 context,
123 encoding,
122 encoding,
124 error,
123 error,
125 extensions,
124 extensions,
126 localrepo,
125 localrepo,
127 merge,
126 merge,
128 pathutil,
127 pathutil,
129 pycompat,
128 pycompat,
130 registrar,
129 registrar,
131 scmutil,
130 scmutil,
132 util,
131 util,
133 )
132 )
134 from mercurial import match as matchmod
133 from mercurial import match as matchmod
135 from mercurial.utils import stringutil
134 from mercurial.utils import (
135 hashutil,
136 stringutil,
137 )
136
138
137 from . import (
139 from . import (
138 pywatchman,
140 pywatchman,
139 state,
141 state,
140 watchmanclient,
142 watchmanclient,
141 )
143 )
142
144
143 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
145 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
144 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
146 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
145 # be specifying the version(s) of Mercurial they are tested with, or
147 # be specifying the version(s) of Mercurial they are tested with, or
146 # leave the attribute unspecified.
148 # leave the attribute unspecified.
147 testedwith = b'ships-with-hg-core'
149 testedwith = b'ships-with-hg-core'
148
150
149 configtable = {}
151 configtable = {}
150 configitem = registrar.configitem(configtable)
152 configitem = registrar.configitem(configtable)
151
153
152 configitem(
154 configitem(
153 b'fsmonitor', b'mode', default=b'on',
155 b'fsmonitor', b'mode', default=b'on',
154 )
156 )
155 configitem(
157 configitem(
156 b'fsmonitor', b'walk_on_invalidate', default=False,
158 b'fsmonitor', b'walk_on_invalidate', default=False,
157 )
159 )
158 configitem(
160 configitem(
159 b'fsmonitor', b'timeout', default=b'2',
161 b'fsmonitor', b'timeout', default=b'2',
160 )
162 )
161 configitem(
163 configitem(
162 b'fsmonitor', b'blacklistusers', default=list,
164 b'fsmonitor', b'blacklistusers', default=list,
163 )
165 )
164 configitem(
166 configitem(
165 b'fsmonitor', b'watchman_exe', default=b'watchman',
167 b'fsmonitor', b'watchman_exe', default=b'watchman',
166 )
168 )
167 configitem(
169 configitem(
168 b'fsmonitor', b'verbose', default=True, experimental=True,
170 b'fsmonitor', b'verbose', default=True, experimental=True,
169 )
171 )
170 configitem(
172 configitem(
171 b'experimental', b'fsmonitor.transaction_notify', default=False,
173 b'experimental', b'fsmonitor.transaction_notify', default=False,
172 )
174 )
173
175
174 # This extension is incompatible with the following blacklisted extensions
176 # This extension is incompatible with the following blacklisted extensions
175 # and will disable itself when encountering one of these:
177 # and will disable itself when encountering one of these:
176 _blacklist = [b'largefiles', b'eol']
178 _blacklist = [b'largefiles', b'eol']
177
179
178
180
179 def debuginstall(ui, fm):
181 def debuginstall(ui, fm):
180 fm.write(
182 fm.write(
181 b"fsmonitor-watchman",
183 b"fsmonitor-watchman",
182 _(b"fsmonitor checking for watchman binary... (%s)\n"),
184 _(b"fsmonitor checking for watchman binary... (%s)\n"),
183 ui.configpath(b"fsmonitor", b"watchman_exe"),
185 ui.configpath(b"fsmonitor", b"watchman_exe"),
184 )
186 )
185 root = tempfile.mkdtemp()
187 root = tempfile.mkdtemp()
186 c = watchmanclient.client(ui, root)
188 c = watchmanclient.client(ui, root)
187 err = None
189 err = None
188 try:
190 try:
189 v = c.command(b"version")
191 v = c.command(b"version")
190 fm.write(
192 fm.write(
191 b"fsmonitor-watchman-version",
193 b"fsmonitor-watchman-version",
192 _(b" watchman binary version %s\n"),
194 _(b" watchman binary version %s\n"),
193 pycompat.bytestr(v["version"]),
195 pycompat.bytestr(v["version"]),
194 )
196 )
195 except watchmanclient.Unavailable as e:
197 except watchmanclient.Unavailable as e:
196 err = stringutil.forcebytestr(e)
198 err = stringutil.forcebytestr(e)
197 fm.condwrite(
199 fm.condwrite(
198 err,
200 err,
199 b"fsmonitor-watchman-error",
201 b"fsmonitor-watchman-error",
200 _(b" watchman binary missing or broken: %s\n"),
202 _(b" watchman binary missing or broken: %s\n"),
201 err,
203 err,
202 )
204 )
203 return 1 if err else 0
205 return 1 if err else 0
204
206
205
207
206 def _handleunavailable(ui, state, ex):
208 def _handleunavailable(ui, state, ex):
207 """Exception handler for Watchman interaction exceptions"""
209 """Exception handler for Watchman interaction exceptions"""
208 if isinstance(ex, watchmanclient.Unavailable):
210 if isinstance(ex, watchmanclient.Unavailable):
209 # experimental config: fsmonitor.verbose
211 # experimental config: fsmonitor.verbose
210 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
212 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
211 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
213 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
212 ui.warn(stringutil.forcebytestr(ex) + b'\n')
214 ui.warn(stringutil.forcebytestr(ex) + b'\n')
213 if ex.invalidate:
215 if ex.invalidate:
214 state.invalidate()
216 state.invalidate()
215 # experimental config: fsmonitor.verbose
217 # experimental config: fsmonitor.verbose
216 if ui.configbool(b'fsmonitor', b'verbose'):
218 if ui.configbool(b'fsmonitor', b'verbose'):
217 ui.log(
219 ui.log(
218 b'fsmonitor',
220 b'fsmonitor',
219 b'Watchman unavailable: %s\n',
221 b'Watchman unavailable: %s\n',
220 stringutil.forcebytestr(ex.msg),
222 stringutil.forcebytestr(ex.msg),
221 )
223 )
222 else:
224 else:
223 ui.log(
225 ui.log(
224 b'fsmonitor',
226 b'fsmonitor',
225 b'Watchman exception: %s\n',
227 b'Watchman exception: %s\n',
226 stringutil.forcebytestr(ex),
228 stringutil.forcebytestr(ex),
227 )
229 )
228
230
229
231
230 def _hashignore(ignore):
232 def _hashignore(ignore):
231 """Calculate hash for ignore patterns and filenames
233 """Calculate hash for ignore patterns and filenames
232
234
233 If this information changes between Mercurial invocations, we can't
235 If this information changes between Mercurial invocations, we can't
234 rely on Watchman information anymore and have to re-scan the working
236 rely on Watchman information anymore and have to re-scan the working
235 copy.
237 copy.
236
238
237 """
239 """
238 sha1 = hashlib.sha1()
240 sha1 = hashutil.sha1()
239 sha1.update(pycompat.byterepr(ignore))
241 sha1.update(pycompat.byterepr(ignore))
240 return pycompat.sysbytes(sha1.hexdigest())
242 return pycompat.sysbytes(sha1.hexdigest())
241
243
242
244
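For readers not familiar with ``hashutil``, the sketch below shows the kind of wrapper that ``_hashignore`` now calls instead of ``hashlib.sha1`` directly. It is an assumption about the wrapper's intent (these digests identify cache state rather than protect anything, so a FIPS-friendly constructor can be used where available), not a copy of ``mercurial.utils.hashutil``::

    import hashlib


    def sha1(data=b''):
        # Sketch only: request a non-security SHA-1 where the Python build
        # supports the keyword, and fall back to the plain constructor.
        try:
            return hashlib.sha1(data, usedforsecurity=False)
        except TypeError:
            return hashlib.sha1(data)

Either way the resulting digest is identical, so ``_hashignore`` keeps producing the same ignore-state hashes as before; only the constructor call changes.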
243 _watchmanencoding = pywatchman.encoding.get_local_encoding()
245 _watchmanencoding = pywatchman.encoding.get_local_encoding()
244 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
246 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
245 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
247 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
246
248
247
249
248 def _watchmantofsencoding(path):
250 def _watchmantofsencoding(path):
249 """Fix path to match watchman and local filesystem encoding
251 """Fix path to match watchman and local filesystem encoding
250
252
251 Watchman's path encoding can differ from the filesystem encoding. For example,
253 Watchman's path encoding can differ from the filesystem encoding. For example,
252 on Windows, Watchman always reports paths as utf-8.
254 on Windows, Watchman always reports paths as utf-8.
253 """
255 """
254 try:
256 try:
255 decoded = path.decode(_watchmanencoding)
257 decoded = path.decode(_watchmanencoding)
256 except UnicodeDecodeError as e:
258 except UnicodeDecodeError as e:
257 raise error.Abort(
259 raise error.Abort(
258 stringutil.forcebytestr(e), hint=b'watchman encoding error'
260 stringutil.forcebytestr(e), hint=b'watchman encoding error'
259 )
261 )
260
262
261 try:
263 try:
262 encoded = decoded.encode(_fsencoding, 'strict')
264 encoded = decoded.encode(_fsencoding, 'strict')
263 except UnicodeEncodeError as e:
265 except UnicodeEncodeError as e:
264 raise error.Abort(stringutil.forcebytestr(e))
266 raise error.Abort(stringutil.forcebytestr(e))
265
267
266 return encoded
268 return encoded
267
269
268
270
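As a concrete illustration of why ``_fixencoding`` exists (the encodings here are hypothetical, chosen only so the byte strings differ): if Watchman reports paths as UTF-8 while the local filesystem encoding is latin-1, a non-ASCII filename must be re-encoded before it can be compared against dirstate entries::

    # Illustrative encodings; the real values come from pywatchman and sys.
    watchman_encoding = 'utf-8'
    fs_encoding = 'latin-1'

    raw = u'café.txt'.encode(watchman_encoding)  # as received from Watchman
    fixed = raw.decode(watchman_encoding).encode(fs_encoding, 'strict')

    assert raw != fixed  # b'caf\xc3\xa9.txt' vs b'caf\xe9.txt'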
269 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
271 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
270 '''Replacement for dirstate.walk, hooking into Watchman.
272 '''Replacement for dirstate.walk, hooking into Watchman.
271
273
272 Whenever full is False, ignored is False, and the Watchman client is
274 Whenever full is False, ignored is False, and the Watchman client is
273 available, use Watchman combined with saved state to possibly return only a
275 available, use Watchman combined with saved state to possibly return only a
274 subset of files.'''
276 subset of files.'''
275
277
276 def bail(reason):
278 def bail(reason):
277 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
279 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
278 return orig(match, subrepos, unknown, ignored, full=True)
280 return orig(match, subrepos, unknown, ignored, full=True)
279
281
280 if full:
282 if full:
281 return bail(b'full rewalk requested')
283 return bail(b'full rewalk requested')
282 if ignored:
284 if ignored:
283 return bail(b'listing ignored files')
285 return bail(b'listing ignored files')
284 if not self._watchmanclient.available():
286 if not self._watchmanclient.available():
285 return bail(b'client unavailable')
287 return bail(b'client unavailable')
286 state = self._fsmonitorstate
288 state = self._fsmonitorstate
287 clock, ignorehash, notefiles = state.get()
289 clock, ignorehash, notefiles = state.get()
288 if not clock:
290 if not clock:
289 if state.walk_on_invalidate:
291 if state.walk_on_invalidate:
290 return bail(b'no clock')
292 return bail(b'no clock')
291 # Initial NULL clock value, see
293 # Initial NULL clock value, see
292 # https://facebook.github.io/watchman/docs/clockspec.html
294 # https://facebook.github.io/watchman/docs/clockspec.html
293 clock = b'c:0:0'
295 clock = b'c:0:0'
294 notefiles = []
296 notefiles = []
295
297
296 ignore = self._ignore
298 ignore = self._ignore
297 dirignore = self._dirignore
299 dirignore = self._dirignore
298 if unknown:
300 if unknown:
299 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
301 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
300 # ignore list changed -- can't rely on Watchman state any more
302 # ignore list changed -- can't rely on Watchman state any more
301 if state.walk_on_invalidate:
303 if state.walk_on_invalidate:
302 return bail(b'ignore rules changed')
304 return bail(b'ignore rules changed')
303 notefiles = []
305 notefiles = []
304 clock = b'c:0:0'
306 clock = b'c:0:0'
305 else:
307 else:
306 # always ignore
308 # always ignore
307 ignore = util.always
309 ignore = util.always
308 dirignore = util.always
310 dirignore = util.always
309
311
310 matchfn = match.matchfn
312 matchfn = match.matchfn
311 matchalways = match.always()
313 matchalways = match.always()
312 dmap = self._map
314 dmap = self._map
313 if util.safehasattr(dmap, b'_map'):
315 if util.safehasattr(dmap, b'_map'):
314 # for better performance, directly access the inner dirstate map if the
316 # for better performance, directly access the inner dirstate map if the
315 # standard dirstate implementation is in use.
317 # standard dirstate implementation is in use.
316 dmap = dmap._map
318 dmap = dmap._map
317 nonnormalset = self._map.nonnormalset
319 nonnormalset = self._map.nonnormalset
318
320
319 copymap = self._map.copymap
321 copymap = self._map.copymap
320 getkind = stat.S_IFMT
322 getkind = stat.S_IFMT
321 dirkind = stat.S_IFDIR
323 dirkind = stat.S_IFDIR
322 regkind = stat.S_IFREG
324 regkind = stat.S_IFREG
323 lnkkind = stat.S_IFLNK
325 lnkkind = stat.S_IFLNK
324 join = self._join
326 join = self._join
325 normcase = util.normcase
327 normcase = util.normcase
326 fresh_instance = False
328 fresh_instance = False
327
329
328 exact = skipstep3 = False
330 exact = skipstep3 = False
329 if match.isexact(): # match.exact
331 if match.isexact(): # match.exact
330 exact = True
332 exact = True
331 dirignore = util.always # skip step 2
333 dirignore = util.always # skip step 2
332 elif match.prefix(): # match.match, no patterns
334 elif match.prefix(): # match.match, no patterns
333 skipstep3 = True
335 skipstep3 = True
334
336
335 if not exact and self._checkcase:
337 if not exact and self._checkcase:
336 # note that even though we could receive directory entries, we're only
338 # note that even though we could receive directory entries, we're only
337 # interested in checking if a file with the same name exists. So only
339 # interested in checking if a file with the same name exists. So only
338 # normalize files if possible.
340 # normalize files if possible.
339 normalize = self._normalizefile
341 normalize = self._normalizefile
340 skipstep3 = False
342 skipstep3 = False
341 else:
343 else:
342 normalize = None
344 normalize = None
343
345
344 # step 1: find all explicit files
346 # step 1: find all explicit files
345 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
347 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
346
348
347 skipstep3 = skipstep3 and not (work or dirsnotfound)
349 skipstep3 = skipstep3 and not (work or dirsnotfound)
348 work = [d for d in work if not dirignore(d[0])]
350 work = [d for d in work if not dirignore(d[0])]
349
351
350 if not work and (exact or skipstep3):
352 if not work and (exact or skipstep3):
351 for s in subrepos:
353 for s in subrepos:
352 del results[s]
354 del results[s]
353 del results[b'.hg']
355 del results[b'.hg']
354 return results
356 return results
355
357
356 # step 2: query Watchman
358 # step 2: query Watchman
357 try:
359 try:
358 # Use the user-configured timeout for the query.
360 # Use the user-configured timeout for the query.
359 # Add a little slack over the top of the user query to allow for
361 # Add a little slack over the top of the user query to allow for
360 # overheads while transferring the data
362 # overheads while transferring the data
361 self._watchmanclient.settimeout(state.timeout + 0.1)
363 self._watchmanclient.settimeout(state.timeout + 0.1)
362 result = self._watchmanclient.command(
364 result = self._watchmanclient.command(
363 b'query',
365 b'query',
364 {
366 {
365 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
367 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
366 b'since': clock,
368 b'since': clock,
367 b'expression': [
369 b'expression': [
368 b'not',
370 b'not',
369 [
371 [
370 b'anyof',
372 b'anyof',
371 [b'dirname', b'.hg'],
373 [b'dirname', b'.hg'],
372 [b'name', b'.hg', b'wholename'],
374 [b'name', b'.hg', b'wholename'],
373 ],
375 ],
374 ],
376 ],
375 b'sync_timeout': int(state.timeout * 1000),
377 b'sync_timeout': int(state.timeout * 1000),
376 b'empty_on_fresh_instance': state.walk_on_invalidate,
378 b'empty_on_fresh_instance': state.walk_on_invalidate,
377 },
379 },
378 )
380 )
379 except Exception as ex:
381 except Exception as ex:
380 _handleunavailable(self._ui, state, ex)
382 _handleunavailable(self._ui, state, ex)
381 self._watchmanclient.clearconnection()
383 self._watchmanclient.clearconnection()
382 return bail(b'exception during run')
384 return bail(b'exception during run')
383 else:
385 else:
384 # We need to propagate the last observed clock up so that we
386 # We need to propagate the last observed clock up so that we
385 # can use it for our next query
387 # can use it for our next query
386 state.setlastclock(pycompat.sysbytes(result[b'clock']))
388 state.setlastclock(pycompat.sysbytes(result[b'clock']))
387 if result[b'is_fresh_instance']:
389 if result[b'is_fresh_instance']:
388 if state.walk_on_invalidate:
390 if state.walk_on_invalidate:
389 state.invalidate()
391 state.invalidate()
390 return bail(b'fresh instance')
392 return bail(b'fresh instance')
391 fresh_instance = True
393 fresh_instance = True
392 # Ignore any prior notable files from the state info
394 # Ignore any prior notable files from the state info
393 notefiles = []
395 notefiles = []
394
396
395 # for file paths which require normalization, and for which we may encounter
397 # for file paths which require normalization, and for which we may encounter
396 # a case collision, we store our own foldmap
398 # a case collision, we store our own foldmap
397 if normalize:
399 if normalize:
398 foldmap = dict((normcase(k), k) for k in results)
400 foldmap = dict((normcase(k), k) for k in results)
399
401
400 switch_slashes = pycompat.ossep == b'\\'
402 switch_slashes = pycompat.ossep == b'\\'
401 # The order of the results is, strictly speaking, undefined.
403 # The order of the results is, strictly speaking, undefined.
402 # For case changes on a case insensitive filesystem we may receive
404 # For case changes on a case insensitive filesystem we may receive
403 # two entries, one with exists=True and another with exists=False.
405 # two entries, one with exists=True and another with exists=False.
404 # The exists=True entries in the same response should be interpreted
406 # The exists=True entries in the same response should be interpreted
405 # as being happens-after the exists=False entries due to the way that
407 # as being happens-after the exists=False entries due to the way that
406 # Watchman tracks files. We use this property to reconcile deletes
408 # Watchman tracks files. We use this property to reconcile deletes
407 # for name case changes.
409 # for name case changes.
408 for entry in result[b'files']:
410 for entry in result[b'files']:
409 fname = entry[b'name']
411 fname = entry[b'name']
410
412
411 # Watchman always gives us a str. Normalize to bytes on Python 3
413 # Watchman always gives us a str. Normalize to bytes on Python 3
412 # using Watchman's encoding, if needed.
414 # using Watchman's encoding, if needed.
413 if not isinstance(fname, bytes):
415 if not isinstance(fname, bytes):
414 fname = fname.encode(_watchmanencoding)
416 fname = fname.encode(_watchmanencoding)
415
417
416 if _fixencoding:
418 if _fixencoding:
417 fname = _watchmantofsencoding(fname)
419 fname = _watchmantofsencoding(fname)
418
420
419 if switch_slashes:
421 if switch_slashes:
420 fname = fname.replace(b'\\', b'/')
422 fname = fname.replace(b'\\', b'/')
421 if normalize:
423 if normalize:
422 normed = normcase(fname)
424 normed = normcase(fname)
423 fname = normalize(fname, True, True)
425 fname = normalize(fname, True, True)
424 foldmap[normed] = fname
426 foldmap[normed] = fname
425 fmode = entry[b'mode']
427 fmode = entry[b'mode']
426 fexists = entry[b'exists']
428 fexists = entry[b'exists']
427 kind = getkind(fmode)
429 kind = getkind(fmode)
428
430
429 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
431 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
430 return bail(b'nested-repo-detected')
432 return bail(b'nested-repo-detected')
431
433
432 if not fexists:
434 if not fexists:
433 # if marked as deleted and we don't already have a change
435 # if marked as deleted and we don't already have a change
434 # record, mark it as deleted. If we already have an entry
436 # record, mark it as deleted. If we already have an entry
435 # for fname then it was either part of walkexplicit or was
437 # for fname then it was either part of walkexplicit or was
436 # an earlier result that was a case change
438 # an earlier result that was a case change
437 if (
439 if (
438 fname not in results
440 fname not in results
439 and fname in dmap
441 and fname in dmap
440 and (matchalways or matchfn(fname))
442 and (matchalways or matchfn(fname))
441 ):
443 ):
442 results[fname] = None
444 results[fname] = None
443 elif kind == dirkind:
445 elif kind == dirkind:
444 if fname in dmap and (matchalways or matchfn(fname)):
446 if fname in dmap and (matchalways or matchfn(fname)):
445 results[fname] = None
447 results[fname] = None
446 elif kind == regkind or kind == lnkkind:
448 elif kind == regkind or kind == lnkkind:
447 if fname in dmap:
449 if fname in dmap:
448 if matchalways or matchfn(fname):
450 if matchalways or matchfn(fname):
449 results[fname] = entry
451 results[fname] = entry
450 elif (matchalways or matchfn(fname)) and not ignore(fname):
452 elif (matchalways or matchfn(fname)) and not ignore(fname):
451 results[fname] = entry
453 results[fname] = entry
452 elif fname in dmap and (matchalways or matchfn(fname)):
454 elif fname in dmap and (matchalways or matchfn(fname)):
453 results[fname] = None
455 results[fname] = None
454
456
455 # step 3: query notable files we don't already know about
457 # step 3: query notable files we don't already know about
456 # XXX try not to iterate over the entire dmap
458 # XXX try not to iterate over the entire dmap
457 if normalize:
459 if normalize:
458 # any notable files that have changed case will already be handled
460 # any notable files that have changed case will already be handled
459 # above, so just check membership in the foldmap
461 # above, so just check membership in the foldmap
460 notefiles = set(
462 notefiles = set(
461 (
463 (
462 normalize(f, True, True)
464 normalize(f, True, True)
463 for f in notefiles
465 for f in notefiles
464 if normcase(f) not in foldmap
466 if normcase(f) not in foldmap
465 )
467 )
466 )
468 )
467 visit = set(
469 visit = set(
468 (
470 (
469 f
471 f
470 for f in notefiles
472 for f in notefiles
471 if (
473 if (
472 f not in results and matchfn(f) and (f in dmap or not ignore(f))
474 f not in results and matchfn(f) and (f in dmap or not ignore(f))
473 )
475 )
474 )
476 )
475 )
477 )
476
478
477 if not fresh_instance:
479 if not fresh_instance:
478 if matchalways:
480 if matchalways:
479 visit.update(f for f in nonnormalset if f not in results)
481 visit.update(f for f in nonnormalset if f not in results)
480 visit.update(f for f in copymap if f not in results)
482 visit.update(f for f in copymap if f not in results)
481 else:
483 else:
482 visit.update(
484 visit.update(
483 f for f in nonnormalset if f not in results and matchfn(f)
485 f for f in nonnormalset if f not in results and matchfn(f)
484 )
486 )
485 visit.update(f for f in copymap if f not in results and matchfn(f))
487 visit.update(f for f in copymap if f not in results and matchfn(f))
486 else:
488 else:
487 if matchalways:
489 if matchalways:
488 visit.update(
490 visit.update(
489 f for f, st in pycompat.iteritems(dmap) if f not in results
491 f for f, st in pycompat.iteritems(dmap) if f not in results
490 )
492 )
491 visit.update(f for f in copymap if f not in results)
493 visit.update(f for f in copymap if f not in results)
492 else:
494 else:
493 visit.update(
495 visit.update(
494 f
496 f
495 for f, st in pycompat.iteritems(dmap)
497 for f, st in pycompat.iteritems(dmap)
496 if f not in results and matchfn(f)
498 if f not in results and matchfn(f)
497 )
499 )
498 visit.update(f for f in copymap if f not in results and matchfn(f))
500 visit.update(f for f in copymap if f not in results and matchfn(f))
499
501
500 audit = pathutil.pathauditor(self._root, cached=True).check
502 audit = pathutil.pathauditor(self._root, cached=True).check
501 auditpass = [f for f in visit if audit(f)]
503 auditpass = [f for f in visit if audit(f)]
502 auditpass.sort()
504 auditpass.sort()
503 auditfail = visit.difference(auditpass)
505 auditfail = visit.difference(auditpass)
504 for f in auditfail:
506 for f in auditfail:
505 results[f] = None
507 results[f] = None
506
508
507 nf = iter(auditpass)
509 nf = iter(auditpass)
508 for st in util.statfiles([join(f) for f in auditpass]):
510 for st in util.statfiles([join(f) for f in auditpass]):
509 f = next(nf)
511 f = next(nf)
510 if st or f in dmap:
512 if st or f in dmap:
511 results[f] = st
513 results[f] = st
512
514
513 for s in subrepos:
515 for s in subrepos:
514 del results[s]
516 del results[s]
515 del results[b'.hg']
517 del results[b'.hg']
516 return results
518 return results
517
519
518
520
519 def overridestatus(
521 def overridestatus(
520 orig,
522 orig,
521 self,
523 self,
522 node1=b'.',
524 node1=b'.',
523 node2=None,
525 node2=None,
524 match=None,
526 match=None,
525 ignored=False,
527 ignored=False,
526 clean=False,
528 clean=False,
527 unknown=False,
529 unknown=False,
528 listsubrepos=False,
530 listsubrepos=False,
529 ):
531 ):
530 listignored = ignored
532 listignored = ignored
531 listclean = clean
533 listclean = clean
532 listunknown = unknown
534 listunknown = unknown
533
535
534 def _cmpsets(l1, l2):
536 def _cmpsets(l1, l2):
535 try:
537 try:
536 if b'FSMONITOR_LOG_FILE' in encoding.environ:
538 if b'FSMONITOR_LOG_FILE' in encoding.environ:
537 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
539 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
538 f = open(fn, b'wb')
540 f = open(fn, b'wb')
539 else:
541 else:
540 fn = b'fsmonitorfail.log'
542 fn = b'fsmonitorfail.log'
541 f = self.vfs.open(fn, b'wb')
543 f = self.vfs.open(fn, b'wb')
542 except (IOError, OSError):
544 except (IOError, OSError):
543 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
545 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
544 return
546 return
545
547
546 try:
548 try:
547 for i, (s1, s2) in enumerate(zip(l1, l2)):
549 for i, (s1, s2) in enumerate(zip(l1, l2)):
548 if set(s1) != set(s2):
550 if set(s1) != set(s2):
549 f.write(b'sets at position %d are unequal\n' % i)
551 f.write(b'sets at position %d are unequal\n' % i)
550 f.write(b'watchman returned: %s\n' % s1)
552 f.write(b'watchman returned: %s\n' % s1)
551 f.write(b'stat returned: %s\n' % s2)
553 f.write(b'stat returned: %s\n' % s2)
552 finally:
554 finally:
553 f.close()
555 f.close()
554
556
555 if isinstance(node1, context.changectx):
557 if isinstance(node1, context.changectx):
556 ctx1 = node1
558 ctx1 = node1
557 else:
559 else:
558 ctx1 = self[node1]
560 ctx1 = self[node1]
559 if isinstance(node2, context.changectx):
561 if isinstance(node2, context.changectx):
560 ctx2 = node2
562 ctx2 = node2
561 else:
563 else:
562 ctx2 = self[node2]
564 ctx2 = self[node2]
563
565
564 working = ctx2.rev() is None
566 working = ctx2.rev() is None
565 parentworking = working and ctx1 == self[b'.']
567 parentworking = working and ctx1 == self[b'.']
566 match = match or matchmod.always()
568 match = match or matchmod.always()
567
569
568 # Maybe we can use this opportunity to update Watchman's state.
570 # Maybe we can use this opportunity to update Watchman's state.
569 # Mercurial uses workingcommitctx and/or memctx to represent the part of
571 # Mercurial uses workingcommitctx and/or memctx to represent the part of
570 # the workingctx that is to be committed. So don't update the state in
572 # the workingctx that is to be committed. So don't update the state in
571 # that case.
573 # that case.
572 # HG_PENDING is set in the environment when the dirstate is being updated
574 # HG_PENDING is set in the environment when the dirstate is being updated
573 # in the middle of a transaction; we must not update our state in that
575 # in the middle of a transaction; we must not update our state in that
574 # case, or we risk forgetting about changes in the working copy.
576 # case, or we risk forgetting about changes in the working copy.
575 updatestate = (
577 updatestate = (
576 parentworking
578 parentworking
577 and match.always()
579 and match.always()
578 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
580 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
579 and b'HG_PENDING' not in encoding.environ
581 and b'HG_PENDING' not in encoding.environ
580 )
582 )
581
583
582 try:
584 try:
583 if self._fsmonitorstate.walk_on_invalidate:
585 if self._fsmonitorstate.walk_on_invalidate:
584 # Use a short timeout to query the current clock. If that
586 # Use a short timeout to query the current clock. If that
585 # takes too long then we assume that the service will be slow
587 # takes too long then we assume that the service will be slow
586 # to answer our query.
588 # to answer our query.
587 # walk_on_invalidate indicates that we prefer to walk the
589 # walk_on_invalidate indicates that we prefer to walk the
588 # tree ourselves because we can ignore portions that Watchman
590 # tree ourselves because we can ignore portions that Watchman
589 # cannot and we tend to be faster in the warmer buffer cache
591 # cannot and we tend to be faster in the warmer buffer cache
590 # cases.
592 # cases.
591 self._watchmanclient.settimeout(0.1)
593 self._watchmanclient.settimeout(0.1)
592 else:
594 else:
593 # Give Watchman more time to potentially complete its walk
595 # Give Watchman more time to potentially complete its walk
594 # and return the initial clock. In this mode we assume that
596 # and return the initial clock. In this mode we assume that
595 # the filesystem will be slower than parsing a potentially
597 # the filesystem will be slower than parsing a potentially
596 # very large Watchman result set.
598 # very large Watchman result set.
597 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
599 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
598 startclock = self._watchmanclient.getcurrentclock()
600 startclock = self._watchmanclient.getcurrentclock()
599 except Exception as ex:
601 except Exception as ex:
600 self._watchmanclient.clearconnection()
602 self._watchmanclient.clearconnection()
601 _handleunavailable(self.ui, self._fsmonitorstate, ex)
603 _handleunavailable(self.ui, self._fsmonitorstate, ex)
602 # boo, Watchman failed. bail
604 # boo, Watchman failed. bail
603 return orig(
605 return orig(
604 node1,
606 node1,
605 node2,
607 node2,
606 match,
608 match,
607 listignored,
609 listignored,
608 listclean,
610 listclean,
609 listunknown,
611 listunknown,
610 listsubrepos,
612 listsubrepos,
611 )
613 )
612
614
613 if updatestate:
615 if updatestate:
614 # We need info about unknown files. This may make things slower the
616 # We need info about unknown files. This may make things slower the
615 # first time, but whatever.
617 # first time, but whatever.
616 stateunknown = True
618 stateunknown = True
617 else:
619 else:
618 stateunknown = listunknown
620 stateunknown = listunknown
619
621
620 if updatestate:
622 if updatestate:
621 ps = poststatus(startclock)
623 ps = poststatus(startclock)
622 self.addpostdsstatus(ps)
624 self.addpostdsstatus(ps)
623
625
624 r = orig(
626 r = orig(
625 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
627 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
626 )
628 )
627 modified, added, removed, deleted, unknown, ignored, clean = r
629 modified, added, removed, deleted, unknown, ignored, clean = r
628
630
629 if not listunknown:
631 if not listunknown:
630 unknown = []
632 unknown = []
631
633
632 # don't do paranoid checks if we're not going to query Watchman anyway
634 # don't do paranoid checks if we're not going to query Watchman anyway
633 full = listclean or match.traversedir is not None
635 full = listclean or match.traversedir is not None
634 if self._fsmonitorstate.mode == b'paranoid' and not full:
636 if self._fsmonitorstate.mode == b'paranoid' and not full:
635 # run status again and fall back to the old walk this time
637 # run status again and fall back to the old walk this time
636 self.dirstate._fsmonitordisable = True
638 self.dirstate._fsmonitordisable = True
637
639
638 # shut the UI up
640 # shut the UI up
639 quiet = self.ui.quiet
641 quiet = self.ui.quiet
640 self.ui.quiet = True
642 self.ui.quiet = True
641 fout, ferr = self.ui.fout, self.ui.ferr
643 fout, ferr = self.ui.fout, self.ui.ferr
642 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
644 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
643
645
644 try:
646 try:
645 rv2 = orig(
647 rv2 = orig(
646 node1,
648 node1,
647 node2,
649 node2,
648 match,
650 match,
649 listignored,
651 listignored,
650 listclean,
652 listclean,
651 listunknown,
653 listunknown,
652 listsubrepos,
654 listsubrepos,
653 )
655 )
654 finally:
656 finally:
655 self.dirstate._fsmonitordisable = False
657 self.dirstate._fsmonitordisable = False
656 self.ui.quiet = quiet
658 self.ui.quiet = quiet
657 self.ui.fout, self.ui.ferr = fout, ferr
659 self.ui.fout, self.ui.ferr = fout, ferr
658
660
659 # clean isn't tested since it's set to True above
661 # clean isn't tested since it's set to True above
660 with self.wlock():
662 with self.wlock():
661 _cmpsets(
663 _cmpsets(
662 [modified, added, removed, deleted, unknown, ignored, clean],
664 [modified, added, removed, deleted, unknown, ignored, clean],
663 rv2,
665 rv2,
664 )
666 )
665 modified, added, removed, deleted, unknown, ignored, clean = rv2
667 modified, added, removed, deleted, unknown, ignored, clean = rv2
666
668
667 return scmutil.status(
669 return scmutil.status(
668 modified, added, removed, deleted, unknown, ignored, clean
670 modified, added, removed, deleted, unknown, ignored, clean
669 )
671 )
670
672
671
673
672 class poststatus(object):
674 class poststatus(object):
673 def __init__(self, startclock):
675 def __init__(self, startclock):
674 self._startclock = startclock
676 self._startclock = startclock
675
677
676 def __call__(self, wctx, status):
678 def __call__(self, wctx, status):
677 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
679 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
678 hashignore = _hashignore(wctx.repo().dirstate._ignore)
680 hashignore = _hashignore(wctx.repo().dirstate._ignore)
679 notefiles = (
681 notefiles = (
680 status.modified
682 status.modified
681 + status.added
683 + status.added
682 + status.removed
684 + status.removed
683 + status.deleted
685 + status.deleted
684 + status.unknown
686 + status.unknown
685 )
687 )
686 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
688 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
687
689
688
690
689 def makedirstate(repo, dirstate):
691 def makedirstate(repo, dirstate):
690 class fsmonitordirstate(dirstate.__class__):
692 class fsmonitordirstate(dirstate.__class__):
691 def _fsmonitorinit(self, repo):
693 def _fsmonitorinit(self, repo):
692 # _fsmonitordisable is used in paranoid mode
694 # _fsmonitordisable is used in paranoid mode
693 self._fsmonitordisable = False
695 self._fsmonitordisable = False
694 self._fsmonitorstate = repo._fsmonitorstate
696 self._fsmonitorstate = repo._fsmonitorstate
695 self._watchmanclient = repo._watchmanclient
697 self._watchmanclient = repo._watchmanclient
696 self._repo = weakref.proxy(repo)
698 self._repo = weakref.proxy(repo)
697
699
698 def walk(self, *args, **kwargs):
700 def walk(self, *args, **kwargs):
699 orig = super(fsmonitordirstate, self).walk
701 orig = super(fsmonitordirstate, self).walk
700 if self._fsmonitordisable:
702 if self._fsmonitordisable:
701 return orig(*args, **kwargs)
703 return orig(*args, **kwargs)
702 return overridewalk(orig, self, *args, **kwargs)
704 return overridewalk(orig, self, *args, **kwargs)
703
705
704 def rebuild(self, *args, **kwargs):
706 def rebuild(self, *args, **kwargs):
705 self._fsmonitorstate.invalidate()
707 self._fsmonitorstate.invalidate()
706 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
708 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
707
709
708 def invalidate(self, *args, **kwargs):
710 def invalidate(self, *args, **kwargs):
709 self._fsmonitorstate.invalidate()
711 self._fsmonitorstate.invalidate()
710 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
712 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
711
713
712 dirstate.__class__ = fsmonitordirstate
714 dirstate.__class__ = fsmonitordirstate
713 dirstate._fsmonitorinit(repo)
715 dirstate._fsmonitorinit(repo)
714
716
715
717
716 def wrapdirstate(orig, self):
718 def wrapdirstate(orig, self):
717 ds = orig(self)
719 ds = orig(self)
718 # only override the dirstate when Watchman is available for the repo
720 # only override the dirstate when Watchman is available for the repo
719 if util.safehasattr(self, b'_fsmonitorstate'):
721 if util.safehasattr(self, b'_fsmonitorstate'):
720 makedirstate(self, ds)
722 makedirstate(self, ds)
721 return ds
723 return ds
722
724
723
725
724 def extsetup(ui):
726 def extsetup(ui):
725 extensions.wrapfilecache(
727 extensions.wrapfilecache(
726 localrepo.localrepository, b'dirstate', wrapdirstate
728 localrepo.localrepository, b'dirstate', wrapdirstate
727 )
729 )
728 if pycompat.isdarwin:
730 if pycompat.isdarwin:
729 # An assist for avoiding the dangling-symlink fsevents bug
731 # An assist for avoiding the dangling-symlink fsevents bug
730 extensions.wrapfunction(os, b'symlink', wrapsymlink)
732 extensions.wrapfunction(os, b'symlink', wrapsymlink)
731
733
732 extensions.wrapfunction(merge, b'update', wrapupdate)
734 extensions.wrapfunction(merge, b'update', wrapupdate)
733
735
734
736
735 def wrapsymlink(orig, source, link_name):
737 def wrapsymlink(orig, source, link_name):
736 '''If we create a dangling symlink, also touch the parent dir
738 '''If we create a dangling symlink, also touch the parent dir
737 to encourage fsevents notifications to work more correctly.'''
739 to encourage fsevents notifications to work more correctly.'''
738 try:
740 try:
739 return orig(source, link_name)
741 return orig(source, link_name)
740 finally:
742 finally:
741 try:
743 try:
742 os.utime(os.path.dirname(link_name), None)
744 os.utime(os.path.dirname(link_name), None)
743 except OSError:
745 except OSError:
744 pass
746 pass
745
747
746
748
747 class state_update(object):
749 class state_update(object):
748 ''' This context manager is responsible for dispatching the state-enter
750 ''' This context manager is responsible for dispatching the state-enter
749 and state-leave signals to the watchman service. The enter and leave
751 and state-leave signals to the watchman service. The enter and leave
750 methods can be invoked manually (for scenarios where context manager
752 methods can be invoked manually (for scenarios where context manager
751 semantics are not possible). If parameters oldnode and newnode are None,
753 semantics are not possible). If parameters oldnode and newnode are None,
752 they will be populated based on current working copy in enter and
754 they will be populated based on current working copy in enter and
753 leave, respectively. Similarly, if the distance is None, it will be
755 leave, respectively. Similarly, if the distance is None, it will be
754 calculated based on the oldnode and newnode in the leave method.'''
756 calculated based on the oldnode and newnode in the leave method.'''
755
757
756 def __init__(
758 def __init__(
757 self,
759 self,
758 repo,
760 repo,
759 name,
761 name,
760 oldnode=None,
762 oldnode=None,
761 newnode=None,
763 newnode=None,
762 distance=None,
764 distance=None,
763 partial=False,
765 partial=False,
764 ):
766 ):
765 self.repo = repo.unfiltered()
767 self.repo = repo.unfiltered()
766 self.name = name
768 self.name = name
767 self.oldnode = oldnode
769 self.oldnode = oldnode
768 self.newnode = newnode
770 self.newnode = newnode
769 self.distance = distance
771 self.distance = distance
770 self.partial = partial
772 self.partial = partial
771 self._lock = None
773 self._lock = None
772 self.need_leave = False
774 self.need_leave = False
773
775
774 def __enter__(self):
776 def __enter__(self):
775 self.enter()
777 self.enter()
776
778
777 def enter(self):
779 def enter(self):
778 # Make sure we have a wlock prior to sending notifications to watchman.
780 # Make sure we have a wlock prior to sending notifications to watchman.
779 # We don't want to race with other actors. In the update case,
781 # We don't want to race with other actors. In the update case,
780 # merge.update is going to take the wlock almost immediately. We are
782 # merge.update is going to take the wlock almost immediately. We are
781 # effectively extending the lock around several short sanity checks.
783 # effectively extending the lock around several short sanity checks.
782 if self.oldnode is None:
784 if self.oldnode is None:
783 self.oldnode = self.repo[b'.'].node()
785 self.oldnode = self.repo[b'.'].node()
784
786
785 if self.repo.currentwlock() is None:
787 if self.repo.currentwlock() is None:
786 if util.safehasattr(self.repo, b'wlocknostateupdate'):
788 if util.safehasattr(self.repo, b'wlocknostateupdate'):
787 self._lock = self.repo.wlocknostateupdate()
789 self._lock = self.repo.wlocknostateupdate()
788 else:
790 else:
789 self._lock = self.repo.wlock()
791 self._lock = self.repo.wlock()
790 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
792 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
791 return self
793 return self
792
794
793 def __exit__(self, type_, value, tb):
795 def __exit__(self, type_, value, tb):
794 abort = True if type_ else False
796 abort = True if type_ else False
795 self.exit(abort=abort)
797 self.exit(abort=abort)
796
798
797 def exit(self, abort=False):
799 def exit(self, abort=False):
798 try:
800 try:
799 if self.need_leave:
801 if self.need_leave:
800 status = b'failed' if abort else b'ok'
802 status = b'failed' if abort else b'ok'
801 if self.newnode is None:
803 if self.newnode is None:
802 self.newnode = self.repo[b'.'].node()
804 self.newnode = self.repo[b'.'].node()
803 if self.distance is None:
805 if self.distance is None:
804 self.distance = calcdistance(
806 self.distance = calcdistance(
805 self.repo, self.oldnode, self.newnode
807 self.repo, self.oldnode, self.newnode
806 )
808 )
807 self._state(b'state-leave', hex(self.newnode), status=status)
809 self._state(b'state-leave', hex(self.newnode), status=status)
808 finally:
810 finally:
809 self.need_leave = False
811 self.need_leave = False
810 if self._lock:
812 if self._lock:
811 self._lock.release()
813 self._lock.release()
812
814
813 def _state(self, cmd, commithash, status=b'ok'):
815 def _state(self, cmd, commithash, status=b'ok'):
814 if not util.safehasattr(self.repo, b'_watchmanclient'):
816 if not util.safehasattr(self.repo, b'_watchmanclient'):
815 return False
817 return False
816 try:
818 try:
817 self.repo._watchmanclient.command(
819 self.repo._watchmanclient.command(
818 cmd,
820 cmd,
819 {
821 {
820 b'name': self.name,
822 b'name': self.name,
821 b'metadata': {
823 b'metadata': {
822 # the target revision
824 # the target revision
823 b'rev': commithash,
825 b'rev': commithash,
824 # approximate number of commits between current and target
826 # approximate number of commits between current and target
825 b'distance': self.distance if self.distance else 0,
827 b'distance': self.distance if self.distance else 0,
826 # success/failure (only really meaningful for state-leave)
828 # success/failure (only really meaningful for state-leave)
827 b'status': status,
829 b'status': status,
828 # whether the working copy parent is changing
830 # whether the working copy parent is changing
829 b'partial': self.partial,
831 b'partial': self.partial,
830 },
832 },
831 },
833 },
832 )
834 )
833 return True
835 return True
834 except Exception as e:
836 except Exception as e:
835 # Swallow any errors; fire and forget
837 # Swallow any errors; fire and forget
836 self.repo.ui.log(
838 self.repo.ui.log(
837 b'watchman', b'Exception %s while running %s\n', e, cmd
839 b'watchman', b'Exception %s while running %s\n', e, cmd
838 )
840 )
839 return False
841 return False
840
842
841
843
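The class above can be used either as a context manager or through explicit ``enter()``/``exit()`` calls (the transaction hook in ``reposetup`` below does the latter). A minimal usage sketch, assuming ``repo`` is a local repository that ``reposetup`` has already equipped with a Watchman client::

    # Sketch: bracket a working-copy mutation with state-enter/state-leave.
    with state_update(repo, name=b'hg.update', partial=False):
        perform_update()  # hypothetical stand-in for merge.update

``wrapupdate`` below is the real example: it additionally resolves ``oldnode``/``newnode`` up front and precomputes the distance so Watchman subscribers receive useful metadata.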
842 # Estimate the distance between two nodes
844 # Estimate the distance between two nodes
843 def calcdistance(repo, oldnode, newnode):
845 def calcdistance(repo, oldnode, newnode):
844 anc = repo.changelog.ancestor(oldnode, newnode)
846 anc = repo.changelog.ancestor(oldnode, newnode)
845 ancrev = repo[anc].rev()
847 ancrev = repo[anc].rev()
846 distance = abs(repo[oldnode].rev() - ancrev) + abs(
848 distance = abs(repo[oldnode].rev() - ancrev) + abs(
847 repo[newnode].rev() - ancrev
849 repo[newnode].rev() - ancrev
848 )
850 )
849 return distance
851 return distance
850
852
851
853
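A quick worked example of the estimate, with made-up revision numbers: if the old checkout sits at revision 30, the target at revision 42, and their common ancestor at revision 28, the reported distance is ``|30 - 28| + |42 - 28| = 16``::

    # Illustrative numbers only; see calcdistance above.
    oldrev, newrev, ancrev = 30, 42, 28
    assert abs(oldrev - ancrev) + abs(newrev - ancrev) == 16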
852 # Bracket working copy updates with calls to the watchman state-enter
854 # Bracket working copy updates with calls to the watchman state-enter
853 # and state-leave commands. This allows clients to perform more intelligent
855 # and state-leave commands. This allows clients to perform more intelligent
854 # settling during bulk file change scenarios
856 # settling during bulk file change scenarios
855 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
857 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
856 def wrapupdate(
858 def wrapupdate(
857 orig,
859 orig,
858 repo,
860 repo,
859 node,
861 node,
860 branchmerge,
862 branchmerge,
861 force,
863 force,
862 ancestor=None,
864 ancestor=None,
863 mergeancestor=False,
865 mergeancestor=False,
864 labels=None,
866 labels=None,
865 matcher=None,
867 matcher=None,
866 **kwargs
868 **kwargs
867 ):
869 ):
868
870
869 distance = 0
871 distance = 0
870 partial = True
872 partial = True
871 oldnode = repo[b'.'].node()
873 oldnode = repo[b'.'].node()
872 newnode = repo[node].node()
874 newnode = repo[node].node()
873 if matcher is None or matcher.always():
875 if matcher is None or matcher.always():
874 partial = False
876 partial = False
875 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
877 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
876
878
877 with state_update(
879 with state_update(
878 repo,
880 repo,
879 name=b"hg.update",
881 name=b"hg.update",
880 oldnode=oldnode,
882 oldnode=oldnode,
881 newnode=newnode,
883 newnode=newnode,
882 distance=distance,
884 distance=distance,
883 partial=partial,
885 partial=partial,
884 ):
886 ):
885 return orig(
887 return orig(
886 repo,
888 repo,
887 node,
889 node,
888 branchmerge,
890 branchmerge,
889 force,
891 force,
890 ancestor,
892 ancestor,
891 mergeancestor,
893 mergeancestor,
892 labels,
894 labels,
893 matcher,
895 matcher,
894 **kwargs
896 **kwargs
895 )
897 )
896
898
897
899
898 def repo_has_depth_one_nested_repo(repo):
900 def repo_has_depth_one_nested_repo(repo):
899 for f in repo.wvfs.listdir():
901 for f in repo.wvfs.listdir():
900 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
902 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
901 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
903 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
902 repo.ui.debug(msg % f)
904 repo.ui.debug(msg % f)
903 return True
905 return True
904 return False
906 return False
905
907
906
908
907 def reposetup(ui, repo):
909 def reposetup(ui, repo):
908 # We don't work with largefiles or inotify
910 # We don't work with largefiles or inotify
909 exts = extensions.enabled()
911 exts = extensions.enabled()
910 for ext in _blacklist:
912 for ext in _blacklist:
911 if ext in exts:
913 if ext in exts:
912 ui.warn(
914 ui.warn(
913 _(
915 _(
914 b'The fsmonitor extension is incompatible with the %s '
916 b'The fsmonitor extension is incompatible with the %s '
915 b'extension and has been disabled.\n'
917 b'extension and has been disabled.\n'
916 )
918 )
917 % ext
919 % ext
918 )
920 )
919 return
921 return
920
922
921 if repo.local():
923 if repo.local():
922 # We don't work with subrepos either.
924 # We don't work with subrepos either.
923 #
925 #
924 # Reading repo[None].substate can cause a dirstate parse, which is too
926 # Reading repo[None].substate can cause a dirstate parse, which is too
925 # slow. Instead, look directly for the .hgsubstate and .hgsub files.
927 # slow. Instead, look directly for the .hgsubstate and .hgsub files.
926 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
928 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
927 return
929 return
928
930
929 if repo_has_depth_one_nested_repo(repo):
931 if repo_has_depth_one_nested_repo(repo):
930 return
932 return
931
933
932 fsmonitorstate = state.state(repo)
934 fsmonitorstate = state.state(repo)
933 if fsmonitorstate.mode == b'off':
935 if fsmonitorstate.mode == b'off':
934 return
936 return
935
937
936 try:
938 try:
937 client = watchmanclient.client(repo.ui, repo.root)
939 client = watchmanclient.client(repo.ui, repo.root)
938 except Exception as ex:
940 except Exception as ex:
939 _handleunavailable(ui, fsmonitorstate, ex)
941 _handleunavailable(ui, fsmonitorstate, ex)
940 return
942 return
941
943
942 repo._fsmonitorstate = fsmonitorstate
944 repo._fsmonitorstate = fsmonitorstate
943 repo._watchmanclient = client
945 repo._watchmanclient = client
944
946
945 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
947 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
946 if cached:
948 if cached:
947 # at this point since fsmonitorstate wasn't present,
949 # at this point since fsmonitorstate wasn't present,
948 # repo.dirstate is not a fsmonitordirstate
950 # repo.dirstate is not a fsmonitordirstate
949 makedirstate(repo, dirstate)
951 makedirstate(repo, dirstate)
950
952
951 class fsmonitorrepo(repo.__class__):
953 class fsmonitorrepo(repo.__class__):
952 def status(self, *args, **kwargs):
954 def status(self, *args, **kwargs):
953 orig = super(fsmonitorrepo, self).status
955 orig = super(fsmonitorrepo, self).status
954 return overridestatus(orig, self, *args, **kwargs)
956 return overridestatus(orig, self, *args, **kwargs)
955
957
956 def wlocknostateupdate(self, *args, **kwargs):
958 def wlocknostateupdate(self, *args, **kwargs):
957 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
959 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
958
960
959 def wlock(self, *args, **kwargs):
961 def wlock(self, *args, **kwargs):
960 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
962 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
961 if not ui.configbool(
963 if not ui.configbool(
962 b"experimental", b"fsmonitor.transaction_notify"
964 b"experimental", b"fsmonitor.transaction_notify"
963 ):
965 ):
964 return l
966 return l
965 if l.held != 1:
967 if l.held != 1:
966 return l
968 return l
967 origrelease = l.releasefn
969 origrelease = l.releasefn
968
970
969 def staterelease():
971 def staterelease():
970 if origrelease:
972 if origrelease:
971 origrelease()
973 origrelease()
972 if l.stateupdate:
974 if l.stateupdate:
973 l.stateupdate.exit()
975 l.stateupdate.exit()
974 l.stateupdate = None
976 l.stateupdate = None
975
977
976 try:
978 try:
977 l.stateupdate = None
979 l.stateupdate = None
978 l.stateupdate = state_update(self, name=b"hg.transaction")
980 l.stateupdate = state_update(self, name=b"hg.transaction")
979 l.stateupdate.enter()
981 l.stateupdate.enter()
980 l.releasefn = staterelease
982 l.releasefn = staterelease
981 except Exception as e:
983 except Exception as e:
982 # Swallow any errors; fire and forget
984 # Swallow any errors; fire and forget
983 self.ui.log(
985 self.ui.log(
984 b'watchman', b'Exception in state update %s\n', e
986 b'watchman', b'Exception in state update %s\n', e
985 )
987 )
986 return l
988 return l
987
989
988 repo.__class__ = fsmonitorrepo
990 repo.__class__ = fsmonitorrepo
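The wlock override above wraps the lock's releasefn so that a watchman state_update named hg.transaction is entered when the lock is first taken and exited when the lock is finally released. Below is a minimal sketch of that wrap-the-release-callback pattern; fakelock and the notify_* helpers are hypothetical stand-ins for illustration, not fsmonitor APIs.

    # Sketch of the "wrap releasefn" pattern used above. fakelock and the
    # notify_* helpers are hypothetical stand-ins, not fsmonitor APIs.
    class fakelock(object):
        def __init__(self):
            self.releasefn = None

        def release(self):
            if self.releasefn:
                self.releasefn()

    def notify_enter():
        print('state-enter hg.transaction')

    def notify_exit():
        print('state-leave hg.transaction')

    def wrap_with_notifications(lck):
        origrelease = lck.releasefn

        def staterelease():
            # run the original release callback first, then signal "leave"
            if origrelease:
                origrelease()
            notify_exit()

        notify_enter()              # signal that the guarded state has begun
        lck.releasefn = staterelease
        return lck

    l = wrap_with_notifications(fakelock())
    l.release()                     # prints 'state-leave hg.transaction'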
@@ -1,184 +1,186 b''
1 # This software may be used and distributed according to the terms of the
1 # This software may be used and distributed according to the terms of the
2 # GNU General Public License version 2 or any later version.
2 # GNU General Public License version 2 or any later version.
3
3
4 # based on bundleheads extension by Gregory Szorc <gps@mozilla.com>
4 # based on bundleheads extension by Gregory Szorc <gps@mozilla.com>
5
5
6 from __future__ import absolute_import
6 from __future__ import absolute_import
7
7
8 import abc
8 import abc
9 import hashlib
10 import os
9 import os
11 import subprocess
10 import subprocess
12 import tempfile
11 import tempfile
13
12
14 from mercurial.pycompat import open
13 from mercurial.pycompat import open
15 from mercurial import (
14 from mercurial import (
16 node,
15 node,
17 pycompat,
16 pycompat,
18 )
17 )
19 from mercurial.utils import procutil
18 from mercurial.utils import (
19 hashutil,
20 procutil,
21 )
20
22
21 NamedTemporaryFile = tempfile.NamedTemporaryFile
23 NamedTemporaryFile = tempfile.NamedTemporaryFile
22
24
23
25
24 class BundleWriteException(Exception):
26 class BundleWriteException(Exception):
25 pass
27 pass
26
28
27
29
28 class BundleReadException(Exception):
30 class BundleReadException(Exception):
29 pass
31 pass
30
32
31
33
32 class abstractbundlestore(object): # pytype: disable=ignored-metaclass
34 class abstractbundlestore(object): # pytype: disable=ignored-metaclass
33 """Defines the interface for bundle stores.
35 """Defines the interface for bundle stores.
34
36
35 A bundle store is an entity that stores raw bundle data. It is a simple
37 A bundle store is an entity that stores raw bundle data. It is a simple
36 key-value store. However, the keys are chosen by the store. The keys can
38 key-value store. However, the keys are chosen by the store. The keys can
37 be any Python object understood by the corresponding bundle index (see
39 be any Python object understood by the corresponding bundle index (see
38 ``abstractbundleindex`` below).
40 ``abstractbundleindex`` below).
39 """
41 """
40
42
41 __metaclass__ = abc.ABCMeta
43 __metaclass__ = abc.ABCMeta
42
44
43 @abc.abstractmethod
45 @abc.abstractmethod
44 def write(self, data):
46 def write(self, data):
45 """Write bundle data to the store.
47 """Write bundle data to the store.
46
48
47 This function receives the raw data to be written as a str.
49 This function receives the raw data to be written as a str.
48 Throws BundleWriteException on failure.
50 Throws BundleWriteException on failure.
49 The key of the written data MUST be returned.
51 The key of the written data MUST be returned.
50 """
52 """
51
53
52 @abc.abstractmethod
54 @abc.abstractmethod
53 def read(self, key):
55 def read(self, key):
54 """Obtain bundle data for a key.
56 """Obtain bundle data for a key.
55
57
56 Returns None if the bundle isn't known.
58 Returns None if the bundle isn't known.
57 Throws BundleReadException on failure.
59 Throws BundleReadException on failure.
58 The returned object should be a file object supporting read()
60 The returned object should be a file object supporting read()
59 and close().
61 and close().
60 """
62 """
61
63
62
64
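To make the write()/read() contract concrete, here is a minimal in-memory store that satisfies it. This is only an illustrative sketch (memorybundlestore is not part of the extension), and it uses plain hashlib for self-containment even though, as this diff shows, Mercurial's own code now routes SHA-1 use through hashutil.sha1.

    import hashlib

    class memorybundlestore(object):
        """Toy bundle store keeping everything in a dict (illustration only)."""

        def __init__(self):
            self._data = {}

        def write(self, data):
            # the store chooses the key; here, the hex SHA-1 of the payload
            key = hashlib.sha1(data).hexdigest()
            self._data[key] = data
            return key

        def read(self, key):
            # return None for unknown bundles, mirroring the abstract contract
            return self._data.get(key)

    store = memorybundlestore()
    key = store.write(b'bundle payload')
    assert store.read(key) == b'bundle payload'
    assert store.read('missing') is None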
63 class filebundlestore(object):
65 class filebundlestore(object):
64 """bundle store in filesystem
66 """bundle store in filesystem
65
67
66 meant for storing bundles somewhere on disk and on network filesystems
68 meant for storing bundles somewhere on disk and on network filesystems
67 """
69 """
68
70
69 def __init__(self, ui, repo):
71 def __init__(self, ui, repo):
70 self.ui = ui
72 self.ui = ui
71 self.repo = repo
73 self.repo = repo
72 self.storepath = ui.configpath(b'scratchbranch', b'storepath')
74 self.storepath = ui.configpath(b'scratchbranch', b'storepath')
73 if not self.storepath:
75 if not self.storepath:
74 self.storepath = self.repo.vfs.join(
76 self.storepath = self.repo.vfs.join(
75 b"scratchbranches", b"filebundlestore"
77 b"scratchbranches", b"filebundlestore"
76 )
78 )
77 if not os.path.exists(self.storepath):
79 if not os.path.exists(self.storepath):
78 os.makedirs(self.storepath)
80 os.makedirs(self.storepath)
79
81
80 def _dirpath(self, hashvalue):
82 def _dirpath(self, hashvalue):
81 """First two bytes of the hash are the name of the upper
83 """First two bytes of the hash are the name of the upper
82 level directory, next two bytes are the name of the
84 level directory, next two bytes are the name of the
83 next level directory"""
85 next level directory"""
84 return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4])
86 return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4])
85
87
86 def _filepath(self, filename):
88 def _filepath(self, filename):
87 return os.path.join(self._dirpath(filename), filename)
89 return os.path.join(self._dirpath(filename), filename)
88
90
89 def write(self, data):
91 def write(self, data):
90 filename = node.hex(hashlib.sha1(data).digest())
92 filename = node.hex(hashutil.sha1(data).digest())
91 dirpath = self._dirpath(filename)
93 dirpath = self._dirpath(filename)
92
94
93 if not os.path.exists(dirpath):
95 if not os.path.exists(dirpath):
94 os.makedirs(dirpath)
96 os.makedirs(dirpath)
95
97
96 with open(self._filepath(filename), b'wb') as f:
98 with open(self._filepath(filename), b'wb') as f:
97 f.write(data)
99 f.write(data)
98
100
99 return filename
101 return filename
100
102
101 def read(self, key):
103 def read(self, key):
102 try:
104 try:
103 with open(self._filepath(key), b'rb') as f:
105 with open(self._filepath(key), b'rb') as f:
104 return f.read()
106 return f.read()
105 except IOError:
107 except IOError:
106 return None
108 return None
107
109
108
110
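filebundlestore shards its entries under storepath using the key itself: the first two hex digits pick the top-level directory and the next two the directory below it, which keeps any single directory from growing too large. A rough standalone imitation of that layout follows; the storepath value and helper names are assumptions for the sketch, not the extension's API.

    import hashlib
    import os
    import tempfile

    def dirpath(storepath, hashvalue):
        # first two hex digits -> top directory, next two -> subdirectory
        return os.path.join(storepath, hashvalue[0:2], hashvalue[2:4])

    def put(storepath, data):
        filename = hashlib.sha1(data).hexdigest()
        d = dirpath(storepath, filename)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(os.path.join(d, filename), 'wb') as f:
            f.write(data)
        return filename

    storepath = tempfile.mkdtemp()
    key = put(storepath, b'some bundle bytes')
    # the bundle now lives at <storepath>/<k0k1>/<k2k3>/<full 40-char key>
    print(os.path.join(storepath, key[0:2], key[2:4], key))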
109 class externalbundlestore(abstractbundlestore):
111 class externalbundlestore(abstractbundlestore):
110 def __init__(self, put_binary, put_args, get_binary, get_args):
112 def __init__(self, put_binary, put_args, get_binary, get_args):
111 """
113 """
112 `put_binary` - path to binary file which uploads bundle to external
114 `put_binary` - path to binary file which uploads bundle to external
113 storage and prints key to stdout
115 storage and prints key to stdout
114 `put_args` - list of format strings with additional args to `put_binary`;
116 `put_args` - list of format strings with additional args to `put_binary`;
115 the {filename} replacement field can be used.
117 the {filename} replacement field can be used.
116 `get_binary` - path to binary file which accepts filename and key
118 `get_binary` - path to binary file which accepts filename and key
117 (in that order), downloads bundle from store and saves it to file
119 (in that order), downloads bundle from store and saves it to file
118 `get_args` - list of format strings with additional args to `get_binary`;
120 `get_args` - list of format strings with additional args to `get_binary`;
119 the {filename} and {handle} replacement fields can be used.
121 the {filename} and {handle} replacement fields can be used.
120 """
122 """
121
123
122 self.put_args = put_args
124 self.put_args = put_args
123 self.get_args = get_args
125 self.get_args = get_args
124 self.put_binary = put_binary
126 self.put_binary = put_binary
125 self.get_binary = get_binary
127 self.get_binary = get_binary
126
128
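Before either binary is launched, each entry of put_args or get_args is expanded with str.format() and appended to the binary's argv, exactly as the write() and read() methods below do. A small illustration with made-up values (upload-bundle and its flags are not real tools):

    # Illustration of how the *_args templates expand; all values are made up.
    put_binary = '/usr/local/bin/upload-bundle'
    put_args = ['--file', '{filename}', '--ttl', '14d']

    filename = '/tmp/tmp1234'   # path of the NamedTemporaryFile holding the bundle
    argv = [put_binary] + [arg.format(filename=filename) for arg in put_args]
    # -> ['/usr/local/bin/upload-bundle', '--file', '/tmp/tmp1234', '--ttl', '14d']

    get_args = ['--key', '{handle}', '--out', '{filename}']
    handle = 'abcdef0123456789'
    argv_get = [arg.format(filename=filename, handle=handle) for arg in get_args]
    print(argv)
    print(argv_get)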
127 def _call_binary(self, args):
129 def _call_binary(self, args):
128 p = subprocess.Popen(
130 p = subprocess.Popen(
129 pycompat.rapply(procutil.tonativestr, args),
131 pycompat.rapply(procutil.tonativestr, args),
130 stdout=subprocess.PIPE,
132 stdout=subprocess.PIPE,
131 stderr=subprocess.PIPE,
133 stderr=subprocess.PIPE,
132 close_fds=True,
134 close_fds=True,
133 )
135 )
134 stdout, stderr = p.communicate()
136 stdout, stderr = p.communicate()
135 returncode = p.returncode
137 returncode = p.returncode
136 return returncode, stdout, stderr
138 return returncode, stdout, stderr
137
139
138 def write(self, data):
140 def write(self, data):
139 # Won't work on windows because you can't open file second time without
141 # Won't work on windows because you can't open file second time without
140 # closing it
142 # closing it
141 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
143 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
142 # with pycompat.namedtempfile()
144 # with pycompat.namedtempfile()
143 with NamedTemporaryFile() as temp:
145 with NamedTemporaryFile() as temp:
144 temp.write(data)
146 temp.write(data)
145 temp.flush()
147 temp.flush()
146 temp.seek(0)
148 temp.seek(0)
147 formatted_args = [
149 formatted_args = [
148 arg.format(filename=temp.name) for arg in self.put_args
150 arg.format(filename=temp.name) for arg in self.put_args
149 ]
151 ]
150 returncode, stdout, stderr = self._call_binary(
152 returncode, stdout, stderr = self._call_binary(
151 [self.put_binary] + formatted_args
153 [self.put_binary] + formatted_args
152 )
154 )
153
155
154 if returncode != 0:
156 if returncode != 0:
155 raise BundleWriteException(
157 raise BundleWriteException(
156 b'Failed to upload to external store: %s' % stderr
158 b'Failed to upload to external store: %s' % stderr
157 )
159 )
158 stdout_lines = stdout.splitlines()
160 stdout_lines = stdout.splitlines()
159 if len(stdout_lines) == 1:
161 if len(stdout_lines) == 1:
160 return stdout_lines[0]
162 return stdout_lines[0]
161 else:
163 else:
162 raise BundleWriteException(
164 raise BundleWriteException(
163 b'Bad output from %s: %s' % (self.put_binary, stdout)
165 b'Bad output from %s: %s' % (self.put_binary, stdout)
164 )
166 )
165
167
166 def read(self, handle):
168 def read(self, handle):
167 # Won't work on Windows because you can't open a file a second time
169 # Won't work on Windows because you can't open a file a second time
168 # without closing it first
170 # without closing it first
169 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
171 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
170 # with pycompat.namedtempfile()
172 # with pycompat.namedtempfile()
171 with NamedTemporaryFile() as temp:
173 with NamedTemporaryFile() as temp:
172 formatted_args = [
174 formatted_args = [
173 arg.format(filename=temp.name, handle=handle)
175 arg.format(filename=temp.name, handle=handle)
174 for arg in self.get_args
176 for arg in self.get_args
175 ]
177 ]
176 returncode, stdout, stderr = self._call_binary(
178 returncode, stdout, stderr = self._call_binary(
177 [self.get_binary] + formatted_args
179 [self.get_binary] + formatted_args
178 )
180 )
179
181
180 if returncode != 0:
182 if returncode != 0:
181 raise BundleReadException(
183 raise BundleReadException(
182 b'Failed to download from external store: %s' % stderr
184 b'Failed to download from external store: %s' % stderr
183 )
185 )
184 return temp.read()
186 return temp.read()
@@ -1,669 +1,669 b''
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''High-level command function for lfconvert, plus the cmdtable.'''
9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import errno
12 import errno
13 import hashlib
14 import os
13 import os
15 import shutil
14 import shutil
16
15
17 from mercurial.i18n import _
16 from mercurial.i18n import _
18
17
19 from mercurial import (
18 from mercurial import (
20 cmdutil,
19 cmdutil,
21 context,
20 context,
22 error,
21 error,
23 exthelper,
22 exthelper,
24 hg,
23 hg,
25 lock,
24 lock,
26 match as matchmod,
25 match as matchmod,
27 node,
26 node,
28 pycompat,
27 pycompat,
29 scmutil,
28 scmutil,
30 util,
29 util,
31 )
30 )
31 from mercurial.utils import hashutil
32
32
33 from ..convert import (
33 from ..convert import (
34 convcmd,
34 convcmd,
35 filemap,
35 filemap,
36 )
36 )
37
37
38 from . import lfutil, storefactory
38 from . import lfutil, storefactory
39
39
40 release = lock.release
40 release = lock.release
41
41
42 # -- Commands ----------------------------------------------------------
42 # -- Commands ----------------------------------------------------------
43
43
44 eh = exthelper.exthelper()
44 eh = exthelper.exthelper()
45
45
46
46
47 @eh.command(
47 @eh.command(
48 b'lfconvert',
48 b'lfconvert',
49 [
49 [
50 (
50 (
51 b's',
51 b's',
52 b'size',
52 b'size',
53 b'',
53 b'',
54 _(b'minimum size (MB) for files to be converted as largefiles'),
54 _(b'minimum size (MB) for files to be converted as largefiles'),
55 b'SIZE',
55 b'SIZE',
56 ),
56 ),
57 (
57 (
58 b'',
58 b'',
59 b'to-normal',
59 b'to-normal',
60 False,
60 False,
61 _(b'convert from a largefiles repo to a normal repo'),
61 _(b'convert from a largefiles repo to a normal repo'),
62 ),
62 ),
63 ],
63 ],
64 _(b'hg lfconvert SOURCE DEST [FILE ...]'),
64 _(b'hg lfconvert SOURCE DEST [FILE ...]'),
65 norepo=True,
65 norepo=True,
66 inferrepo=True,
66 inferrepo=True,
67 )
67 )
68 def lfconvert(ui, src, dest, *pats, **opts):
68 def lfconvert(ui, src, dest, *pats, **opts):
69 '''convert a normal repository to a largefiles repository
69 '''convert a normal repository to a largefiles repository
70
70
71 Convert repository SOURCE to a new repository DEST, identical to
71 Convert repository SOURCE to a new repository DEST, identical to
72 SOURCE except that certain files will be converted as largefiles:
72 SOURCE except that certain files will be converted as largefiles:
73 specifically, any file that matches any PATTERN *or* whose size is
73 specifically, any file that matches any PATTERN *or* whose size is
74 above the minimum size threshold is converted as a largefile. The
74 above the minimum size threshold is converted as a largefile. The
75 size used to determine whether or not to track a file as a
75 size used to determine whether or not to track a file as a
76 largefile is the size of the first version of the file. The
76 largefile is the size of the first version of the file. The
77 minimum size can be specified either with --size or in
77 minimum size can be specified either with --size or in
78 configuration as ``largefiles.size``.
78 configuration as ``largefiles.size``.
79
79
80 After running this command you will need to make sure that
80 After running this command you will need to make sure that
81 largefiles is enabled anywhere you intend to push the new
81 largefiles is enabled anywhere you intend to push the new
82 repository.
82 repository.
83
83
84 Use --to-normal to convert largefiles back to normal files; after
84 Use --to-normal to convert largefiles back to normal files; after
85 this, the DEST repository can be used without largefiles at all.'''
85 this, the DEST repository can be used without largefiles at all.'''
86
86
87 opts = pycompat.byteskwargs(opts)
87 opts = pycompat.byteskwargs(opts)
88 if opts[b'to_normal']:
88 if opts[b'to_normal']:
89 tolfile = False
89 tolfile = False
90 else:
90 else:
91 tolfile = True
91 tolfile = True
92 size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None)
92 size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None)
93
93
94 if not hg.islocal(src):
94 if not hg.islocal(src):
95 raise error.Abort(_(b'%s is not a local Mercurial repo') % src)
95 raise error.Abort(_(b'%s is not a local Mercurial repo') % src)
96 if not hg.islocal(dest):
96 if not hg.islocal(dest):
97 raise error.Abort(_(b'%s is not a local Mercurial repo') % dest)
97 raise error.Abort(_(b'%s is not a local Mercurial repo') % dest)
98
98
99 rsrc = hg.repository(ui, src)
99 rsrc = hg.repository(ui, src)
100 ui.status(_(b'initializing destination %s\n') % dest)
100 ui.status(_(b'initializing destination %s\n') % dest)
101 rdst = hg.repository(ui, dest, create=True)
101 rdst = hg.repository(ui, dest, create=True)
102
102
103 success = False
103 success = False
104 dstwlock = dstlock = None
104 dstwlock = dstlock = None
105 try:
105 try:
106 # Get a list of all changesets in the source. The easy way to do this
106 # Get a list of all changesets in the source. The easy way to do this
107 # is to simply walk the changelog, using changelog.nodesbetween().
107 # is to simply walk the changelog, using changelog.nodesbetween().
108 # Take a look at mercurial/revlog.py:639 for more details.
108 # Take a look at mercurial/revlog.py:639 for more details.
109 # Use a generator instead of a list to decrease memory usage
109 # Use a generator instead of a list to decrease memory usage
110 ctxs = (
110 ctxs = (
111 rsrc[ctx]
111 rsrc[ctx]
112 for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]
112 for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]
113 )
113 )
114 revmap = {node.nullid: node.nullid}
114 revmap = {node.nullid: node.nullid}
115 if tolfile:
115 if tolfile:
116 # Lock the destination to prevent modification while content is converted into it.
116 # Lock the destination to prevent modification while content is converted into it.
117 # Don't need to lock src because we are just reading from its
117 # Don't need to lock src because we are just reading from its
118 # history which can't change.
118 # history which can't change.
119 dstwlock = rdst.wlock()
119 dstwlock = rdst.wlock()
120 dstlock = rdst.lock()
120 dstlock = rdst.lock()
121
121
122 lfiles = set()
122 lfiles = set()
123 normalfiles = set()
123 normalfiles = set()
124 if not pats:
124 if not pats:
125 pats = ui.configlist(lfutil.longname, b'patterns')
125 pats = ui.configlist(lfutil.longname, b'patterns')
126 if pats:
126 if pats:
127 matcher = matchmod.match(rsrc.root, b'', list(pats))
127 matcher = matchmod.match(rsrc.root, b'', list(pats))
128 else:
128 else:
129 matcher = None
129 matcher = None
130
130
131 lfiletohash = {}
131 lfiletohash = {}
132 with ui.makeprogress(
132 with ui.makeprogress(
133 _(b'converting revisions'),
133 _(b'converting revisions'),
134 unit=_(b'revisions'),
134 unit=_(b'revisions'),
135 total=rsrc[b'tip'].rev(),
135 total=rsrc[b'tip'].rev(),
136 ) as progress:
136 ) as progress:
137 for ctx in ctxs:
137 for ctx in ctxs:
138 progress.update(ctx.rev())
138 progress.update(ctx.rev())
139 _lfconvert_addchangeset(
139 _lfconvert_addchangeset(
140 rsrc,
140 rsrc,
141 rdst,
141 rdst,
142 ctx,
142 ctx,
143 revmap,
143 revmap,
144 lfiles,
144 lfiles,
145 normalfiles,
145 normalfiles,
146 matcher,
146 matcher,
147 size,
147 size,
148 lfiletohash,
148 lfiletohash,
149 )
149 )
150
150
151 if rdst.wvfs.exists(lfutil.shortname):
151 if rdst.wvfs.exists(lfutil.shortname):
152 rdst.wvfs.rmtree(lfutil.shortname)
152 rdst.wvfs.rmtree(lfutil.shortname)
153
153
154 for f in lfiletohash.keys():
154 for f in lfiletohash.keys():
155 if rdst.wvfs.isfile(f):
155 if rdst.wvfs.isfile(f):
156 rdst.wvfs.unlink(f)
156 rdst.wvfs.unlink(f)
157 try:
157 try:
158 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
158 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
159 except OSError:
159 except OSError:
160 pass
160 pass
161
161
162 # If there were any files converted to largefiles, add largefiles
162 # If there were any files converted to largefiles, add largefiles
163 # to the destination repository's requirements.
163 # to the destination repository's requirements.
164 if lfiles:
164 if lfiles:
165 rdst.requirements.add(b'largefiles')
165 rdst.requirements.add(b'largefiles')
166 rdst._writerequirements()
166 rdst._writerequirements()
167 else:
167 else:
168
168
169 class lfsource(filemap.filemap_source):
169 class lfsource(filemap.filemap_source):
170 def __init__(self, ui, source):
170 def __init__(self, ui, source):
171 super(lfsource, self).__init__(ui, source, None)
171 super(lfsource, self).__init__(ui, source, None)
172 self.filemapper.rename[lfutil.shortname] = b'.'
172 self.filemapper.rename[lfutil.shortname] = b'.'
173
173
174 def getfile(self, name, rev):
174 def getfile(self, name, rev):
175 realname, realrev = rev
175 realname, realrev = rev
176 f = super(lfsource, self).getfile(name, rev)
176 f = super(lfsource, self).getfile(name, rev)
177
177
178 if (
178 if (
179 not realname.startswith(lfutil.shortnameslash)
179 not realname.startswith(lfutil.shortnameslash)
180 or f[0] is None
180 or f[0] is None
181 ):
181 ):
182 return f
182 return f
183
183
184 # Substitute in the largefile data for the hash
184 # Substitute in the largefile data for the hash
185 hash = f[0].strip()
185 hash = f[0].strip()
186 path = lfutil.findfile(rsrc, hash)
186 path = lfutil.findfile(rsrc, hash)
187
187
188 if path is None:
188 if path is None:
189 raise error.Abort(
189 raise error.Abort(
190 _(b"missing largefile for '%s' in %s")
190 _(b"missing largefile for '%s' in %s")
191 % (realname, realrev)
191 % (realname, realrev)
192 )
192 )
193 return util.readfile(path), f[1]
193 return util.readfile(path), f[1]
194
194
195 class converter(convcmd.converter):
195 class converter(convcmd.converter):
196 def __init__(self, ui, source, dest, revmapfile, opts):
196 def __init__(self, ui, source, dest, revmapfile, opts):
197 src = lfsource(ui, source)
197 src = lfsource(ui, source)
198
198
199 super(converter, self).__init__(
199 super(converter, self).__init__(
200 ui, src, dest, revmapfile, opts
200 ui, src, dest, revmapfile, opts
201 )
201 )
202
202
203 found, missing = downloadlfiles(ui, rsrc)
203 found, missing = downloadlfiles(ui, rsrc)
204 if missing != 0:
204 if missing != 0:
205 raise error.Abort(_(b"all largefiles must be present locally"))
205 raise error.Abort(_(b"all largefiles must be present locally"))
206
206
207 orig = convcmd.converter
207 orig = convcmd.converter
208 convcmd.converter = converter
208 convcmd.converter = converter
209
209
210 try:
210 try:
211 convcmd.convert(
211 convcmd.convert(
212 ui, src, dest, source_type=b'hg', dest_type=b'hg'
212 ui, src, dest, source_type=b'hg', dest_type=b'hg'
213 )
213 )
214 finally:
214 finally:
215 convcmd.converter = orig
215 convcmd.converter = orig
216 success = True
216 success = True
217 finally:
217 finally:
218 if tolfile:
218 if tolfile:
219 rdst.dirstate.clear()
219 rdst.dirstate.clear()
220 release(dstlock, dstwlock)
220 release(dstlock, dstwlock)
221 if not success:
221 if not success:
222 # we failed, remove the new directory
222 # we failed, remove the new directory
223 shutil.rmtree(rdst.root)
223 shutil.rmtree(rdst.root)
224
224
225
225
226 def _lfconvert_addchangeset(
226 def _lfconvert_addchangeset(
227 rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash
227 rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash
228 ):
228 ):
229 # Convert src parents to dst parents
229 # Convert src parents to dst parents
230 parents = _convertparents(ctx, revmap)
230 parents = _convertparents(ctx, revmap)
231
231
232 # Generate list of changed files
232 # Generate list of changed files
233 files = _getchangedfiles(ctx, parents)
233 files = _getchangedfiles(ctx, parents)
234
234
235 dstfiles = []
235 dstfiles = []
236 for f in files:
236 for f in files:
237 if f not in lfiles and f not in normalfiles:
237 if f not in lfiles and f not in normalfiles:
238 islfile = _islfile(f, ctx, matcher, size)
238 islfile = _islfile(f, ctx, matcher, size)
239 # If this file was renamed or copied then copy
239 # If this file was renamed or copied then copy
240 # the largefile-ness of its predecessor
240 # the largefile-ness of its predecessor
241 if f in ctx.manifest():
241 if f in ctx.manifest():
242 fctx = ctx.filectx(f)
242 fctx = ctx.filectx(f)
243 renamed = fctx.copysource()
243 renamed = fctx.copysource()
244 if renamed is None:
244 if renamed is None:
245 # the code below assumes renamed to be a boolean or a list
245 # the code below assumes renamed to be a boolean or a list
246 # and won't quite work with the value None
246 # and won't quite work with the value None
247 renamed = False
247 renamed = False
248 renamedlfile = renamed and renamed in lfiles
248 renamedlfile = renamed and renamed in lfiles
249 islfile |= renamedlfile
249 islfile |= renamedlfile
250 if b'l' in fctx.flags():
250 if b'l' in fctx.flags():
251 if renamedlfile:
251 if renamedlfile:
252 raise error.Abort(
252 raise error.Abort(
253 _(b'renamed/copied largefile %s becomes symlink')
253 _(b'renamed/copied largefile %s becomes symlink')
254 % f
254 % f
255 )
255 )
256 islfile = False
256 islfile = False
257 if islfile:
257 if islfile:
258 lfiles.add(f)
258 lfiles.add(f)
259 else:
259 else:
260 normalfiles.add(f)
260 normalfiles.add(f)
261
261
262 if f in lfiles:
262 if f in lfiles:
263 fstandin = lfutil.standin(f)
263 fstandin = lfutil.standin(f)
264 dstfiles.append(fstandin)
264 dstfiles.append(fstandin)
265 # the largefile is in the manifest if it has not been removed/renamed
265 # the largefile is in the manifest if it has not been removed/renamed
266 if f in ctx.manifest():
266 if f in ctx.manifest():
267 fctx = ctx.filectx(f)
267 fctx = ctx.filectx(f)
268 if b'l' in fctx.flags():
268 if b'l' in fctx.flags():
269 renamed = fctx.copysource()
269 renamed = fctx.copysource()
270 if renamed and renamed in lfiles:
270 if renamed and renamed in lfiles:
271 raise error.Abort(
271 raise error.Abort(
272 _(b'largefile %s becomes symlink') % f
272 _(b'largefile %s becomes symlink') % f
273 )
273 )
274
274
275 # largefile was modified, update standins
275 # largefile was modified, update standins
276 m = hashlib.sha1(b'')
276 m = hashutil.sha1(b'')
277 m.update(ctx[f].data())
277 m.update(ctx[f].data())
278 hash = node.hex(m.digest())
278 hash = node.hex(m.digest())
279 if f not in lfiletohash or lfiletohash[f] != hash:
279 if f not in lfiletohash or lfiletohash[f] != hash:
280 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
280 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
281 executable = b'x' in ctx[f].flags()
281 executable = b'x' in ctx[f].flags()
282 lfutil.writestandin(rdst, fstandin, hash, executable)
282 lfutil.writestandin(rdst, fstandin, hash, executable)
283 lfiletohash[f] = hash
283 lfiletohash[f] = hash
284 else:
284 else:
285 # normal file
285 # normal file
286 dstfiles.append(f)
286 dstfiles.append(f)
287
287
288 def getfilectx(repo, memctx, f):
288 def getfilectx(repo, memctx, f):
289 srcfname = lfutil.splitstandin(f)
289 srcfname = lfutil.splitstandin(f)
290 if srcfname is not None:
290 if srcfname is not None:
291 # if the file isn't in the manifest then it was removed
291 # if the file isn't in the manifest then it was removed
292 # or renamed, return None to indicate this
292 # or renamed, return None to indicate this
293 try:
293 try:
294 fctx = ctx.filectx(srcfname)
294 fctx = ctx.filectx(srcfname)
295 except error.LookupError:
295 except error.LookupError:
296 return None
296 return None
297 renamed = fctx.copysource()
297 renamed = fctx.copysource()
298 if renamed:
298 if renamed:
299 # standin is always a largefile because largefile-ness
299 # standin is always a largefile because largefile-ness
300 # doesn't change after rename or copy
300 # doesn't change after rename or copy
301 renamed = lfutil.standin(renamed)
301 renamed = lfutil.standin(renamed)
302
302
303 return context.memfilectx(
303 return context.memfilectx(
304 repo,
304 repo,
305 memctx,
305 memctx,
306 f,
306 f,
307 lfiletohash[srcfname] + b'\n',
307 lfiletohash[srcfname] + b'\n',
308 b'l' in fctx.flags(),
308 b'l' in fctx.flags(),
309 b'x' in fctx.flags(),
309 b'x' in fctx.flags(),
310 renamed,
310 renamed,
311 )
311 )
312 else:
312 else:
313 return _getnormalcontext(repo, ctx, f, revmap)
313 return _getnormalcontext(repo, ctx, f, revmap)
314
314
315 # Commit
315 # Commit
316 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
316 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
317
317
318
318
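A standin, as written above, is a small file under .hglf/ whose content is the 40-character hex SHA-1 of the largefile's data. hashutil.sha1, used in the new code, exposes the same update()/digest() interface as hashlib.sha1 (the diff swaps one for the other without touching the calls), so the hash stored in lfiletohash can be approximated in a self-contained way with plain hashlib:

    import hashlib

    def standinhash(data):
        # mirrors: m = sha1(b''); m.update(ctx[f].data()); node.hex(m.digest())
        m = hashlib.sha1(b'')
        m.update(data)
        return m.hexdigest()

    print(standinhash(b'large file payload'))   # 40 hex characters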
319 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
319 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
320 mctx = context.memctx(
320 mctx = context.memctx(
321 rdst,
321 rdst,
322 parents,
322 parents,
323 ctx.description(),
323 ctx.description(),
324 dstfiles,
324 dstfiles,
325 getfilectx,
325 getfilectx,
326 ctx.user(),
326 ctx.user(),
327 ctx.date(),
327 ctx.date(),
328 ctx.extra(),
328 ctx.extra(),
329 )
329 )
330 ret = rdst.commitctx(mctx)
330 ret = rdst.commitctx(mctx)
331 lfutil.copyalltostore(rdst, ret)
331 lfutil.copyalltostore(rdst, ret)
332 rdst.setparents(ret)
332 rdst.setparents(ret)
333 revmap[ctx.node()] = rdst.changelog.tip()
333 revmap[ctx.node()] = rdst.changelog.tip()
334
334
335
335
336 # Generate list of changed files
336 # Generate list of changed files
337 def _getchangedfiles(ctx, parents):
337 def _getchangedfiles(ctx, parents):
338 files = set(ctx.files())
338 files = set(ctx.files())
339 if node.nullid not in parents:
339 if node.nullid not in parents:
340 mc = ctx.manifest()
340 mc = ctx.manifest()
341 for pctx in ctx.parents():
341 for pctx in ctx.parents():
342 for fn in pctx.manifest().diff(mc):
342 for fn in pctx.manifest().diff(mc):
343 files.add(fn)
343 files.add(fn)
344 return files
344 return files
345
345
346
346
347 # Convert src parents to dst parents
347 # Convert src parents to dst parents
348 def _convertparents(ctx, revmap):
348 def _convertparents(ctx, revmap):
349 parents = []
349 parents = []
350 for p in ctx.parents():
350 for p in ctx.parents():
351 parents.append(revmap[p.node()])
351 parents.append(revmap[p.node()])
352 while len(parents) < 2:
352 while len(parents) < 2:
353 parents.append(node.nullid)
353 parents.append(node.nullid)
354 return parents
354 return parents
355
355
356
356
357 # Get memfilectx for a normal file
357 # Get memfilectx for a normal file
358 def _getnormalcontext(repo, ctx, f, revmap):
358 def _getnormalcontext(repo, ctx, f, revmap):
359 try:
359 try:
360 fctx = ctx.filectx(f)
360 fctx = ctx.filectx(f)
361 except error.LookupError:
361 except error.LookupError:
362 return None
362 return None
363 renamed = fctx.copysource()
363 renamed = fctx.copysource()
364
364
365 data = fctx.data()
365 data = fctx.data()
366 if f == b'.hgtags':
366 if f == b'.hgtags':
367 data = _converttags(repo.ui, revmap, data)
367 data = _converttags(repo.ui, revmap, data)
368 return context.memfilectx(
368 return context.memfilectx(
369 repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed
369 repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed
370 )
370 )
371
371
372
372
373 # Remap tag data using a revision map
373 # Remap tag data using a revision map
374 def _converttags(ui, revmap, data):
374 def _converttags(ui, revmap, data):
375 newdata = []
375 newdata = []
376 for line in data.splitlines():
376 for line in data.splitlines():
377 try:
377 try:
378 id, name = line.split(b' ', 1)
378 id, name = line.split(b' ', 1)
379 except ValueError:
379 except ValueError:
380 ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line)
380 ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line)
381 continue
381 continue
382 try:
382 try:
383 newid = node.bin(id)
383 newid = node.bin(id)
384 except TypeError:
384 except TypeError:
385 ui.warn(_(b'skipping incorrectly formatted id %s\n') % id)
385 ui.warn(_(b'skipping incorrectly formatted id %s\n') % id)
386 continue
386 continue
387 try:
387 try:
388 newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name))
388 newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name))
389 except KeyError:
389 except KeyError:
390 ui.warn(_(b'no mapping for id %s\n') % id)
390 ui.warn(_(b'no mapping for id %s\n') % id)
391 continue
391 continue
392 return b''.join(newdata)
392 return b''.join(newdata)
393
393
394
394
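_converttags rewrites each .hgtags line, which has the form '<40-hex-node> <tagname>', so that the node refers to the converted changeset; unparsable or unmapped lines are skipped with a warning. A rough standalone version over hex strings is sketched below (the real code maps binary nodes and reports through ui.warn):

    def converttags(revmap, data):
        # revmap: old hex node -> new hex node (binary nodes in the real code)
        newdata = []
        for line in data.splitlines():
            try:
                oldid, name = line.split(' ', 1)
            except ValueError:
                continue                    # incorrectly formatted tag line
            newid = revmap.get(oldid)
            if newid is None:
                continue                    # no mapping for this changeset
            newdata.append('%s %s\n' % (newid, name))
        return ''.join(newdata)

    revmap = {'1' * 40: '2' * 40}
    print(converttags(revmap, '%s v1.0\nnot-a-tag-line' % ('1' * 40)))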
395 def _islfile(file, ctx, matcher, size):
395 def _islfile(file, ctx, matcher, size):
396 '''Return true if file should be considered a largefile, i.e.
396 '''Return true if file should be considered a largefile, i.e.
397 matcher matches it or it is larger than size.'''
397 matcher matches it or it is larger than size.'''
398 # never store special .hg* files as largefiles
398 # never store special .hg* files as largefiles
399 if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs':
399 if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs':
400 return False
400 return False
401 if matcher and matcher(file):
401 if matcher and matcher(file):
402 return True
402 return True
403 try:
403 try:
404 return ctx.filectx(file).size() >= size * 1024 * 1024
404 return ctx.filectx(file).size() >= size * 1024 * 1024
405 except error.LookupError:
405 except error.LookupError:
406 return False
406 return False
407
407
408
408
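The threshold in _islfile is expressed in megabytes, so a --size of 10 means any file whose first revision is at least 10 * 1024 * 1024 = 10485760 bytes is tracked as a largefile, unless it is one of the special .hg* files, which are never converted. A quick standalone check of that logic:

    def islfile(filename, filesize, minsize_mb, matches_pattern=False):
        # never store special .hg* files as largefiles
        if filename in (b'.hgtags', b'.hgignore', b'.hgsigs'):
            return False
        if matches_pattern:
            return True
        return filesize >= minsize_mb * 1024 * 1024

    assert islfile(b'data.bin', 10 * 1024 * 1024, 10)          # exactly 10 MiB
    assert not islfile(b'data.bin', 10 * 1024 * 1024 - 1, 10)  # one byte short
    assert not islfile(b'.hgtags', 50 * 1024 * 1024, 10)       # special file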
409 def uploadlfiles(ui, rsrc, rdst, files):
409 def uploadlfiles(ui, rsrc, rdst, files):
410 '''upload largefiles to the central store'''
410 '''upload largefiles to the central store'''
411
411
412 if not files:
412 if not files:
413 return
413 return
414
414
415 store = storefactory.openstore(rsrc, rdst, put=True)
415 store = storefactory.openstore(rsrc, rdst, put=True)
416
416
417 at = 0
417 at = 0
418 ui.debug(b"sending statlfile command for %d largefiles\n" % len(files))
418 ui.debug(b"sending statlfile command for %d largefiles\n" % len(files))
419 retval = store.exists(files)
419 retval = store.exists(files)
420 files = [h for h in files if not retval[h]]
420 files = [h for h in files if not retval[h]]
421 ui.debug(b"%d largefiles need to be uploaded\n" % len(files))
421 ui.debug(b"%d largefiles need to be uploaded\n" % len(files))
422
422
423 with ui.makeprogress(
423 with ui.makeprogress(
424 _(b'uploading largefiles'), unit=_(b'files'), total=len(files)
424 _(b'uploading largefiles'), unit=_(b'files'), total=len(files)
425 ) as progress:
425 ) as progress:
426 for hash in files:
426 for hash in files:
427 progress.update(at)
427 progress.update(at)
428 source = lfutil.findfile(rsrc, hash)
428 source = lfutil.findfile(rsrc, hash)
429 if not source:
429 if not source:
430 raise error.Abort(
430 raise error.Abort(
431 _(
431 _(
432 b'largefile %s missing from store'
432 b'largefile %s missing from store'
433 b' (needs to be uploaded)'
433 b' (needs to be uploaded)'
434 )
434 )
435 % hash
435 % hash
436 )
436 )
437 # XXX check for errors here
437 # XXX check for errors here
438 store.put(source, hash)
438 store.put(source, hash)
439 at += 1
439 at += 1
440
440
441
441
442 def verifylfiles(ui, repo, all=False, contents=False):
442 def verifylfiles(ui, repo, all=False, contents=False):
443 '''Verify that every largefile revision in the current changeset
443 '''Verify that every largefile revision in the current changeset
444 exists in the central store. With --contents, also verify that
444 exists in the central store. With --contents, also verify that
445 the contents of each local largefile revision are correct (the SHA-1 hash
445 the contents of each local largefile revision are correct (the SHA-1 hash
446 matches the revision ID). With --all, check every changeset in
446 matches the revision ID). With --all, check every changeset in
447 this repository.'''
447 this repository.'''
448 if all:
448 if all:
449 revs = repo.revs(b'all()')
449 revs = repo.revs(b'all()')
450 else:
450 else:
451 revs = [b'.']
451 revs = [b'.']
452
452
453 store = storefactory.openstore(repo)
453 store = storefactory.openstore(repo)
454 return store.verify(revs, contents=contents)
454 return store.verify(revs, contents=contents)
455
455
456
456
457 def cachelfiles(ui, repo, node, filelist=None):
457 def cachelfiles(ui, repo, node, filelist=None):
458 '''cachelfiles ensures that all largefiles needed by the specified revision
458 '''cachelfiles ensures that all largefiles needed by the specified revision
459 are present in the repository's largefile cache.
459 are present in the repository's largefile cache.
460
460
461 returns a tuple (cached, missing). cached is the list of files downloaded
461 returns a tuple (cached, missing). cached is the list of files downloaded
462 by this operation; missing is the list of files that were needed but could
462 by this operation; missing is the list of files that were needed but could
463 not be found.'''
463 not be found.'''
464 lfiles = lfutil.listlfiles(repo, node)
464 lfiles = lfutil.listlfiles(repo, node)
465 if filelist:
465 if filelist:
466 lfiles = set(lfiles) & set(filelist)
466 lfiles = set(lfiles) & set(filelist)
467 toget = []
467 toget = []
468
468
469 ctx = repo[node]
469 ctx = repo[node]
470 for lfile in lfiles:
470 for lfile in lfiles:
471 try:
471 try:
472 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
472 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
473 except IOError as err:
473 except IOError as err:
474 if err.errno == errno.ENOENT:
474 if err.errno == errno.ENOENT:
475 continue # node must be None and standin wasn't found in wctx
475 continue # node must be None and standin wasn't found in wctx
476 raise
476 raise
477 if not lfutil.findfile(repo, expectedhash):
477 if not lfutil.findfile(repo, expectedhash):
478 toget.append((lfile, expectedhash))
478 toget.append((lfile, expectedhash))
479
479
480 if toget:
480 if toget:
481 store = storefactory.openstore(repo)
481 store = storefactory.openstore(repo)
482 ret = store.get(toget)
482 ret = store.get(toget)
483 return ret
483 return ret
484
484
485 return ([], [])
485 return ([], [])
486
486
487
487
488 def downloadlfiles(ui, repo, rev=None):
488 def downloadlfiles(ui, repo, rev=None):
489 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
489 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
490
490
491 def prepare(ctx, fns):
491 def prepare(ctx, fns):
492 pass
492 pass
493
493
494 totalsuccess = 0
494 totalsuccess = 0
495 totalmissing = 0
495 totalmissing = 0
496 if rev != []: # walkchangerevs on empty list would return all revs
496 if rev != []: # walkchangerevs on empty list would return all revs
497 for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare):
497 for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare):
498 success, missing = cachelfiles(ui, repo, ctx.node())
498 success, missing = cachelfiles(ui, repo, ctx.node())
499 totalsuccess += len(success)
499 totalsuccess += len(success)
500 totalmissing += len(missing)
500 totalmissing += len(missing)
501 ui.status(_(b"%d additional largefiles cached\n") % totalsuccess)
501 ui.status(_(b"%d additional largefiles cached\n") % totalsuccess)
502 if totalmissing > 0:
502 if totalmissing > 0:
503 ui.status(_(b"%d largefiles failed to download\n") % totalmissing)
503 ui.status(_(b"%d largefiles failed to download\n") % totalmissing)
504 return totalsuccess, totalmissing
504 return totalsuccess, totalmissing
505
505
506
506
507 def updatelfiles(
507 def updatelfiles(
508 ui, repo, filelist=None, printmessage=None, normallookup=False
508 ui, repo, filelist=None, printmessage=None, normallookup=False
509 ):
509 ):
510 '''Update largefiles according to standins in the working directory
510 '''Update largefiles according to standins in the working directory
511
511
512 If ``printmessage`` is not ``None``, it forces the status message to be
512 If ``printmessage`` is not ``None``, it forces the status message to be
513 printed (if true) or suppressed (if false).
513 printed (if true) or suppressed (if false).
514 '''
514 '''
515 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
515 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
516 with repo.wlock():
516 with repo.wlock():
517 lfdirstate = lfutil.openlfdirstate(ui, repo)
517 lfdirstate = lfutil.openlfdirstate(ui, repo)
518 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
518 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
519
519
520 if filelist is not None:
520 if filelist is not None:
521 filelist = set(filelist)
521 filelist = set(filelist)
522 lfiles = [f for f in lfiles if f in filelist]
522 lfiles = [f for f in lfiles if f in filelist]
523
523
524 update = {}
524 update = {}
525 dropped = set()
525 dropped = set()
526 updated, removed = 0, 0
526 updated, removed = 0, 0
527 wvfs = repo.wvfs
527 wvfs = repo.wvfs
528 wctx = repo[None]
528 wctx = repo[None]
529 for lfile in lfiles:
529 for lfile in lfiles:
530 lfileorig = os.path.relpath(
530 lfileorig = os.path.relpath(
531 scmutil.backuppath(ui, repo, lfile), start=repo.root
531 scmutil.backuppath(ui, repo, lfile), start=repo.root
532 )
532 )
533 standin = lfutil.standin(lfile)
533 standin = lfutil.standin(lfile)
534 standinorig = os.path.relpath(
534 standinorig = os.path.relpath(
535 scmutil.backuppath(ui, repo, standin), start=repo.root
535 scmutil.backuppath(ui, repo, standin), start=repo.root
536 )
536 )
537 if wvfs.exists(standin):
537 if wvfs.exists(standin):
538 if wvfs.exists(standinorig) and wvfs.exists(lfile):
538 if wvfs.exists(standinorig) and wvfs.exists(lfile):
539 shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig))
539 shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig))
540 wvfs.unlinkpath(standinorig)
540 wvfs.unlinkpath(standinorig)
541 expecthash = lfutil.readasstandin(wctx[standin])
541 expecthash = lfutil.readasstandin(wctx[standin])
542 if expecthash != b'':
542 if expecthash != b'':
543 if lfile not in wctx: # not switched to normal file
543 if lfile not in wctx: # not switched to normal file
544 if repo.dirstate[standin] != b'?':
544 if repo.dirstate[standin] != b'?':
545 wvfs.unlinkpath(lfile, ignoremissing=True)
545 wvfs.unlinkpath(lfile, ignoremissing=True)
546 else:
546 else:
547 dropped.add(lfile)
547 dropped.add(lfile)
548
548
549 # use normallookup() to allocate an entry in largefiles
549 # use normallookup() to allocate an entry in largefiles
550 # dirstate to prevent lfilesrepo.status() from reporting
550 # dirstate to prevent lfilesrepo.status() from reporting
551 # missing files as removed.
551 # missing files as removed.
552 lfdirstate.normallookup(lfile)
552 lfdirstate.normallookup(lfile)
553 update[lfile] = expecthash
553 update[lfile] = expecthash
554 else:
554 else:
555 # Remove lfiles for which the standin is deleted, unless the
555 # Remove lfiles for which the standin is deleted, unless the
556 # lfile is added to the repository again. This happens when a
556 # lfile is added to the repository again. This happens when a
557 # largefile is converted back to a normal file: the standin
557 # largefile is converted back to a normal file: the standin
558 # disappears, but a new (normal) file appears as the lfile.
558 # disappears, but a new (normal) file appears as the lfile.
559 if (
559 if (
560 wvfs.exists(lfile)
560 wvfs.exists(lfile)
561 and repo.dirstate.normalize(lfile) not in wctx
561 and repo.dirstate.normalize(lfile) not in wctx
562 ):
562 ):
563 wvfs.unlinkpath(lfile)
563 wvfs.unlinkpath(lfile)
564 removed += 1
564 removed += 1
565
565
566 # largefile processing might be slow and be interrupted - be prepared
566 # largefile processing might be slow and be interrupted - be prepared
567 lfdirstate.write()
567 lfdirstate.write()
568
568
569 if lfiles:
569 if lfiles:
570 lfiles = [f for f in lfiles if f not in dropped]
570 lfiles = [f for f in lfiles if f not in dropped]
571
571
572 for f in dropped:
572 for f in dropped:
573 repo.wvfs.unlinkpath(lfutil.standin(f))
573 repo.wvfs.unlinkpath(lfutil.standin(f))
574
574
575 # This needs to happen for dropped files, otherwise they stay in
575 # This needs to happen for dropped files, otherwise they stay in
576 # the M state.
576 # the M state.
577 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
577 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
578
578
579 statuswriter(_(b'getting changed largefiles\n'))
579 statuswriter(_(b'getting changed largefiles\n'))
580 cachelfiles(ui, repo, None, lfiles)
580 cachelfiles(ui, repo, None, lfiles)
581
581
582 for lfile in lfiles:
582 for lfile in lfiles:
583 update1 = 0
583 update1 = 0
584
584
585 expecthash = update.get(lfile)
585 expecthash = update.get(lfile)
586 if expecthash:
586 if expecthash:
587 if not lfutil.copyfromcache(repo, expecthash, lfile):
587 if not lfutil.copyfromcache(repo, expecthash, lfile):
588 # failed ... but already removed and set to normallookup
588 # failed ... but already removed and set to normallookup
589 continue
589 continue
590 # Synchronize largefile dirstate to the last modified
590 # Synchronize largefile dirstate to the last modified
591 # time of the file
591 # time of the file
592 lfdirstate.normal(lfile)
592 lfdirstate.normal(lfile)
593 update1 = 1
593 update1 = 1
594
594
595 # copy the exec mode of largefile standin from the repository's
595 # copy the exec mode of largefile standin from the repository's
596 # dirstate to its state in the lfdirstate.
596 # dirstate to its state in the lfdirstate.
597 standin = lfutil.standin(lfile)
597 standin = lfutil.standin(lfile)
598 if wvfs.exists(standin):
598 if wvfs.exists(standin):
599 # exec is decided by the user's permissions, using mask 0o100
599 # exec is decided by the user's permissions, using mask 0o100
600 standinexec = wvfs.stat(standin).st_mode & 0o100
600 standinexec = wvfs.stat(standin).st_mode & 0o100
601 st = wvfs.stat(lfile)
601 st = wvfs.stat(lfile)
602 mode = st.st_mode
602 mode = st.st_mode
603 if standinexec != mode & 0o100:
603 if standinexec != mode & 0o100:
604 # first remove all X bits, then shift all R bits to X
604 # first remove all X bits, then shift all R bits to X
605 mode &= ~0o111
605 mode &= ~0o111
606 if standinexec:
606 if standinexec:
607 mode |= (mode >> 2) & 0o111 & ~util.umask
607 mode |= (mode >> 2) & 0o111 & ~util.umask
608 wvfs.chmod(lfile, mode)
608 wvfs.chmod(lfile, mode)
609 update1 = 1
609 update1 = 1
610
610
611 updated += update1
611 updated += update1
612
612
613 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
613 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
614
614
615 lfdirstate.write()
615 lfdirstate.write()
616 if lfiles:
616 if lfiles:
617 statuswriter(
617 statuswriter(
618 _(b'%d largefiles updated, %d removed\n') % (updated, removed)
618 _(b'%d largefiles updated, %d removed\n') % (updated, removed)
619 )
619 )
620
620
621
621
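The permission fix-up inside updatelfiles first clears every execute bit and then copies the read bits down into the execute positions (a right shift by two turns each r bit into the matching x bit), masked by the process umask so the usual policy still applies. Worked out on a concrete mode with an assumed umask of 0o022 (the real code uses util.umask):

    umask = 0o022               # assumed for the example; the real code uses util.umask

    mode = 0o644                # largefile currently rw-r--r--
    standinexec = 0o100         # the standin in the working directory is executable

    if standinexec != mode & 0o100:
        mode &= ~0o111          # first remove all X bits -> still 0o644
        if standinexec:
            # 0o644 >> 2 == 0o151; & 0o111 == 0o111; & ~0o022 == 0o111
            mode |= (mode >> 2) & 0o111 & ~umask

    print(oct(mode))            # 0o755: execute granted wherever read was allowed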
622 @eh.command(
622 @eh.command(
623 b'lfpull',
623 b'lfpull',
624 [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))]
624 [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))]
625 + cmdutil.remoteopts,
625 + cmdutil.remoteopts,
626 _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'),
626 _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'),
627 )
627 )
628 def lfpull(ui, repo, source=b"default", **opts):
628 def lfpull(ui, repo, source=b"default", **opts):
629 """pull largefiles for the specified revisions from the specified source
629 """pull largefiles for the specified revisions from the specified source
630
630
631 Pull largefiles that are referenced from local changesets but missing
631 Pull largefiles that are referenced from local changesets but missing
632 locally, pulling from a remote repository to the local cache.
632 locally, pulling from a remote repository to the local cache.
633
633
634 If SOURCE is omitted, the 'default' path will be used.
634 If SOURCE is omitted, the 'default' path will be used.
635 See :hg:`help urls` for more information.
635 See :hg:`help urls` for more information.
636
636
637 .. container:: verbose
637 .. container:: verbose
638
638
639 Some examples:
639 Some examples:
640
640
641 - pull largefiles for all branch heads::
641 - pull largefiles for all branch heads::
642
642
643 hg lfpull -r "head() and not closed()"
643 hg lfpull -r "head() and not closed()"
644
644
645 - pull largefiles on the default branch::
645 - pull largefiles on the default branch::
646
646
647 hg lfpull -r "branch(default)"
647 hg lfpull -r "branch(default)"
648 """
648 """
649 repo.lfpullsource = source
649 repo.lfpullsource = source
650
650
651 revs = opts.get('rev', [])
651 revs = opts.get('rev', [])
652 if not revs:
652 if not revs:
653 raise error.Abort(_(b'no revisions specified'))
653 raise error.Abort(_(b'no revisions specified'))
654 revs = scmutil.revrange(repo, revs)
654 revs = scmutil.revrange(repo, revs)
655
655
656 numcached = 0
656 numcached = 0
657 for rev in revs:
657 for rev in revs:
658 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
658 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
659 (cached, missing) = cachelfiles(ui, repo, rev)
659 (cached, missing) = cachelfiles(ui, repo, rev)
660 numcached += len(cached)
660 numcached += len(cached)
661 ui.status(_(b"%d largefiles cached\n") % numcached)
661 ui.status(_(b"%d largefiles cached\n") % numcached)
662
662
663
663
664 @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE'))
664 @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE'))
665 def debuglfput(ui, repo, filepath, **kwargs):
665 def debuglfput(ui, repo, filepath, **kwargs):
666 hash = lfutil.hashfile(filepath)
666 hash = lfutil.hashfile(filepath)
667 storefactory.openstore(repo).put(filepath, hash)
667 storefactory.openstore(repo).put(filepath, hash)
668 ui.write(b'%s\n' % hash)
668 ui.write(b'%s\n' % hash)
669 return 0
669 return 0
@@ -1,760 +1,760 b''
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''largefiles utility code: must not import other modules in this package.'''
9 '''largefiles utility code: must not import other modules in this package.'''
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import contextlib
12 import contextlib
13 import copy
13 import copy
14 import hashlib
15 import os
14 import os
16 import stat
15 import stat
17
16
18 from mercurial.i18n import _
17 from mercurial.i18n import _
19 from mercurial.node import hex
18 from mercurial.node import hex
20 from mercurial.pycompat import open
19 from mercurial.pycompat import open
21
20
22 from mercurial import (
21 from mercurial import (
23 dirstate,
22 dirstate,
24 encoding,
23 encoding,
25 error,
24 error,
26 httpconnection,
25 httpconnection,
27 match as matchmod,
26 match as matchmod,
28 node,
27 node,
29 pycompat,
28 pycompat,
30 scmutil,
29 scmutil,
31 sparse,
30 sparse,
32 util,
31 util,
33 vfs as vfsmod,
32 vfs as vfsmod,
34 )
33 )
34 from mercurial.utils import hashutil
35
35
36 shortname = b'.hglf'
36 shortname = b'.hglf'
37 shortnameslash = shortname + b'/'
37 shortnameslash = shortname + b'/'
38 longname = b'largefiles'
38 longname = b'largefiles'
39
39
40 # -- Private worker functions ------------------------------------------
40 # -- Private worker functions ------------------------------------------
41
41
42
42
43 @contextlib.contextmanager
43 @contextlib.contextmanager
44 def lfstatus(repo, value=True):
44 def lfstatus(repo, value=True):
45 oldvalue = getattr(repo, 'lfstatus', False)
45 oldvalue = getattr(repo, 'lfstatus', False)
46 repo.lfstatus = value
46 repo.lfstatus = value
47 try:
47 try:
48 yield
48 yield
49 finally:
49 finally:
50 repo.lfstatus = oldvalue
50 repo.lfstatus = oldvalue
51
51
52
52
53 def getminsize(ui, assumelfiles, opt, default=10):
53 def getminsize(ui, assumelfiles, opt, default=10):
54 lfsize = opt
54 lfsize = opt
55 if not lfsize and assumelfiles:
55 if not lfsize and assumelfiles:
56 lfsize = ui.config(longname, b'minsize', default=default)
56 lfsize = ui.config(longname, b'minsize', default=default)
57 if lfsize:
57 if lfsize:
58 try:
58 try:
59 lfsize = float(lfsize)
59 lfsize = float(lfsize)
60 except ValueError:
60 except ValueError:
61 raise error.Abort(
61 raise error.Abort(
62 _(b'largefiles: size must be number (not %s)\n') % lfsize
62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 )
63 )
64 if lfsize is None:
64 if lfsize is None:
65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 return lfsize
66 return lfsize
67
67
68
68
69 def link(src, dest):
69 def link(src, dest):
70 """Try to create hardlink - if that fails, efficiently make a copy."""
70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 util.makedirs(os.path.dirname(dest))
71 util.makedirs(os.path.dirname(dest))
72 try:
72 try:
73 util.oslink(src, dest)
73 util.oslink(src, dest)
74 except OSError:
74 except OSError:
75 # if hardlinks fail, fallback on atomic copy
75 # if hardlinks fail, fallback on atomic copy
76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 for chunk in util.filechunkiter(srcf):
77 for chunk in util.filechunkiter(srcf):
78 dstf.write(chunk)
78 dstf.write(chunk)
79 os.chmod(dest, os.stat(src).st_mode)
79 os.chmod(dest, os.stat(src).st_mode)
80
80
81
81
82 def usercachepath(ui, hash):
82 def usercachepath(ui, hash):
83 '''Return the correct location in the "global" largefiles cache for a file
83 '''Return the correct location in the "global" largefiles cache for a file
84 with the given hash.
84 with the given hash.
85 This cache is used for sharing of largefiles across repositories - both
85 This cache is used for sharing of largefiles across repositories - both
86 to preserve download bandwidth and storage space.'''
86 to preserve download bandwidth and storage space.'''
87 return os.path.join(_usercachedir(ui), hash)
87 return os.path.join(_usercachedir(ui), hash)
88
88
89
89
90 def _usercachedir(ui, name=longname):
90 def _usercachedir(ui, name=longname):
91 '''Return the location of the "global" largefiles cache.'''
91 '''Return the location of the "global" largefiles cache.'''
92 path = ui.configpath(name, b'usercache')
92 path = ui.configpath(name, b'usercache')
93 if path:
93 if path:
94 return path
94 return path
95 if pycompat.iswindows:
95 if pycompat.iswindows:
96 appdata = encoding.environ.get(
96 appdata = encoding.environ.get(
97 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
97 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
98 )
98 )
99 if appdata:
99 if appdata:
100 return os.path.join(appdata, name)
100 return os.path.join(appdata, name)
101 elif pycompat.isdarwin:
101 elif pycompat.isdarwin:
102 home = encoding.environ.get(b'HOME')
102 home = encoding.environ.get(b'HOME')
103 if home:
103 if home:
104 return os.path.join(home, b'Library', b'Caches', name)
104 return os.path.join(home, b'Library', b'Caches', name)
105 elif pycompat.isposix:
105 elif pycompat.isposix:
106 path = encoding.environ.get(b'XDG_CACHE_HOME')
106 path = encoding.environ.get(b'XDG_CACHE_HOME')
107 if path:
107 if path:
108 return os.path.join(path, name)
108 return os.path.join(path, name)
109 home = encoding.environ.get(b'HOME')
109 home = encoding.environ.get(b'HOME')
110 if home:
110 if home:
111 return os.path.join(home, b'.cache', name)
111 return os.path.join(home, b'.cache', name)
112 else:
112 else:
113 raise error.Abort(
113 raise error.Abort(
114 _(b'unknown operating system: %s\n') % pycompat.osname
114 _(b'unknown operating system: %s\n') % pycompat.osname
115 )
115 )
116 raise error.Abort(_(b'unknown %s usercache location') % name)
116 raise error.Abort(_(b'unknown %s usercache location') % name)
117
117
118
118
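_usercachedir resolves the shared cache directory in a fixed order: an explicit largefiles.usercache path from the configuration, then a platform default (LOCALAPPDATA or APPDATA on Windows, ~/Library/Caches on macOS, XDG_CACHE_HOME or ~/.cache on other POSIX systems). A rough standalone imitation of the POSIX branch only, for illustration:

    import os

    def posix_usercachedir(environ, name='largefiles', configured=None):
        # 1. an explicit [largefiles] usercache=... setting wins
        if configured:
            return configured
        # 2. $XDG_CACHE_HOME/largefiles
        path = environ.get('XDG_CACHE_HOME')
        if path:
            return os.path.join(path, name)
        # 3. $HOME/.cache/largefiles
        home = environ.get('HOME')
        if home:
            return os.path.join(home, '.cache', name)
        raise RuntimeError('unknown %s usercache location' % name)

    print(posix_usercachedir({'HOME': '/home/alice'}))
    # -> /home/alice/.cache/largefiles
    print(posix_usercachedir({'XDG_CACHE_HOME': '/var/cache/alice', 'HOME': '/home/alice'}))
    # -> /var/cache/alice/largefiles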
119 def inusercache(ui, hash):
119 def inusercache(ui, hash):
120 path = usercachepath(ui, hash)
120 path = usercachepath(ui, hash)
121 return os.path.exists(path)
121 return os.path.exists(path)
122
122
123
123
124 def findfile(repo, hash):
124 def findfile(repo, hash):
125 '''Return store path of the largefile with the specified hash.
125 '''Return store path of the largefile with the specified hash.
126 As a side effect, the file might be linked from user cache.
126 As a side effect, the file might be linked from user cache.
127 Return None if the file can't be found locally.'''
127 Return None if the file can't be found locally.'''
128 path, exists = findstorepath(repo, hash)
128 path, exists = findstorepath(repo, hash)
129 if exists:
129 if exists:
130 repo.ui.note(_(b'found %s in store\n') % hash)
130 repo.ui.note(_(b'found %s in store\n') % hash)
131 return path
131 return path
132 elif inusercache(repo.ui, hash):
132 elif inusercache(repo.ui, hash):
133 repo.ui.note(_(b'found %s in system cache\n') % hash)
133 repo.ui.note(_(b'found %s in system cache\n') % hash)
134 path = storepath(repo, hash)
134 path = storepath(repo, hash)
135 link(usercachepath(repo.ui, hash), path)
135 link(usercachepath(repo.ui, hash), path)
136 return path
136 return path
137 return None
137 return None
138
138
139
139
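A minimal sketch of the lookup order that findfile() implements, assuming plain directory paths for the repository store and the global user cache; the hardlink-then-copy fallback mirrors the behaviour of the link() helper used above.

import os
import shutil


def findfile_sketch(storedir, cachedir, hash):
    # 1) already in the repository store?
    storefile = os.path.join(storedir, hash)
    if os.path.exists(storefile):
        return storefile
    # 2) in the shared user cache? then link/copy it into the store
    cachefile = os.path.join(cachedir, hash)
    if os.path.exists(cachefile):
        os.makedirs(storedir, exist_ok=True)
        try:
            # hardlink when possible to avoid a second copy on disk
            os.link(cachefile, storefile)
        except OSError:
            # fall back to a plain copy (e.g. across filesystems)
            shutil.copyfile(cachefile, storefile)
            shutil.copymode(cachefile, storefile)
        return storefile
    # 3) not available locally
    return None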
140 class largefilesdirstate(dirstate.dirstate):
140 class largefilesdirstate(dirstate.dirstate):
141 def __getitem__(self, key):
141 def __getitem__(self, key):
142 return super(largefilesdirstate, self).__getitem__(unixpath(key))
142 return super(largefilesdirstate, self).__getitem__(unixpath(key))
143
143
144 def normal(self, f):
144 def normal(self, f):
145 return super(largefilesdirstate, self).normal(unixpath(f))
145 return super(largefilesdirstate, self).normal(unixpath(f))
146
146
147 def remove(self, f):
147 def remove(self, f):
148 return super(largefilesdirstate, self).remove(unixpath(f))
148 return super(largefilesdirstate, self).remove(unixpath(f))
149
149
150 def add(self, f):
150 def add(self, f):
151 return super(largefilesdirstate, self).add(unixpath(f))
151 return super(largefilesdirstate, self).add(unixpath(f))
152
152
153 def drop(self, f):
153 def drop(self, f):
154 return super(largefilesdirstate, self).drop(unixpath(f))
154 return super(largefilesdirstate, self).drop(unixpath(f))
155
155
156 def forget(self, f):
156 def forget(self, f):
157 return super(largefilesdirstate, self).forget(unixpath(f))
157 return super(largefilesdirstate, self).forget(unixpath(f))
158
158
159 def normallookup(self, f):
159 def normallookup(self, f):
160 return super(largefilesdirstate, self).normallookup(unixpath(f))
160 return super(largefilesdirstate, self).normallookup(unixpath(f))
161
161
162 def _ignore(self, f):
162 def _ignore(self, f):
163 return False
163 return False
164
164
165 def write(self, tr=False):
165 def write(self, tr=False):
166 # (1) disable PENDING mode always
166 # (1) disable PENDING mode always
167 # (lfdirstate isn't yet managed as a part of the transaction)
167 # (lfdirstate isn't yet managed as a part of the transaction)
168 # (2) avoid develwarn 'use dirstate.write with ....'
168 # (2) avoid develwarn 'use dirstate.write with ....'
169 super(largefilesdirstate, self).write(None)
169 super(largefilesdirstate, self).write(None)
170
170
171
171
172 def openlfdirstate(ui, repo, create=True):
172 def openlfdirstate(ui, repo, create=True):
173 '''
173 '''
174 Return a dirstate object that tracks largefiles: i.e. its root is
174 Return a dirstate object that tracks largefiles: i.e. its root is
175 the repo root, but it is saved in .hg/largefiles/dirstate.
175 the repo root, but it is saved in .hg/largefiles/dirstate.
176 '''
176 '''
177 vfs = repo.vfs
177 vfs = repo.vfs
178 lfstoredir = longname
178 lfstoredir = longname
179 opener = vfsmod.vfs(vfs.join(lfstoredir))
179 opener = vfsmod.vfs(vfs.join(lfstoredir))
180 lfdirstate = largefilesdirstate(
180 lfdirstate = largefilesdirstate(
181 opener,
181 opener,
182 ui,
182 ui,
183 repo.root,
183 repo.root,
184 repo.dirstate._validate,
184 repo.dirstate._validate,
185 lambda: sparse.matcher(repo),
185 lambda: sparse.matcher(repo),
186 )
186 )
187
187
188 # If the largefiles dirstate does not exist, populate and create
188 # If the largefiles dirstate does not exist, populate and create
189 # it. This ensures that we create it on the first meaningful
189 # it. This ensures that we create it on the first meaningful
190 # largefiles operation in a new clone.
190 # largefiles operation in a new clone.
191 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
191 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
192 matcher = getstandinmatcher(repo)
192 matcher = getstandinmatcher(repo)
193 standins = repo.dirstate.walk(
193 standins = repo.dirstate.walk(
194 matcher, subrepos=[], unknown=False, ignored=False
194 matcher, subrepos=[], unknown=False, ignored=False
195 )
195 )
196
196
197 if len(standins) > 0:
197 if len(standins) > 0:
198 vfs.makedirs(lfstoredir)
198 vfs.makedirs(lfstoredir)
199
199
200 for standin in standins:
200 for standin in standins:
201 lfile = splitstandin(standin)
201 lfile = splitstandin(standin)
202 lfdirstate.normallookup(lfile)
202 lfdirstate.normallookup(lfile)
203 return lfdirstate
203 return lfdirstate
204
204
205
205
206 def lfdirstatestatus(lfdirstate, repo):
206 def lfdirstatestatus(lfdirstate, repo):
207 pctx = repo[b'.']
207 pctx = repo[b'.']
208 match = matchmod.always()
208 match = matchmod.always()
209 unsure, s = lfdirstate.status(
209 unsure, s = lfdirstate.status(
210 match, subrepos=[], ignored=False, clean=False, unknown=False
210 match, subrepos=[], ignored=False, clean=False, unknown=False
211 )
211 )
212 modified, clean = s.modified, s.clean
212 modified, clean = s.modified, s.clean
213 for lfile in unsure:
213 for lfile in unsure:
214 try:
214 try:
215 fctx = pctx[standin(lfile)]
215 fctx = pctx[standin(lfile)]
216 except LookupError:
216 except LookupError:
217 fctx = None
217 fctx = None
218 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
218 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
219 modified.append(lfile)
219 modified.append(lfile)
220 else:
220 else:
221 clean.append(lfile)
221 clean.append(lfile)
222 lfdirstate.normal(lfile)
222 lfdirstate.normal(lfile)
223 return s
223 return s
224
224
225
225
226 def listlfiles(repo, rev=None, matcher=None):
226 def listlfiles(repo, rev=None, matcher=None):
227 '''return a list of largefiles in the working copy or the
227 '''return a list of largefiles in the working copy or the
228 specified changeset'''
228 specified changeset'''
229
229
230 if matcher is None:
230 if matcher is None:
231 matcher = getstandinmatcher(repo)
231 matcher = getstandinmatcher(repo)
232
232
233 # ignore unknown files in working directory
233 # ignore unknown files in working directory
234 return [
234 return [
235 splitstandin(f)
235 splitstandin(f)
236 for f in repo[rev].walk(matcher)
236 for f in repo[rev].walk(matcher)
237 if rev is not None or repo.dirstate[f] != b'?'
237 if rev is not None or repo.dirstate[f] != b'?'
238 ]
238 ]
239
239
240
240
241 def instore(repo, hash, forcelocal=False):
241 def instore(repo, hash, forcelocal=False):
242 '''Return true if a largefile with the given hash exists in the store'''
242 '''Return true if a largefile with the given hash exists in the store'''
243 return os.path.exists(storepath(repo, hash, forcelocal))
243 return os.path.exists(storepath(repo, hash, forcelocal))
244
244
245
245
246 def storepath(repo, hash, forcelocal=False):
246 def storepath(repo, hash, forcelocal=False):
247 '''Return the correct location in the repository largefiles store for a
247 '''Return the correct location in the repository largefiles store for a
248 file with the given hash.'''
248 file with the given hash.'''
249 if not forcelocal and repo.shared():
249 if not forcelocal and repo.shared():
250 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
250 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
251 return repo.vfs.join(longname, hash)
251 return repo.vfs.join(longname, hash)
252
252
253
253
254 def findstorepath(repo, hash):
254 def findstorepath(repo, hash):
255 '''Search through the local store path(s) to find the file for the given
255 '''Search through the local store path(s) to find the file for the given
256 hash. If the file is not found, its path in the primary store is returned.
256 hash. If the file is not found, its path in the primary store is returned.
257 The return value is a tuple of (path, exists(path)).
257 The return value is a tuple of (path, exists(path)).
258 '''
258 '''
259 # For shared repos, the primary store is in the share source. But for
259 # For shared repos, the primary store is in the share source. But for
260 # backward compatibility, force a lookup in the local store if it wasn't
260 # backward compatibility, force a lookup in the local store if it wasn't
261 # found in the share source.
261 # found in the share source.
262 path = storepath(repo, hash, False)
262 path = storepath(repo, hash, False)
263
263
264 if instore(repo, hash):
264 if instore(repo, hash):
265 return (path, True)
265 return (path, True)
266 elif repo.shared() and instore(repo, hash, True):
266 elif repo.shared() and instore(repo, hash, True):
267 return storepath(repo, hash, True), True
267 return storepath(repo, hash, True), True
268
268
269 return (path, False)
269 return (path, False)
270
270
271
271
272 def copyfromcache(repo, hash, filename):
272 def copyfromcache(repo, hash, filename):
273 '''Copy the specified largefile from the repo or system cache to
273 '''Copy the specified largefile from the repo or system cache to
274 filename in the repository. Return true on success or false if the
274 filename in the repository. Return true on success or false if the
275 file was not found in either cache (which should not happen:
275 file was not found in either cache (which should not happen:
276 this is meant to be called only after ensuring that the needed
276 this is meant to be called only after ensuring that the needed
277 largefile exists in the cache).'''
277 largefile exists in the cache).'''
278 wvfs = repo.wvfs
278 wvfs = repo.wvfs
279 path = findfile(repo, hash)
279 path = findfile(repo, hash)
280 if path is None:
280 if path is None:
281 return False
281 return False
282 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
282 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
283 # The write may fail before the file is fully written, but we
283 # The write may fail before the file is fully written, but we
284 # don't use atomic writes in the working copy.
284 # don't use atomic writes in the working copy.
285 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
285 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
286 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
286 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
287 if gothash != hash:
287 if gothash != hash:
288 repo.ui.warn(
288 repo.ui.warn(
289 _(b'%s: data corruption in %s with hash %s\n')
289 _(b'%s: data corruption in %s with hash %s\n')
290 % (filename, path, gothash)
290 % (filename, path, gothash)
291 )
291 )
292 wvfs.unlink(filename)
292 wvfs.unlink(filename)
293 return False
293 return False
294 return True
294 return True
295
295
296
296
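The copy-and-verify pattern used by copyfromcache() and copyandhash() can be sketched standalone as follows, using hashlib directly; the chunk size and function name are illustrative only.

import hashlib
import os


def copyandverify(src, dest, expectedhash, chunksize=128 * 1024):
    # Copy src to dest in chunks, hashing as we go; on a mismatch the
    # partially written destination is removed and False is returned.
    hasher = hashlib.sha1()
    with open(src, 'rb') as srcf, open(dest, 'wb') as destf:
        while True:
            chunk = srcf.read(chunksize)
            if not chunk:
                break
            hasher.update(chunk)
            destf.write(chunk)
    if hasher.hexdigest() != expectedhash:
        os.unlink(dest)
        return False
    return True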
297 def copytostore(repo, ctx, file, fstandin):
297 def copytostore(repo, ctx, file, fstandin):
298 wvfs = repo.wvfs
298 wvfs = repo.wvfs
299 hash = readasstandin(ctx[fstandin])
299 hash = readasstandin(ctx[fstandin])
300 if instore(repo, hash):
300 if instore(repo, hash):
301 return
301 return
302 if wvfs.exists(file):
302 if wvfs.exists(file):
303 copytostoreabsolute(repo, wvfs.join(file), hash)
303 copytostoreabsolute(repo, wvfs.join(file), hash)
304 else:
304 else:
305 repo.ui.warn(
305 repo.ui.warn(
306 _(b"%s: largefile %s not available from local store\n")
306 _(b"%s: largefile %s not available from local store\n")
307 % (file, hash)
307 % (file, hash)
308 )
308 )
309
309
310
310
311 def copyalltostore(repo, node):
311 def copyalltostore(repo, node):
312 '''Copy all largefiles in a given revision to the store'''
312 '''Copy all largefiles in a given revision to the store'''
313
313
314 ctx = repo[node]
314 ctx = repo[node]
315 for filename in ctx.files():
315 for filename in ctx.files():
316 realfile = splitstandin(filename)
316 realfile = splitstandin(filename)
317 if realfile is not None and filename in ctx.manifest():
317 if realfile is not None and filename in ctx.manifest():
318 copytostore(repo, ctx, realfile, filename)
318 copytostore(repo, ctx, realfile, filename)
319
319
320
320
321 def copytostoreabsolute(repo, file, hash):
321 def copytostoreabsolute(repo, file, hash):
322 if inusercache(repo.ui, hash):
322 if inusercache(repo.ui, hash):
323 link(usercachepath(repo.ui, hash), storepath(repo, hash))
323 link(usercachepath(repo.ui, hash), storepath(repo, hash))
324 else:
324 else:
325 util.makedirs(os.path.dirname(storepath(repo, hash)))
325 util.makedirs(os.path.dirname(storepath(repo, hash)))
326 with open(file, b'rb') as srcf:
326 with open(file, b'rb') as srcf:
327 with util.atomictempfile(
327 with util.atomictempfile(
328 storepath(repo, hash), createmode=repo.store.createmode
328 storepath(repo, hash), createmode=repo.store.createmode
329 ) as dstf:
329 ) as dstf:
330 for chunk in util.filechunkiter(srcf):
330 for chunk in util.filechunkiter(srcf):
331 dstf.write(chunk)
331 dstf.write(chunk)
332 linktousercache(repo, hash)
332 linktousercache(repo, hash)
333
333
334
334
335 def linktousercache(repo, hash):
335 def linktousercache(repo, hash):
336 '''Link / copy the largefile with the specified hash from the store
336 '''Link / copy the largefile with the specified hash from the store
337 to the cache.'''
337 to the cache.'''
338 path = usercachepath(repo.ui, hash)
338 path = usercachepath(repo.ui, hash)
339 link(storepath(repo, hash), path)
339 link(storepath(repo, hash), path)
340
340
341
341
342 def getstandinmatcher(repo, rmatcher=None):
342 def getstandinmatcher(repo, rmatcher=None):
343 '''Return a match object that applies rmatcher to the standin directory'''
343 '''Return a match object that applies rmatcher to the standin directory'''
344 wvfs = repo.wvfs
344 wvfs = repo.wvfs
345 standindir = shortname
345 standindir = shortname
346
346
347 # no warnings about missing files or directories
347 # no warnings about missing files or directories
348 badfn = lambda f, msg: None
348 badfn = lambda f, msg: None
349
349
350 if rmatcher and not rmatcher.always():
350 if rmatcher and not rmatcher.always():
351 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
351 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
352 if not pats:
352 if not pats:
353 pats = [wvfs.join(standindir)]
353 pats = [wvfs.join(standindir)]
354 match = scmutil.match(repo[None], pats, badfn=badfn)
354 match = scmutil.match(repo[None], pats, badfn=badfn)
355 else:
355 else:
356 # no patterns: relative to repo root
356 # no patterns: relative to repo root
357 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
357 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
358 return match
358 return match
359
359
360
360
361 def composestandinmatcher(repo, rmatcher):
361 def composestandinmatcher(repo, rmatcher):
362 '''Return a matcher that accepts standins corresponding to the
362 '''Return a matcher that accepts standins corresponding to the
363 files accepted by rmatcher. Pass the list of files in the matcher
363 files accepted by rmatcher. Pass the list of files in the matcher
364 as the paths specified by the user.'''
364 as the paths specified by the user.'''
365 smatcher = getstandinmatcher(repo, rmatcher)
365 smatcher = getstandinmatcher(repo, rmatcher)
366 isstandin = smatcher.matchfn
366 isstandin = smatcher.matchfn
367
367
368 def composedmatchfn(f):
368 def composedmatchfn(f):
369 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
369 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
370
370
371 smatcher.matchfn = composedmatchfn
371 smatcher.matchfn = composedmatchfn
372
372
373 return smatcher
373 return smatcher
374
374
375
375
376 def standin(filename):
376 def standin(filename):
377 '''Return the repo-relative path to the standin for the specified big
377 '''Return the repo-relative path to the standin for the specified big
378 file.'''
378 file.'''
379 # Notes:
379 # Notes:
380 # 1) Some callers want an absolute path, but for instance addlargefiles
380 # 1) Some callers want an absolute path, but for instance addlargefiles
381 # needs it repo-relative so it can be passed to repo[None].add(). So
381 # needs it repo-relative so it can be passed to repo[None].add(). So
382 # leave it up to the caller to use repo.wjoin() to get an absolute path.
382 # leave it up to the caller to use repo.wjoin() to get an absolute path.
383 # 2) Join with '/' because that's what dirstate always uses, even on
383 # 2) Join with '/' because that's what dirstate always uses, even on
384 # Windows. Change existing separator to '/' first in case we are
384 # Windows. Change existing separator to '/' first in case we are
385 # passed filenames from an external source (like the command line).
385 # passed filenames from an external source (like the command line).
386 return shortnameslash + util.pconvert(filename)
386 return shortnameslash + util.pconvert(filename)
387
387
388
388
389 def isstandin(filename):
389 def isstandin(filename):
390 '''Return true if filename is a big file standin. filename must be
390 '''Return true if filename is a big file standin. filename must be
391 in Mercurial's internal form (slash-separated).'''
391 in Mercurial's internal form (slash-separated).'''
392 return filename.startswith(shortnameslash)
392 return filename.startswith(shortnameslash)
393
393
394
394
395 def splitstandin(filename):
395 def splitstandin(filename):
396 # Split on / because that's what dirstate always uses, even on Windows.
396 # Split on / because that's what dirstate always uses, even on Windows.
397 # Change local separator to / first just in case we are passed filenames
397 # Change local separator to / first just in case we are passed filenames
398 # from an external source (like the command line).
398 # from an external source (like the command line).
399 bits = util.pconvert(filename).split(b'/', 1)
399 bits = util.pconvert(filename).split(b'/', 1)
400 if len(bits) == 2 and bits[0] == shortname:
400 if len(bits) == 2 and bits[0] == shortname:
401 return bits[1]
401 return bits[1]
402 else:
402 else:
403 return None
403 return None
404
404
405
405
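A self-contained sketch of the standin path convention implemented by standin(), isstandin() and splitstandin(): always join and split on '/', regardless of platform. The '.hglf' prefix is an assumption here, since shortname is defined elsewhere in this module.

SHORTNAME = '.hglf'  # assumed standin directory name
SHORTNAMESLASH = SHORTNAME + '/'


def standin_sketch(filename):
    # join with '/' regardless of platform, as the dirstate does
    return SHORTNAMESLASH + filename.replace('\\', '/')


def splitstandin_sketch(filename):
    # inverse of standin(): strip the prefix, or return None
    bits = filename.replace('\\', '/').split('/', 1)
    if len(bits) == 2 and bits[0] == SHORTNAME:
        return bits[1]
    return None


assert splitstandin_sketch(standin_sketch('foo/bar.bin')) == 'foo/bar.bin'
assert splitstandin_sketch('foo/bar.bin') is None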
406 def updatestandin(repo, lfile, standin):
406 def updatestandin(repo, lfile, standin):
407 """Re-calculate hash value of lfile and write it into standin
407 """Re-calculate hash value of lfile and write it into standin
408
408
409 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
409 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
410 """
410 """
411 file = repo.wjoin(lfile)
411 file = repo.wjoin(lfile)
412 if repo.wvfs.exists(lfile):
412 if repo.wvfs.exists(lfile):
413 hash = hashfile(file)
413 hash = hashfile(file)
414 executable = getexecutable(file)
414 executable = getexecutable(file)
415 writestandin(repo, standin, hash, executable)
415 writestandin(repo, standin, hash, executable)
416 else:
416 else:
417 raise error.Abort(_(b'%s: file not found!') % lfile)
417 raise error.Abort(_(b'%s: file not found!') % lfile)
418
418
419
419
420 def readasstandin(fctx):
420 def readasstandin(fctx):
421 '''read hex hash from given filectx of standin file
421 '''read hex hash from given filectx of standin file
422
422
423 This encapsulates how "standin" data is stored into storage layer.'''
423 This encapsulates how "standin" data is stored into storage layer.'''
424 return fctx.data().strip()
424 return fctx.data().strip()
425
425
426
426
427 def writestandin(repo, standin, hash, executable):
427 def writestandin(repo, standin, hash, executable):
428 '''write hash to <repo.root>/<standin>'''
428 '''write hash to <repo.root>/<standin>'''
429 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
429 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
430
430
431
431
432 def copyandhash(instream, outfile):
432 def copyandhash(instream, outfile):
433 '''Read bytes from instream (iterable) and write them to outfile,
433 '''Read bytes from instream (iterable) and write them to outfile,
434 computing the SHA-1 hash of the data along the way. Return the hash.'''
434 computing the SHA-1 hash of the data along the way. Return the hash.'''
435 hasher = hashlib.sha1(b'')
435 hasher = hashutil.sha1(b'')
436 for data in instream:
436 for data in instream:
437 hasher.update(data)
437 hasher.update(data)
438 outfile.write(data)
438 outfile.write(data)
439 return hex(hasher.digest())
439 return hex(hasher.digest())
440
440
441
441
442 def hashfile(file):
442 def hashfile(file):
443 if not os.path.exists(file):
443 if not os.path.exists(file):
444 return b''
444 return b''
445 with open(file, b'rb') as fd:
445 with open(file, b'rb') as fd:
446 return hexsha1(fd)
446 return hexsha1(fd)
447
447
448
448
449 def getexecutable(filename):
449 def getexecutable(filename):
450 mode = os.stat(filename).st_mode
450 mode = os.stat(filename).st_mode
451 return (
451 return (
452 (mode & stat.S_IXUSR)
452 (mode & stat.S_IXUSR)
453 and (mode & stat.S_IXGRP)
453 and (mode & stat.S_IXGRP)
454 and (mode & stat.S_IXOTH)
454 and (mode & stat.S_IXOTH)
455 )
455 )
456
456
457
457
458 def urljoin(first, second, *arg):
458 def urljoin(first, second, *arg):
459 def join(left, right):
459 def join(left, right):
460 if not left.endswith(b'/'):
460 if not left.endswith(b'/'):
461 left += b'/'
461 left += b'/'
462 if right.startswith(b'/'):
462 if right.startswith(b'/'):
463 right = right[1:]
463 right = right[1:]
464 return left + right
464 return left + right
465
465
466 url = join(first, second)
466 url = join(first, second)
467 for a in arg:
467 for a in arg:
468 url = join(url, a)
468 url = join(url, a)
469 return url
469 return url
470
470
471
471
472 def hexsha1(fileobj):
472 def hexsha1(fileobj):
473 """hexsha1 returns the hex-encoded sha1 sum of the data in the given
473 """hexsha1 returns the hex-encoded sha1 sum of the data in the given
474 file-like object"""
474 file-like object"""
475 h = hashlib.sha1()
475 h = hashutil.sha1()
476 for chunk in util.filechunkiter(fileobj):
476 for chunk in util.filechunkiter(fileobj):
477 h.update(chunk)
477 h.update(chunk)
478 return hex(h.digest())
478 return hex(h.digest())
479
479
480
480
481 def httpsendfile(ui, filename):
481 def httpsendfile(ui, filename):
482 return httpconnection.httpsendfile(ui, filename, b'rb')
482 return httpconnection.httpsendfile(ui, filename, b'rb')
483
483
484
484
485 def unixpath(path):
485 def unixpath(path):
486 '''Return a version of path normalized for use with the lfdirstate.'''
486 '''Return a version of path normalized for use with the lfdirstate.'''
487 return util.pconvert(os.path.normpath(path))
487 return util.pconvert(os.path.normpath(path))
488
488
489
489
490 def islfilesrepo(repo):
490 def islfilesrepo(repo):
491 '''Return true if the repo is a largefile repo.'''
491 '''Return true if the repo is a largefile repo.'''
492 if b'largefiles' in repo.requirements and any(
492 if b'largefiles' in repo.requirements and any(
493 shortnameslash in f[0] for f in repo.store.datafiles()
493 shortnameslash in f[0] for f in repo.store.datafiles()
494 ):
494 ):
495 return True
495 return True
496
496
497 return any(openlfdirstate(repo.ui, repo, False))
497 return any(openlfdirstate(repo.ui, repo, False))
498
498
499
499
500 class storeprotonotcapable(Exception):
500 class storeprotonotcapable(Exception):
501 def __init__(self, storetypes):
501 def __init__(self, storetypes):
502 self.storetypes = storetypes
502 self.storetypes = storetypes
503
503
504
504
505 def getstandinsstate(repo):
505 def getstandinsstate(repo):
506 standins = []
506 standins = []
507 matcher = getstandinmatcher(repo)
507 matcher = getstandinmatcher(repo)
508 wctx = repo[None]
508 wctx = repo[None]
509 for standin in repo.dirstate.walk(
509 for standin in repo.dirstate.walk(
510 matcher, subrepos=[], unknown=False, ignored=False
510 matcher, subrepos=[], unknown=False, ignored=False
511 ):
511 ):
512 lfile = splitstandin(standin)
512 lfile = splitstandin(standin)
513 try:
513 try:
514 hash = readasstandin(wctx[standin])
514 hash = readasstandin(wctx[standin])
515 except IOError:
515 except IOError:
516 hash = None
516 hash = None
517 standins.append((lfile, hash))
517 standins.append((lfile, hash))
518 return standins
518 return standins
519
519
520
520
521 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
521 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
522 lfstandin = standin(lfile)
522 lfstandin = standin(lfile)
523 if lfstandin in repo.dirstate:
523 if lfstandin in repo.dirstate:
524 stat = repo.dirstate._map[lfstandin]
524 stat = repo.dirstate._map[lfstandin]
525 state, mtime = stat[0], stat[3]
525 state, mtime = stat[0], stat[3]
526 else:
526 else:
527 state, mtime = b'?', -1
527 state, mtime = b'?', -1
528 if state == b'n':
528 if state == b'n':
529 if normallookup or mtime < 0 or not repo.wvfs.exists(lfile):
529 if normallookup or mtime < 0 or not repo.wvfs.exists(lfile):
530 # state 'n' doesn't ensure 'clean' in this case
530 # state 'n' doesn't ensure 'clean' in this case
531 lfdirstate.normallookup(lfile)
531 lfdirstate.normallookup(lfile)
532 else:
532 else:
533 lfdirstate.normal(lfile)
533 lfdirstate.normal(lfile)
534 elif state == b'm':
534 elif state == b'm':
535 lfdirstate.normallookup(lfile)
535 lfdirstate.normallookup(lfile)
536 elif state == b'r':
536 elif state == b'r':
537 lfdirstate.remove(lfile)
537 lfdirstate.remove(lfile)
538 elif state == b'a':
538 elif state == b'a':
539 lfdirstate.add(lfile)
539 lfdirstate.add(lfile)
540 elif state == b'?':
540 elif state == b'?':
541 lfdirstate.drop(lfile)
541 lfdirstate.drop(lfile)
542
542
543
543
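The state dispatch in synclfdirstate() can be summarized as a pure function. This sketch only returns the name of the lfdirstate method that would be called, under the same conditions as above; it is an illustration, not part of the module.

def synclfdirstate_action(state, mtime, fileexists, normallookup=False):
    # `state` is the dirstate code of the standin ('n', 'm', 'r', 'a', '?'),
    # `mtime` its recorded mtime, `fileexists` whether the largefile itself
    # is present in the working copy.
    if state == 'n':
        if normallookup or mtime < 0 or not fileexists:
            # 'n' alone does not guarantee the file is clean here
            return 'normallookup'
        return 'normal'
    return {
        'm': 'normallookup',
        'r': 'remove',
        'a': 'add',
        '?': 'drop',
    }.get(state)


assert synclfdirstate_action('n', mtime=-1, fileexists=True) == 'normallookup'
assert synclfdirstate_action('r', mtime=0, fileexists=True) == 'remove'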
544 def markcommitted(orig, ctx, node):
544 def markcommitted(orig, ctx, node):
545 repo = ctx.repo()
545 repo = ctx.repo()
546
546
547 orig(node)
547 orig(node)
548
548
549 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
549 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
550 # because files coming from the 2nd parent are omitted in the latter.
550 # because files coming from the 2nd parent are omitted in the latter.
551 #
551 #
552 # The former should be used to get targets of "synclfdirstate",
552 # The former should be used to get targets of "synclfdirstate",
553 # because such files:
553 # because such files:
554 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
554 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
555 # - have to be marked as "n" after commit, but
555 # - have to be marked as "n" after commit, but
556 # - aren't listed in "repo[node].files()"
556 # - aren't listed in "repo[node].files()"
557
557
558 lfdirstate = openlfdirstate(repo.ui, repo)
558 lfdirstate = openlfdirstate(repo.ui, repo)
559 for f in ctx.files():
559 for f in ctx.files():
560 lfile = splitstandin(f)
560 lfile = splitstandin(f)
561 if lfile is not None:
561 if lfile is not None:
562 synclfdirstate(repo, lfdirstate, lfile, False)
562 synclfdirstate(repo, lfdirstate, lfile, False)
563 lfdirstate.write()
563 lfdirstate.write()
564
564
565 # As part of committing, copy all of the largefiles into the cache.
565 # As part of committing, copy all of the largefiles into the cache.
566 #
566 #
567 # Using "node" instead of "ctx" implies additional "repo[node]"
567 # Using "node" instead of "ctx" implies additional "repo[node]"
568 # lookup during copyalltostore(), but can omit redundant check for
568 # lookup during copyalltostore(), but can omit redundant check for
569 # files coming from the 2nd parent, which should exist in store
569 # files coming from the 2nd parent, which should exist in store
570 # at merging.
570 # at merging.
571 copyalltostore(repo, node)
571 copyalltostore(repo, node)
572
572
573
573
574 def getlfilestoupdate(oldstandins, newstandins):
574 def getlfilestoupdate(oldstandins, newstandins):
575 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
575 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
576 filelist = []
576 filelist = []
577 for f in changedstandins:
577 for f in changedstandins:
578 if f[0] not in filelist:
578 if f[0] not in filelist:
579 filelist.append(f[0])
579 filelist.append(f[0])
580 return filelist
580 return filelist
581
581
582
582
583 def getlfilestoupload(repo, missing, addfunc):
583 def getlfilestoupload(repo, missing, addfunc):
584 makeprogress = repo.ui.makeprogress
584 makeprogress = repo.ui.makeprogress
585 with makeprogress(
585 with makeprogress(
586 _(b'finding outgoing largefiles'),
586 _(b'finding outgoing largefiles'),
587 unit=_(b'revisions'),
587 unit=_(b'revisions'),
588 total=len(missing),
588 total=len(missing),
589 ) as progress:
589 ) as progress:
590 for i, n in enumerate(missing):
590 for i, n in enumerate(missing):
591 progress.update(i)
591 progress.update(i)
592 parents = [p for p in repo[n].parents() if p != node.nullid]
592 parents = [p for p in repo[n].parents() if p != node.nullid]
593
593
594 with lfstatus(repo, value=False):
594 with lfstatus(repo, value=False):
595 ctx = repo[n]
595 ctx = repo[n]
596
596
597 files = set(ctx.files())
597 files = set(ctx.files())
598 if len(parents) == 2:
598 if len(parents) == 2:
599 mc = ctx.manifest()
599 mc = ctx.manifest()
600 mp1 = ctx.p1().manifest()
600 mp1 = ctx.p1().manifest()
601 mp2 = ctx.p2().manifest()
601 mp2 = ctx.p2().manifest()
602 for f in mp1:
602 for f in mp1:
603 if f not in mc:
603 if f not in mc:
604 files.add(f)
604 files.add(f)
605 for f in mp2:
605 for f in mp2:
606 if f not in mc:
606 if f not in mc:
607 files.add(f)
607 files.add(f)
608 for f in mc:
608 for f in mc:
609 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
609 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
610 files.add(f)
610 files.add(f)
611 for fn in files:
611 for fn in files:
612 if isstandin(fn) and fn in ctx:
612 if isstandin(fn) and fn in ctx:
613 addfunc(fn, readasstandin(ctx[fn]))
613 addfunc(fn, readasstandin(ctx[fn]))
614
614
615
615
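For the merge case handled in getlfilestoupload() above, the manifest comparison can be sketched with plain dicts; this is an illustration of the logic only, not the repository API.

def interestingfiles(mc, mp1, mp2, ctxfiles):
    # mc, mp1 and mp2 are {path: nodeid} dicts for the merge commit and its
    # two parents; ctxfiles is the commit's own files() list. The result is
    # the set of paths whose standins need to be inspected.
    files = set(ctxfiles)
    # paths absent from the merge result but present in either parent
    for m in (mp1, mp2):
        for f in m:
            if f not in mc:
                files.add(f)
    # paths whose merged version differs from either parent's version
    for f, n in mc.items():
        if n != mp1.get(f) or n != mp2.get(f):
            files.add(f)
    return files


assert interestingfiles(
    {'a': 'n2'}, {'a': 'n1', 'b': 'n3'}, {'a': 'n2'}, []
) == {'a', 'b'}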
616 def updatestandinsbymatch(repo, match):
616 def updatestandinsbymatch(repo, match):
617 '''Update standins in the working directory according to specified match
617 '''Update standins in the working directory according to specified match
618
618
619 This returns (possibly modified) ``match`` object to be used for
619 This returns (possibly modified) ``match`` object to be used for
620 subsequent commit process.
620 subsequent commit process.
621 '''
621 '''
622
622
623 ui = repo.ui
623 ui = repo.ui
624
624
625 # Case 1: user calls commit with no specific files or
625 # Case 1: user calls commit with no specific files or
626 # include/exclude patterns: refresh and commit all files that
626 # include/exclude patterns: refresh and commit all files that
627 # are "dirty".
627 # are "dirty".
628 if match is None or match.always():
628 if match is None or match.always():
629 # Spend a bit of time here to get a list of files we know
629 # Spend a bit of time here to get a list of files we know
630 # are modified so we can compare only against those.
630 # are modified so we can compare only against those.
631 # It can cost a lot of time (several seconds)
631 # It can cost a lot of time (several seconds)
632 # otherwise to update all standins if the largefiles are
632 # otherwise to update all standins if the largefiles are
633 # large.
633 # large.
634 lfdirstate = openlfdirstate(ui, repo)
634 lfdirstate = openlfdirstate(ui, repo)
635 dirtymatch = matchmod.always()
635 dirtymatch = matchmod.always()
636 unsure, s = lfdirstate.status(
636 unsure, s = lfdirstate.status(
637 dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
637 dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
638 )
638 )
639 modifiedfiles = unsure + s.modified + s.added + s.removed
639 modifiedfiles = unsure + s.modified + s.added + s.removed
640 lfiles = listlfiles(repo)
640 lfiles = listlfiles(repo)
641 # this only loops through largefiles that exist (not
641 # this only loops through largefiles that exist (not
642 # removed/renamed)
642 # removed/renamed)
643 for lfile in lfiles:
643 for lfile in lfiles:
644 if lfile in modifiedfiles:
644 if lfile in modifiedfiles:
645 fstandin = standin(lfile)
645 fstandin = standin(lfile)
646 if repo.wvfs.exists(fstandin):
646 if repo.wvfs.exists(fstandin):
647 # this handles the case where a rebase is being
647 # this handles the case where a rebase is being
648 # performed and the working copy is not updated
648 # performed and the working copy is not updated
649 # yet.
649 # yet.
650 if repo.wvfs.exists(lfile):
650 if repo.wvfs.exists(lfile):
651 updatestandin(repo, lfile, fstandin)
651 updatestandin(repo, lfile, fstandin)
652
652
653 return match
653 return match
654
654
655 lfiles = listlfiles(repo)
655 lfiles = listlfiles(repo)
656 match._files = repo._subdirlfs(match.files(), lfiles)
656 match._files = repo._subdirlfs(match.files(), lfiles)
657
657
658 # Case 2: user calls commit with specified patterns: refresh
658 # Case 2: user calls commit with specified patterns: refresh
659 # any matching big files.
659 # any matching big files.
660 smatcher = composestandinmatcher(repo, match)
660 smatcher = composestandinmatcher(repo, match)
661 standins = repo.dirstate.walk(
661 standins = repo.dirstate.walk(
662 smatcher, subrepos=[], unknown=False, ignored=False
662 smatcher, subrepos=[], unknown=False, ignored=False
663 )
663 )
664
664
665 # No matching big files: get out of the way and pass control to
665 # No matching big files: get out of the way and pass control to
666 # the usual commit() method.
666 # the usual commit() method.
667 if not standins:
667 if not standins:
668 return match
668 return match
669
669
670 # Refresh all matching big files. It's possible that the
670 # Refresh all matching big files. It's possible that the
671 # commit will end up failing, in which case the big files will
671 # commit will end up failing, in which case the big files will
672 # stay refreshed. No harm done: the user modified them and
672 # stay refreshed. No harm done: the user modified them and
673 # asked to commit them, so sooner or later we're going to
673 # asked to commit them, so sooner or later we're going to
674 # refresh the standins. Might as well leave them refreshed.
674 # refresh the standins. Might as well leave them refreshed.
675 lfdirstate = openlfdirstate(ui, repo)
675 lfdirstate = openlfdirstate(ui, repo)
676 for fstandin in standins:
676 for fstandin in standins:
677 lfile = splitstandin(fstandin)
677 lfile = splitstandin(fstandin)
678 if lfdirstate[lfile] != b'r':
678 if lfdirstate[lfile] != b'r':
679 updatestandin(repo, lfile, fstandin)
679 updatestandin(repo, lfile, fstandin)
680
680
681 # Cook up a new matcher that only matches regular files or
681 # Cook up a new matcher that only matches regular files or
682 # standins corresponding to the big files requested by the
682 # standins corresponding to the big files requested by the
683 # user. Have to modify _files to prevent commit() from
683 # user. Have to modify _files to prevent commit() from
684 # complaining "not tracked" for big files.
684 # complaining "not tracked" for big files.
685 match = copy.copy(match)
685 match = copy.copy(match)
686 origmatchfn = match.matchfn
686 origmatchfn = match.matchfn
687
687
688 # Check both the list of largefiles and the list of
688 # Check both the list of largefiles and the list of
689 # standins because if a largefile was removed, it
689 # standins because if a largefile was removed, it
690 # won't be in the list of largefiles at this point
690 # won't be in the list of largefiles at this point
691 match._files += sorted(standins)
691 match._files += sorted(standins)
692
692
693 actualfiles = []
693 actualfiles = []
694 for f in match._files:
694 for f in match._files:
695 fstandin = standin(f)
695 fstandin = standin(f)
696
696
697 # For largefiles, only one of the normal and standin should be
697 # For largefiles, only one of the normal and standin should be
698 # committed (except if one of them is a remove). In the case of a
698 # committed (except if one of them is a remove). In the case of a
699 # standin removal, drop the normal file if it is unknown to dirstate.
699 # standin removal, drop the normal file if it is unknown to dirstate.
700 # Thus, skip plain largefile names but keep the standin.
700 # Thus, skip plain largefile names but keep the standin.
701 if f in lfiles or fstandin in standins:
701 if f in lfiles or fstandin in standins:
702 if repo.dirstate[fstandin] != b'r':
702 if repo.dirstate[fstandin] != b'r':
703 if repo.dirstate[f] != b'r':
703 if repo.dirstate[f] != b'r':
704 continue
704 continue
705 elif repo.dirstate[f] == b'?':
705 elif repo.dirstate[f] == b'?':
706 continue
706 continue
707
707
708 actualfiles.append(f)
708 actualfiles.append(f)
709 match._files = actualfiles
709 match._files = actualfiles
710
710
711 def matchfn(f):
711 def matchfn(f):
712 if origmatchfn(f):
712 if origmatchfn(f):
713 return f not in lfiles
713 return f not in lfiles
714 else:
714 else:
715 return f in standins
715 return f in standins
716
716
717 match.matchfn = matchfn
717 match.matchfn = matchfn
718
718
719 return match
719 return match
720
720
721
721
722 class automatedcommithook(object):
722 class automatedcommithook(object):
723 '''Stateful hook to update standins at the 1st commit of resuming
723 '''Stateful hook to update standins at the 1st commit of resuming
724
724
725 For efficiency, updating standins in the working directory should
725 For efficiency, updating standins in the working directory should
726 be avoided during automated committing (like rebase, transplant and
726 be avoided during automated committing (like rebase, transplant and
727 so on), because they should be updated before committing.
727 so on), because they should be updated before committing.
728
728
729 But the 1st commit of resuming automated committing (e.g. ``rebase
729 But the 1st commit of resuming automated committing (e.g. ``rebase
730 --continue``) should update them, because largefiles may be
730 --continue``) should update them, because largefiles may be
731 modified manually.
731 modified manually.
732 '''
732 '''
733
733
734 def __init__(self, resuming):
734 def __init__(self, resuming):
735 self.resuming = resuming
735 self.resuming = resuming
736
736
737 def __call__(self, repo, match):
737 def __call__(self, repo, match):
738 if self.resuming:
738 if self.resuming:
739 self.resuming = False # avoids updating at subsequent commits
739 self.resuming = False # avoids updating at subsequent commits
740 return updatestandinsbymatch(repo, match)
740 return updatestandinsbymatch(repo, match)
741 else:
741 else:
742 return match
742 return match
743
743
744
744
745 def getstatuswriter(ui, repo, forcibly=None):
745 def getstatuswriter(ui, repo, forcibly=None):
746 '''Return the function to write largefiles specific status out
746 '''Return the function to write largefiles specific status out
747
747
748 If ``forcibly`` is ``None``, this returns the last element of
748 If ``forcibly`` is ``None``, this returns the last element of
749 ``repo._lfstatuswriters`` as "default" writer function.
749 ``repo._lfstatuswriters`` as "default" writer function.
750
750
751 Otherwise, this returns the function to always write out (or
751 Otherwise, this returns the function to always write out (or
752 ignore if ``not forcibly``) status.
752 ignore if ``not forcibly``) status.
753 '''
753 '''
754 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
754 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
755 return repo._lfstatuswriters[-1]
755 return repo._lfstatuswriters[-1]
756 else:
756 else:
757 if forcibly:
757 if forcibly:
758 return ui.status # forcibly WRITE OUT
758 return ui.status # forcibly WRITE OUT
759 else:
759 else:
760 return lambda *msg, **opts: None # forcibly IGNORE
760 return lambda *msg, **opts: None # forcibly IGNORE
@@ -1,561 +1,561 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import collections
3 import collections
4 import errno
4 import errno
5 import hashlib
6 import mmap
5 import mmap
7 import os
6 import os
8 import struct
7 import struct
9 import time
8 import time
10
9
11 from mercurial.i18n import _
10 from mercurial.i18n import _
12 from mercurial.pycompat import (
11 from mercurial.pycompat import (
13 getattr,
12 getattr,
14 open,
13 open,
15 )
14 )
16 from mercurial import (
15 from mercurial import (
17 node as nodemod,
16 node as nodemod,
18 policy,
17 policy,
19 pycompat,
18 pycompat,
20 util,
19 util,
21 vfs as vfsmod,
20 vfs as vfsmod,
22 )
21 )
22 from mercurial.utils import hashutil
23 from . import shallowutil
23 from . import shallowutil
24
24
25 osutil = policy.importmod('osutil')
25 osutil = policy.importmod('osutil')
26
26
27 # The pack version supported by this implementation. This will need to be
27 # The pack version supported by this implementation. This will need to be
28 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
28 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
29 # changing any of the int sizes, changing the delta algorithm, etc.
29 # changing any of the int sizes, changing the delta algorithm, etc.
30 PACKVERSIONSIZE = 1
30 PACKVERSIONSIZE = 1
31 INDEXVERSIONSIZE = 2
31 INDEXVERSIONSIZE = 2
32
32
33 FANOUTSTART = INDEXVERSIONSIZE
33 FANOUTSTART = INDEXVERSIONSIZE
34
34
35 # Constant that indicates a fanout table entry hasn't been filled in. (This does
35 # Constant that indicates a fanout table entry hasn't been filled in. (This does
36 # not get serialized)
36 # not get serialized)
37 EMPTYFANOUT = -1
37 EMPTYFANOUT = -1
38
38
39 # The fanout prefix is the number of bytes that can be addressed by the fanout
39 # The fanout prefix is the number of bytes that can be addressed by the fanout
40 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
40 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
41 # look in the fanout table (which will be 2^8 entries long).
41 # look in the fanout table (which will be 2^8 entries long).
42 SMALLFANOUTPREFIX = 1
42 SMALLFANOUTPREFIX = 1
43 LARGEFANOUTPREFIX = 2
43 LARGEFANOUTPREFIX = 2
44
44
45 # The number of entries in the index at which point we switch to a large fanout.
45 # The number of entries in the index at which point we switch to a large fanout.
46 # It is chosen to balance the linear scan through a sparse fanout, with the
46 # It is chosen to balance the linear scan through a sparse fanout, with the
47 # size of the bisect in actual index.
47 # size of the bisect in actual index.
48 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
48 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
49 # bisect) with (8 step fanout scan + 1 step bisect)
49 # bisect) with (8 step fanout scan + 1 step bisect)
50 # 5 step bisect = log(2^16 / 8 / 255) # fanout
50 # 5 step bisect = log(2^16 / 8 / 255) # fanout
51 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
51 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
52 SMALLFANOUTCUTOFF = 2 ** 16 // 8
52 SMALLFANOUTCUTOFF = 2 ** 16 // 8
53
53
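A small worked example of the fanout prefix described above: the first one or two bytes of a node select a slot in a 2^8 or 2^16 entry table. The node value below is made up, and how the slot value is interpreted (as an offset into the sorted index) is not shown here.

FANOUT_SMALL = 1  # index by the first byte      -> 2**8 slots
FANOUT_LARGE = 2  # index by the first two bytes -> 2**16 slots


def fanoutslot(sha, prefixbytes):
    # which fanout-table slot a binary hash falls into
    return int.from_bytes(sha[:prefixbytes], 'big')


node = bytes.fromhex('1a' * 20)  # a made-up 20-byte node
assert fanoutslot(node, FANOUT_SMALL) == 0x1A
assert fanoutslot(node, FANOUT_LARGE) == 0x1A1A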
54 # The amount of time to wait between checking for new packs. This prevents an
54 # The amount of time to wait between checking for new packs. This prevents an
55 # exception when data is moved to a new pack after the process has already
55 # exception when data is moved to a new pack after the process has already
56 # loaded the pack list.
56 # loaded the pack list.
57 REFRESHRATE = 0.1
57 REFRESHRATE = 0.1
58
58
59 if pycompat.isposix and not pycompat.ispy3:
59 if pycompat.isposix and not pycompat.ispy3:
60 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
60 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
61 # The 'e' flag will be ignored on older versions of glibc.
61 # The 'e' flag will be ignored on older versions of glibc.
62 # Python 3 can't handle the 'e' flag.
62 # Python 3 can't handle the 'e' flag.
63 PACKOPENMODE = b'rbe'
63 PACKOPENMODE = b'rbe'
64 else:
64 else:
65 PACKOPENMODE = b'rb'
65 PACKOPENMODE = b'rb'
66
66
67
67
68 class _cachebackedpacks(object):
68 class _cachebackedpacks(object):
69 def __init__(self, packs, cachesize):
69 def __init__(self, packs, cachesize):
70 self._packs = set(packs)
70 self._packs = set(packs)
71 self._lrucache = util.lrucachedict(cachesize)
71 self._lrucache = util.lrucachedict(cachesize)
72 self._lastpack = None
72 self._lastpack = None
73
73
74 # Avoid cold start of the cache by populating the most recent packs
74 # Avoid cold start of the cache by populating the most recent packs
75 # in the cache.
75 # in the cache.
76 for i in reversed(range(min(cachesize, len(packs)))):
76 for i in reversed(range(min(cachesize, len(packs)))):
77 self._movetofront(packs[i])
77 self._movetofront(packs[i])
78
78
79 def _movetofront(self, pack):
79 def _movetofront(self, pack):
80 # This effectively makes pack the first entry in the cache.
80 # This effectively makes pack the first entry in the cache.
81 self._lrucache[pack] = True
81 self._lrucache[pack] = True
82
82
83 def _registerlastpackusage(self):
83 def _registerlastpackusage(self):
84 if self._lastpack is not None:
84 if self._lastpack is not None:
85 self._movetofront(self._lastpack)
85 self._movetofront(self._lastpack)
86 self._lastpack = None
86 self._lastpack = None
87
87
88 def add(self, pack):
88 def add(self, pack):
89 self._registerlastpackusage()
89 self._registerlastpackusage()
90
90
91 # This method will mostly be called when packs are not in cache.
91 # This method will mostly be called when packs are not in cache.
92 # Therefore, add the pack to the cache.
92 # Therefore, add the pack to the cache.
93 self._movetofront(pack)
93 self._movetofront(pack)
94 self._packs.add(pack)
94 self._packs.add(pack)
95
95
96 def __iter__(self):
96 def __iter__(self):
97 self._registerlastpackusage()
97 self._registerlastpackusage()
98
98
99 # Cache iteration is based on LRU.
99 # Cache iteration is based on LRU.
100 for pack in self._lrucache:
100 for pack in self._lrucache:
101 self._lastpack = pack
101 self._lastpack = pack
102 yield pack
102 yield pack
103
103
104 cachedpacks = set(pack for pack in self._lrucache)
104 cachedpacks = set(pack for pack in self._lrucache)
105 # Yield for paths not in the cache.
105 # Yield for paths not in the cache.
106 for pack in self._packs - cachedpacks:
106 for pack in self._packs - cachedpacks:
107 self._lastpack = pack
107 self._lastpack = pack
108 yield pack
108 yield pack
109
109
110 # Data not found in any pack.
110 # Data not found in any pack.
111 self._lastpack = None
111 self._lastpack = None
112
112
113
113
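A toy model of the _cachebackedpacks iteration order, using collections.OrderedDict in place of util.lrucachedict: cached packs are yielded first (most recently used first), the remaining packs afterwards, and the pack that satisfied the last lookup is promoted on the next pass. A caller that breaks out of the loop on a hit therefore sees that pack first the next time around.

import collections


class cachebackediter(object):
    # Keep an MRU-ordered cache of items and yield cached items before the
    # rest; whichever item was current when iteration stopped (the one that
    # produced a hit) is moved to the front before the next iteration.

    def __init__(self, items, cachesize):
        self._items = set(items)
        self._cache = collections.OrderedDict()
        self._last = None
        # warm the cache with the first few items, most recent in front
        for item in reversed(items[:cachesize]):
            self._movetofront(item)

    def _movetofront(self, item):
        self._cache[item] = True
        self._cache.move_to_end(item, last=False)

    def __iter__(self):
        if self._last is not None:
            # register the hit from the previous iteration
            self._movetofront(self._last)
            self._last = None
        for item in list(self._cache):
            self._last = item
            yield item
        for item in self._items - set(self._cache):
            self._last = item
            yield item
        self._last = None  # nothing matched anywhere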
114 class basepackstore(object):
114 class basepackstore(object):
115 # Default cache size limit for the pack files.
115 # Default cache size limit for the pack files.
116 DEFAULTCACHESIZE = 100
116 DEFAULTCACHESIZE = 100
117
117
118 def __init__(self, ui, path):
118 def __init__(self, ui, path):
119 self.ui = ui
119 self.ui = ui
120 self.path = path
120 self.path = path
121
121
122 # lastrefresh is 0 so we'll immediately check for new packs on the first
122 # lastrefresh is 0 so we'll immediately check for new packs on the first
123 # failure.
123 # failure.
124 self.lastrefresh = 0
124 self.lastrefresh = 0
125
125
126 packs = []
126 packs = []
127 for filepath, __, __ in self._getavailablepackfilessorted():
127 for filepath, __, __ in self._getavailablepackfilessorted():
128 try:
128 try:
129 pack = self.getpack(filepath)
129 pack = self.getpack(filepath)
130 except Exception as ex:
130 except Exception as ex:
131 # An exception may be thrown if the pack file is corrupted
131 # An exception may be thrown if the pack file is corrupted
132 # somehow. Log a warning but keep going in this case, just
132 # somehow. Log a warning but keep going in this case, just
133 # skipping this pack file.
133 # skipping this pack file.
134 #
134 #
135 # If this is an ENOENT error then don't even bother logging.
135 # If this is an ENOENT error then don't even bother logging.
136 # Someone could have removed the file since we retrieved the
136 # Someone could have removed the file since we retrieved the
137 # list of paths.
137 # list of paths.
138 if getattr(ex, 'errno', None) != errno.ENOENT:
138 if getattr(ex, 'errno', None) != errno.ENOENT:
139 ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex))
139 ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex))
140 continue
140 continue
141 packs.append(pack)
141 packs.append(pack)
142
142
143 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
143 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
144
144
145 def _getavailablepackfiles(self):
145 def _getavailablepackfiles(self):
146 """For each pack file (an index/data file combo), yields:
146 """For each pack file (an index/data file combo), yields:
147 (full path without extension, mtime, size)
147 (full path without extension, mtime, size)
148
148
149 mtime will be the mtime of the index/data file (whichever is newer)
149 mtime will be the mtime of the index/data file (whichever is newer)
150 size is the combined size of index/data file
150 size is the combined size of index/data file
151 """
151 """
152 indexsuffixlen = len(self.INDEXSUFFIX)
152 indexsuffixlen = len(self.INDEXSUFFIX)
153 packsuffixlen = len(self.PACKSUFFIX)
153 packsuffixlen = len(self.PACKSUFFIX)
154
154
155 ids = set()
155 ids = set()
156 sizes = collections.defaultdict(lambda: 0)
156 sizes = collections.defaultdict(lambda: 0)
157 mtimes = collections.defaultdict(lambda: [])
157 mtimes = collections.defaultdict(lambda: [])
158 try:
158 try:
159 for filename, type, stat in osutil.listdir(self.path, stat=True):
159 for filename, type, stat in osutil.listdir(self.path, stat=True):
160 id = None
160 id = None
161 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
161 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
162 id = filename[:-indexsuffixlen]
162 id = filename[:-indexsuffixlen]
163 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
163 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
164 id = filename[:-packsuffixlen]
164 id = filename[:-packsuffixlen]
165
165
166 # Since we expect to have two files corresponding to each ID
166 # Since we expect to have two files corresponding to each ID
167 # (the index file and the pack file), we can yield once we see
167 # (the index file and the pack file), we can yield once we see
168 # it twice.
168 # it twice.
169 if id:
169 if id:
170 sizes[id] += stat.st_size # Sum both files' sizes together
170 sizes[id] += stat.st_size # Sum both files' sizes together
171 mtimes[id].append(stat.st_mtime)
171 mtimes[id].append(stat.st_mtime)
172 if id in ids:
172 if id in ids:
173 yield (
173 yield (
174 os.path.join(self.path, id),
174 os.path.join(self.path, id),
175 max(mtimes[id]),
175 max(mtimes[id]),
176 sizes[id],
176 sizes[id],
177 )
177 )
178 else:
178 else:
179 ids.add(id)
179 ids.add(id)
180 except OSError as ex:
180 except OSError as ex:
181 if ex.errno != errno.ENOENT:
181 if ex.errno != errno.ENOENT:
182 raise
182 raise
183
183
184 def _getavailablepackfilessorted(self):
184 def _getavailablepackfilessorted(self):
185 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
185 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
186 yielding newest files first.
186 yielding newest files first.
187
187
188 This is desirable, since it is more likely newer packfiles have more
188 This is desirable, since it is more likely newer packfiles have more
189 desirable data.
189 desirable data.
190 """
190 """
191 files = []
191 files = []
192 for path, mtime, size in self._getavailablepackfiles():
192 for path, mtime, size in self._getavailablepackfiles():
193 files.append((mtime, size, path))
193 files.append((mtime, size, path))
194 files = sorted(files, reverse=True)
194 files = sorted(files, reverse=True)
195 for mtime, size, path in files:
195 for mtime, size, path in files:
196 yield path, mtime, size
196 yield path, mtime, size
197
197
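A standalone sketch of the pack discovery done by _getavailablepackfiles() and _getavailablepackfilessorted(): pair index and data files by base name, combine their sizes, keep the newer mtime, and yield newest first. The suffixes below are placeholders for the store's INDEXSUFFIX/PACKSUFFIX.

import os


def availablepackfiles(directory, indexsuffix='.histidx', packsuffix='.histpack'):
    seen = {}
    for name in os.listdir(directory):
        base = None
        if name.endswith(indexsuffix):
            base = name[: -len(indexsuffix)]
        elif name.endswith(packsuffix):
            base = name[: -len(packsuffix)]
        if base is None:
            continue
        st = os.stat(os.path.join(directory, name))
        mtime, size, count = seen.get(base, (0, 0, 0))
        seen[base] = (max(mtime, st.st_mtime), size + st.st_size, count + 1)
    complete = [
        (mtime, size, os.path.join(directory, base))
        for base, (mtime, size, count) in seen.items()
        if count == 2  # only yield IDs that have both halves
    ]
    # newest packfiles first, since they are more likely to hold wanted data
    for mtime, size, path in sorted(complete, reverse=True):
        yield path, mtime, size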
198 def gettotalsizeandcount(self):
198 def gettotalsizeandcount(self):
199 """Returns the total disk size (in bytes) of all the pack files in
199 """Returns the total disk size (in bytes) of all the pack files in
200 this store, and the count of pack files.
200 this store, and the count of pack files.
201
201
202 (This might be smaller than the total size of the ``self.path``
202 (This might be smaller than the total size of the ``self.path``
203 directory, since this only considers fully-written pack files, and not
203 directory, since this only considers fully-written pack files, and not
204 temporary files or other detritus on the directory.)
204 temporary files or other detritus on the directory.)
205 """
205 """
206 totalsize = 0
206 totalsize = 0
207 count = 0
207 count = 0
208 for __, __, size in self._getavailablepackfiles():
208 for __, __, size in self._getavailablepackfiles():
209 totalsize += size
209 totalsize += size
210 count += 1
210 count += 1
211 return totalsize, count
211 return totalsize, count
212
212
213 def getmetrics(self):
213 def getmetrics(self):
214 """Returns metrics on the state of this store."""
214 """Returns metrics on the state of this store."""
215 size, count = self.gettotalsizeandcount()
215 size, count = self.gettotalsizeandcount()
216 return {
216 return {
217 b'numpacks': count,
217 b'numpacks': count,
218 b'totalpacksize': size,
218 b'totalpacksize': size,
219 }
219 }
220
220
221 def getpack(self, path):
221 def getpack(self, path):
222 raise NotImplementedError()
222 raise NotImplementedError()
223
223
224 def getmissing(self, keys):
224 def getmissing(self, keys):
225 missing = keys
225 missing = keys
226 for pack in self.packs:
226 for pack in self.packs:
227 missing = pack.getmissing(missing)
227 missing = pack.getmissing(missing)
228
228
229 # Ensures better performance of the cache by keeping the most
229 # Ensures better performance of the cache by keeping the most
230 # recently accessed pack at the beginning in subsequent iterations.
230 # recently accessed pack at the beginning in subsequent iterations.
231 if not missing:
231 if not missing:
232 return missing
232 return missing
233
233
234 if missing:
234 if missing:
235 for pack in self.refresh():
235 for pack in self.refresh():
236 missing = pack.getmissing(missing)
236 missing = pack.getmissing(missing)
237
237
238 return missing
238 return missing
239
239
240 def markledger(self, ledger, options=None):
240 def markledger(self, ledger, options=None):
241 for pack in self.packs:
241 for pack in self.packs:
242 pack.markledger(ledger)
242 pack.markledger(ledger)
243
243
244 def markforrefresh(self):
244 def markforrefresh(self):
245 """Tells the store that there may be new pack files, so the next time it
245 """Tells the store that there may be new pack files, so the next time it
246 has a lookup miss it should check for new files."""
246 has a lookup miss it should check for new files."""
247 self.lastrefresh = 0
247 self.lastrefresh = 0
248
248
249 def refresh(self):
249 def refresh(self):
250 """Checks for any new packs on disk, adds them to the main pack list,
250 """Checks for any new packs on disk, adds them to the main pack list,
251 and returns a list of just the new packs."""
251 and returns a list of just the new packs."""
252 now = time.time()
252 now = time.time()
253
253
254 # If we experience a lot of misses (like in the case of getmissing() on
254 # If we experience a lot of misses (like in the case of getmissing() on
255 # new objects), let's only actually check disk for new stuff every once
255 # new objects), let's only actually check disk for new stuff every once
256 # in a while. Generally this code path should only ever matter when a
256 # in a while. Generally this code path should only ever matter when a
257 # repack is going on in the background, and it should be pretty rare
257 # repack is going on in the background, and it should be pretty rare
258 # for that to happen twice in quick succession.
258 # for that to happen twice in quick succession.
259 newpacks = []
259 newpacks = []
260 if now > self.lastrefresh + REFRESHRATE:
260 if now > self.lastrefresh + REFRESHRATE:
261 self.lastrefresh = now
261 self.lastrefresh = now
262 previous = set(p.path for p in self.packs)
262 previous = set(p.path for p in self.packs)
263 for filepath, __, __ in self._getavailablepackfilessorted():
263 for filepath, __, __ in self._getavailablepackfilessorted():
264 if filepath not in previous:
264 if filepath not in previous:
265 newpack = self.getpack(filepath)
265 newpack = self.getpack(filepath)
266 newpacks.append(newpack)
266 newpacks.append(newpack)
267 self.packs.add(newpack)
267 self.packs.add(newpack)
268
268
269 return newpacks
269 return newpacks
270
270
271
271
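Editorial aside: refresh() above is a simple throttle; the directory is rescanned for new packs only when at least REFRESHRATE seconds have elapsed since the last scan, so a storm of cache misses during a background repack does not become a storm of directory listings. A minimal standalone sketch of the same pattern follows. It is not part of this changeset, and the directory layout plus the 0.1-second rate are illustrative assumptions.

import os
import time

REFRESHRATE = 0.1  # assumed value; the real constant is defined elsewhere


class throttledscanner(object):
    def __init__(self, directory):
        self.directory = directory
        self.known = set()
        self.lastrefresh = 0

    def refresh(self):
        """Return paths that appeared since the previous scan, rescanning
        the directory at most once per REFRESHRATE seconds."""
        now = time.time()
        newpaths = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            for name in os.listdir(self.directory):
                path = os.path.join(self.directory, name)
                if path not in self.known:
                    self.known.add(path)
                    newpaths.append(path)
        return newpaths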
272 class versionmixin(object):
272 class versionmixin(object):
273 # Mix-in for classes with multiple supported versions
273 # Mix-in for classes with multiple supported versions
274 VERSION = None
274 VERSION = None
275 SUPPORTED_VERSIONS = [2]
275 SUPPORTED_VERSIONS = [2]
276
276
277 def _checkversion(self, version):
277 def _checkversion(self, version):
278 if version in self.SUPPORTED_VERSIONS:
278 if version in self.SUPPORTED_VERSIONS:
279 if self.VERSION is None:
279 if self.VERSION is None:
280 # only affect this instance
280 # only affect this instance
281 self.VERSION = version
281 self.VERSION = version
282 elif self.VERSION != version:
282 elif self.VERSION != version:
283 raise RuntimeError(b'inconsistent version: %d' % version)
283 raise RuntimeError(b'inconsistent version: %d' % version)
284 else:
284 else:
285 raise RuntimeError(b'unsupported version: %d' % version)
285 raise RuntimeError(b'unsupported version: %d' % version)
286
286
287
287
288 class basepack(versionmixin):
288 class basepack(versionmixin):
289 # The maximum amount we should read via mmap before remapping so the old
289 # The maximum amount we should read via mmap before remapping so the old
290 # pages can be released (100MB)
290 # pages can be released (100MB)
291 MAXPAGEDIN = 100 * 1024 ** 2
291 MAXPAGEDIN = 100 * 1024 ** 2
292
292
293 SUPPORTED_VERSIONS = [2]
293 SUPPORTED_VERSIONS = [2]
294
294
295 def __init__(self, path):
295 def __init__(self, path):
296 self.path = path
296 self.path = path
297 self.packpath = path + self.PACKSUFFIX
297 self.packpath = path + self.PACKSUFFIX
298 self.indexpath = path + self.INDEXSUFFIX
298 self.indexpath = path + self.INDEXSUFFIX
299
299
300 self.indexsize = os.stat(self.indexpath).st_size
300 self.indexsize = os.stat(self.indexpath).st_size
301 self.datasize = os.stat(self.packpath).st_size
301 self.datasize = os.stat(self.packpath).st_size
302
302
303 self._index = None
303 self._index = None
304 self._data = None
304 self._data = None
305 self.freememory() # initialize the mmap
305 self.freememory() # initialize the mmap
306
306
307 version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0]
307 version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0]
308 self._checkversion(version)
308 self._checkversion(version)
309
309
310 version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE])
310 version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE])
311 self._checkversion(version)
311 self._checkversion(version)
312
312
313 if 0b10000000 & config:
313 if 0b10000000 & config:
314 self.params = indexparams(LARGEFANOUTPREFIX, version)
314 self.params = indexparams(LARGEFANOUTPREFIX, version)
315 else:
315 else:
316 self.params = indexparams(SMALLFANOUTPREFIX, version)
316 self.params = indexparams(SMALLFANOUTPREFIX, version)
317
317
318 @util.propertycache
318 @util.propertycache
319 def _fanouttable(self):
319 def _fanouttable(self):
320 params = self.params
320 params = self.params
321 rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize]
321 rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize]
322 fanouttable = []
322 fanouttable = []
323 for i in pycompat.xrange(0, params.fanoutcount):
323 for i in pycompat.xrange(0, params.fanoutcount):
324 loc = i * 4
324 loc = i * 4
325 fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0]
325 fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0]
326 fanouttable.append(fanoutentry)
326 fanouttable.append(fanoutentry)
327 return fanouttable
327 return fanouttable
328
328
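Editorial aside: _fanouttable above decodes fanoutcount big-endian 32-bit offsets; a node's leading byte(s) select the slot whose value says where that prefix's entries start in the index. The sketch below is an illustrative reimplementation for the one-byte-prefix (small fanout) case, not code from this changeset.

import struct

FANOUTCOUNT = 256  # 2 ** (1 byte * 8); the small-fanout case
ENTRYSIZE = 4      # one b'!I' slot per prefix


def readfanout(rawfanout):
    """Decode the raw fanout bytes into a list of integer offsets."""
    assert len(rawfanout) == FANOUTCOUNT * ENTRYSIZE
    return [
        struct.unpack(b'!I', rawfanout[i * ENTRYSIZE : (i + 1) * ENTRYSIZE])[0]
        for i in range(FANOUTCOUNT)
    ]


def fanoutstart(fanouttable, node):
    """Return the index offset at which entries sharing this node's
    one-byte prefix begin."""
    prefix = struct.unpack(b'!B', node[:1])[0]
    return fanouttable[prefix]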
329 @util.propertycache
329 @util.propertycache
330 def _indexend(self):
330 def _indexend(self):
331 nodecount = struct.unpack_from(
331 nodecount = struct.unpack_from(
332 b'!Q', self._index, self.params.indexstart - 8
332 b'!Q', self._index, self.params.indexstart - 8
333 )[0]
333 )[0]
334 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
334 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
335
335
336 def freememory(self):
336 def freememory(self):
337 """Unmap and remap the memory to free it up after known expensive
337 """Unmap and remap the memory to free it up after known expensive
338 operations. Return True if self._data and self._index were reloaded.
338 operations. Return True if self._data and self._index were reloaded.
339 """
339 """
340 if self._index:
340 if self._index:
341 if self._pagedin < self.MAXPAGEDIN:
341 if self._pagedin < self.MAXPAGEDIN:
342 return False
342 return False
343
343
344 self._index.close()
344 self._index.close()
345 self._data.close()
345 self._data.close()
346
346
347 # TODO: use an opener/vfs to access these paths
347 # TODO: use an opener/vfs to access these paths
348 with open(self.indexpath, PACKOPENMODE) as indexfp:
348 with open(self.indexpath, PACKOPENMODE) as indexfp:
349 # memory-map the file, size 0 means whole file
349 # memory-map the file, size 0 means whole file
350 self._index = mmap.mmap(
350 self._index = mmap.mmap(
351 indexfp.fileno(), 0, access=mmap.ACCESS_READ
351 indexfp.fileno(), 0, access=mmap.ACCESS_READ
352 )
352 )
353 with open(self.packpath, PACKOPENMODE) as datafp:
353 with open(self.packpath, PACKOPENMODE) as datafp:
354 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
354 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
355
355
356 self._pagedin = 0
356 self._pagedin = 0
357 return True
357 return True
358
358
359 def getmissing(self, keys):
359 def getmissing(self, keys):
360 raise NotImplementedError()
360 raise NotImplementedError()
361
361
362 def markledger(self, ledger, options=None):
362 def markledger(self, ledger, options=None):
363 raise NotImplementedError()
363 raise NotImplementedError()
364
364
365 def cleanup(self, ledger):
365 def cleanup(self, ledger):
366 raise NotImplementedError()
366 raise NotImplementedError()
367
367
368 def __iter__(self):
368 def __iter__(self):
369 raise NotImplementedError()
369 raise NotImplementedError()
370
370
371 def iterentries(self):
371 def iterentries(self):
372 raise NotImplementedError()
372 raise NotImplementedError()
373
373
374
374
375 class mutablebasepack(versionmixin):
375 class mutablebasepack(versionmixin):
376 def __init__(self, ui, packdir, version=2):
376 def __init__(self, ui, packdir, version=2):
377 self._checkversion(version)
377 self._checkversion(version)
378 # TODO(augie): make this configurable
378 # TODO(augie): make this configurable
379 self._compressor = b'GZ'
379 self._compressor = b'GZ'
380 opener = vfsmod.vfs(packdir)
380 opener = vfsmod.vfs(packdir)
381 opener.createmode = 0o444
381 opener.createmode = 0o444
382 self.opener = opener
382 self.opener = opener
383
383
384 self.entries = {}
384 self.entries = {}
385
385
386 shallowutil.mkstickygroupdir(ui, packdir)
386 shallowutil.mkstickygroupdir(ui, packdir)
387 self.packfp, self.packpath = opener.mkstemp(
387 self.packfp, self.packpath = opener.mkstemp(
388 suffix=self.PACKSUFFIX + b'-tmp'
388 suffix=self.PACKSUFFIX + b'-tmp'
389 )
389 )
390 self.idxfp, self.idxpath = opener.mkstemp(
390 self.idxfp, self.idxpath = opener.mkstemp(
391 suffix=self.INDEXSUFFIX + b'-tmp'
391 suffix=self.INDEXSUFFIX + b'-tmp'
392 )
392 )
393 self.packfp = os.fdopen(self.packfp, 'wb+')
393 self.packfp = os.fdopen(self.packfp, 'wb+')
394 self.idxfp = os.fdopen(self.idxfp, 'wb+')
394 self.idxfp = os.fdopen(self.idxfp, 'wb+')
395 self.sha = hashlib.sha1()
395 self.sha = hashutil.sha1()
396 self._closed = False
396 self._closed = False
397
397
398 # The opener provides no way of doing permission fixup on files created
398 # The opener provides no way of doing permission fixup on files created
399 # via mkstemp, so we must fix it ourselves. We can probably fix this
399 # via mkstemp, so we must fix it ourselves. We can probably fix this
400 # upstream in vfs.mkstemp so we don't need to use the private method.
400 # upstream in vfs.mkstemp so we don't need to use the private method.
401 opener._fixfilemode(opener.join(self.packpath))
401 opener._fixfilemode(opener.join(self.packpath))
402 opener._fixfilemode(opener.join(self.idxpath))
402 opener._fixfilemode(opener.join(self.idxpath))
403
403
404 # Write header
404 # Write header
405 # TODO: make it extensible (ex: allow specifying compression algorithm,
405 # TODO: make it extensible (ex: allow specifying compression algorithm,
406 # a flexible key/value header, delta algorithm, fanout size, etc)
406 # a flexible key/value header, delta algorithm, fanout size, etc)
407 versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int
407 versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int
408 self.writeraw(versionbuf)
408 self.writeraw(versionbuf)
409
409
410 def __enter__(self):
410 def __enter__(self):
411 return self
411 return self
412
412
413 def __exit__(self, exc_type, exc_value, traceback):
413 def __exit__(self, exc_type, exc_value, traceback):
414 if exc_type is None:
414 if exc_type is None:
415 self.close()
415 self.close()
416 else:
416 else:
417 self.abort()
417 self.abort()
418
418
419 def abort(self):
419 def abort(self):
420 # Unclean exit
420 # Unclean exit
421 self._cleantemppacks()
421 self._cleantemppacks()
422
422
423 def writeraw(self, data):
423 def writeraw(self, data):
424 self.packfp.write(data)
424 self.packfp.write(data)
425 self.sha.update(data)
425 self.sha.update(data)
426
426
427 def close(self, ledger=None):
427 def close(self, ledger=None):
428 if self._closed:
428 if self._closed:
429 return
429 return
430
430
431 try:
431 try:
432 sha = nodemod.hex(self.sha.digest())
432 sha = nodemod.hex(self.sha.digest())
433 self.packfp.close()
433 self.packfp.close()
434 self.writeindex()
434 self.writeindex()
435
435
436 if len(self.entries) == 0:
436 if len(self.entries) == 0:
437 # Empty pack
437 # Empty pack
438 self._cleantemppacks()
438 self._cleantemppacks()
439 self._closed = True
439 self._closed = True
440 return None
440 return None
441
441
442 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
442 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
443 try:
443 try:
444 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
444 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
445 except Exception as ex:
445 except Exception as ex:
446 try:
446 try:
447 self.opener.unlink(sha + self.PACKSUFFIX)
447 self.opener.unlink(sha + self.PACKSUFFIX)
448 except Exception:
448 except Exception:
449 pass
449 pass
450 # Throw exception 'ex' explicitly since a normal 'raise' would
450 # Throw exception 'ex' explicitly since a normal 'raise' would
451 # potentially throw an exception from the unlink cleanup.
451 # potentially throw an exception from the unlink cleanup.
452 raise ex
452 raise ex
453 except Exception:
453 except Exception:
454 # Clean up temp packs in all exception cases
454 # Clean up temp packs in all exception cases
455 self._cleantemppacks()
455 self._cleantemppacks()
456 raise
456 raise
457
457
458 self._closed = True
458 self._closed = True
459 result = self.opener.join(sha)
459 result = self.opener.join(sha)
460 if ledger:
460 if ledger:
461 ledger.addcreated(result)
461 ledger.addcreated(result)
462 return result
462 return result
463
463
464 def _cleantemppacks(self):
464 def _cleantemppacks(self):
465 try:
465 try:
466 self.opener.unlink(self.packpath)
466 self.opener.unlink(self.packpath)
467 except Exception:
467 except Exception:
468 pass
468 pass
469 try:
469 try:
470 self.opener.unlink(self.idxpath)
470 self.opener.unlink(self.idxpath)
471 except Exception:
471 except Exception:
472 pass
472 pass
473
473
474 def writeindex(self):
474 def writeindex(self):
475 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
475 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
476 if largefanout:
476 if largefanout:
477 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
477 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
478 else:
478 else:
479 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
479 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
480
480
481 fanouttable = [EMPTYFANOUT] * params.fanoutcount
481 fanouttable = [EMPTYFANOUT] * params.fanoutcount
482
482
483 # Precompute the location of each entry
483 # Precompute the location of each entry
484 locations = {}
484 locations = {}
485 count = 0
485 count = 0
486 for node in sorted(self.entries):
486 for node in sorted(self.entries):
487 location = count * self.INDEXENTRYLENGTH
487 location = count * self.INDEXENTRYLENGTH
488 locations[node] = location
488 locations[node] = location
489 count += 1
489 count += 1
490
490
491 # Must use [0] on the unpack result since it's always a tuple.
491 # Must use [0] on the unpack result since it's always a tuple.
492 fanoutkey = struct.unpack(
492 fanoutkey = struct.unpack(
493 params.fanoutstruct, node[: params.fanoutprefix]
493 params.fanoutstruct, node[: params.fanoutprefix]
494 )[0]
494 )[0]
495 if fanouttable[fanoutkey] == EMPTYFANOUT:
495 if fanouttable[fanoutkey] == EMPTYFANOUT:
496 fanouttable[fanoutkey] = location
496 fanouttable[fanoutkey] = location
497
497
498 rawfanouttable = b''
498 rawfanouttable = b''
499 last = 0
499 last = 0
500 for offset in fanouttable:
500 for offset in fanouttable:
501 offset = offset if offset != EMPTYFANOUT else last
501 offset = offset if offset != EMPTYFANOUT else last
502 last = offset
502 last = offset
503 rawfanouttable += struct.pack(b'!I', offset)
503 rawfanouttable += struct.pack(b'!I', offset)
504
504
505 rawentrieslength = struct.pack(b'!Q', len(self.entries))
505 rawentrieslength = struct.pack(b'!Q', len(self.entries))
506
506
507 # The index offset is its location in the file, i.e. after the 2 byte
507 # The index offset is its location in the file, i.e. after the 2 byte
508 # header and the fanouttable.
508 # header and the fanouttable.
509 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
509 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
510
510
511 self._writeheader(params)
511 self._writeheader(params)
512 self.idxfp.write(rawfanouttable)
512 self.idxfp.write(rawfanouttable)
513 self.idxfp.write(rawentrieslength)
513 self.idxfp.write(rawentrieslength)
514 self.idxfp.write(rawindex)
514 self.idxfp.write(rawindex)
515 self.idxfp.close()
515 self.idxfp.close()
516
516
517 def createindex(self, nodelocations):
517 def createindex(self, nodelocations):
518 raise NotImplementedError()
518 raise NotImplementedError()
519
519
520 def _writeheader(self, indexparams):
520 def _writeheader(self, indexparams):
521 # Index header
521 # Index header
522 # <version: 1 byte>
522 # <version: 1 byte>
523 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
523 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
524 # <unused: 7 bit> # future use (compression, delta format, etc)
524 # <unused: 7 bit> # future use (compression, delta format, etc)
525 config = 0
525 config = 0
526 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
526 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
527 config = 0b10000000
527 config = 0b10000000
528 self.idxfp.write(struct.pack(b'!BB', self.VERSION, config))
528 self.idxfp.write(struct.pack(b'!BB', self.VERSION, config))
529
529
530
530
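Editorial aside: the comment block in _writeheader fully specifies the two-byte index header, so a round-trip helper is easy to sketch. This is an illustrative aside rather than part of the changeset; LARGEFANOUTBIT mirrors the 0b10000000 mask used above.

import struct

LARGEFANOUTBIT = 0b10000000


def packheader(version, largefanout):
    """Build the two-byte header: the version byte, then the config byte."""
    config = LARGEFANOUTBIT if largefanout else 0
    return struct.pack(b'!BB', version, config)


def parseheader(raw):
    """Return (version, uses large fanout) from the first two bytes."""
    version, config = struct.unpack(b'!BB', raw[:2])
    return version, bool(config & LARGEFANOUTBIT)


# round trip: parseheader(packheader(2, True)) == (2, True)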
531 class indexparams(object):
531 class indexparams(object):
532 __slots__ = (
532 __slots__ = (
533 'fanoutprefix',
533 'fanoutprefix',
534 'fanoutstruct',
534 'fanoutstruct',
535 'fanoutcount',
535 'fanoutcount',
536 'fanoutsize',
536 'fanoutsize',
537 'indexstart',
537 'indexstart',
538 )
538 )
539
539
540 def __init__(self, prefixsize, version):
540 def __init__(self, prefixsize, version):
541 self.fanoutprefix = prefixsize
541 self.fanoutprefix = prefixsize
542
542
543 # The struct pack format for fanout table location (i.e. the format that
543 # The struct pack format for fanout table location (i.e. the format that
544 # converts the node prefix into an integer location in the fanout
544 # converts the node prefix into an integer location in the fanout
545 # table).
545 # table).
546 if prefixsize == SMALLFANOUTPREFIX:
546 if prefixsize == SMALLFANOUTPREFIX:
547 self.fanoutstruct = b'!B'
547 self.fanoutstruct = b'!B'
548 elif prefixsize == LARGEFANOUTPREFIX:
548 elif prefixsize == LARGEFANOUTPREFIX:
549 self.fanoutstruct = b'!H'
549 self.fanoutstruct = b'!H'
550 else:
550 else:
551 raise ValueError(b"invalid fanout prefix size: %s" % prefixsize)
551 raise ValueError(b"invalid fanout prefix size: %s" % prefixsize)
552
552
553 # The number of fanout table entries
553 # The number of fanout table entries
554 self.fanoutcount = 2 ** (prefixsize * 8)
554 self.fanoutcount = 2 ** (prefixsize * 8)
555
555
556 # The total bytes used by the fanout table
556 # The total bytes used by the fanout table
557 self.fanoutsize = self.fanoutcount * 4
557 self.fanoutsize = self.fanoutcount * 4
558
558
559 self.indexstart = FANOUTSTART + self.fanoutsize
559 self.indexstart = FANOUTSTART + self.fanoutsize
560 # Skip the index length
560 # Skip the index length
561 self.indexstart += 8
561 self.indexstart += 8
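Editorial aside: a worked example of the arithmetic in indexparams, assuming FANOUTSTART is just the two header bytes (as the header writer above suggests). A 1-byte prefix gives 256 fanout slots and a 2-byte prefix gives 65536, so index entries start at byte 1034 and byte 262154 respectively.

FANOUTSTART = 2  # assumed: the two index header bytes precede the fanout table


def indexstart(prefixsize):
    fanoutcount = 2 ** (prefixsize * 8)  # number of fanout slots
    fanoutsize = fanoutcount * 4         # four bytes per slot
    return FANOUTSTART + fanoutsize + 8  # +8 skips the entry-count field


# small fanout (1-byte prefix): 2 + 256 * 4 + 8   == 1034
# large fanout (2-byte prefix): 2 + 65536 * 4 + 8 == 262154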
@@ -1,461 +1,461 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import hashlib
5 import os
4 import os
6 import shutil
5 import shutil
7 import stat
6 import stat
8 import time
7 import time
9
8
10 from mercurial.i18n import _
9 from mercurial.i18n import _
11 from mercurial.node import bin, hex
10 from mercurial.node import bin, hex
12 from mercurial.pycompat import open
11 from mercurial.pycompat import open
13 from mercurial import (
12 from mercurial import (
14 error,
13 error,
15 pycompat,
14 pycompat,
16 util,
15 util,
17 )
16 )
17 from mercurial.utils import hashutil
18 from . import (
18 from . import (
19 constants,
19 constants,
20 shallowutil,
20 shallowutil,
21 )
21 )
22
22
23
23
24 class basestore(object):
24 class basestore(object):
25 def __init__(self, repo, path, reponame, shared=False):
25 def __init__(self, repo, path, reponame, shared=False):
26 """Creates a remotefilelog store object for the given repo name.
26 """Creates a remotefilelog store object for the given repo name.
27
27
28 `path` - The file path where this store keeps its data
28 `path` - The file path where this store keeps its data
29 `reponame` - The name of the repo. This is used to partition data from
29 `reponame` - The name of the repo. This is used to partition data from
30 many repos.
30 many repos.
31 `shared` - True if this store is a shared cache of data from the central
31 `shared` - True if this store is a shared cache of data from the central
32 server, for many repos on this machine. False means this store is for
32 server, for many repos on this machine. False means this store is for
33 the local data for one repo.
33 the local data for one repo.
34 """
34 """
35 self.repo = repo
35 self.repo = repo
36 self.ui = repo.ui
36 self.ui = repo.ui
37 self._path = path
37 self._path = path
38 self._reponame = reponame
38 self._reponame = reponame
39 self._shared = shared
39 self._shared = shared
40 self._uid = os.getuid() if not pycompat.iswindows else None
40 self._uid = os.getuid() if not pycompat.iswindows else None
41
41
42 self._validatecachelog = self.ui.config(
42 self._validatecachelog = self.ui.config(
43 b"remotefilelog", b"validatecachelog"
43 b"remotefilelog", b"validatecachelog"
44 )
44 )
45 self._validatecache = self.ui.config(
45 self._validatecache = self.ui.config(
46 b"remotefilelog", b"validatecache", b'on'
46 b"remotefilelog", b"validatecache", b'on'
47 )
47 )
48 if self._validatecache not in (b'on', b'strict', b'off'):
48 if self._validatecache not in (b'on', b'strict', b'off'):
49 self._validatecache = b'on'
49 self._validatecache = b'on'
50 if self._validatecache == b'off':
50 if self._validatecache == b'off':
51 self._validatecache = False
51 self._validatecache = False
52
52
53 if shared:
53 if shared:
54 shallowutil.mkstickygroupdir(self.ui, path)
54 shallowutil.mkstickygroupdir(self.ui, path)
55
55
56 def getmissing(self, keys):
56 def getmissing(self, keys):
57 missing = []
57 missing = []
58 for name, node in keys:
58 for name, node in keys:
59 filepath = self._getfilepath(name, node)
59 filepath = self._getfilepath(name, node)
60 exists = os.path.exists(filepath)
60 exists = os.path.exists(filepath)
61 if (
61 if (
62 exists
62 exists
63 and self._validatecache == b'strict'
63 and self._validatecache == b'strict'
64 and not self._validatekey(filepath, b'contains')
64 and not self._validatekey(filepath, b'contains')
65 ):
65 ):
66 exists = False
66 exists = False
67 if not exists:
67 if not exists:
68 missing.append((name, node))
68 missing.append((name, node))
69
69
70 return missing
70 return missing
71
71
72 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
72 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
73
73
74 def markledger(self, ledger, options=None):
74 def markledger(self, ledger, options=None):
75 if options and options.get(constants.OPTION_PACKSONLY):
75 if options and options.get(constants.OPTION_PACKSONLY):
76 return
76 return
77 if self._shared:
77 if self._shared:
78 for filename, nodes in self._getfiles():
78 for filename, nodes in self._getfiles():
79 for node in nodes:
79 for node in nodes:
80 ledger.markdataentry(self, filename, node)
80 ledger.markdataentry(self, filename, node)
81 ledger.markhistoryentry(self, filename, node)
81 ledger.markhistoryentry(self, filename, node)
82
82
83 def cleanup(self, ledger):
83 def cleanup(self, ledger):
84 ui = self.ui
84 ui = self.ui
85 entries = ledger.sources.get(self, [])
85 entries = ledger.sources.get(self, [])
86 count = 0
86 count = 0
87 progress = ui.makeprogress(
87 progress = ui.makeprogress(
88 _(b"cleaning up"), unit=b"files", total=len(entries)
88 _(b"cleaning up"), unit=b"files", total=len(entries)
89 )
89 )
90 for entry in entries:
90 for entry in entries:
91 if entry.gced or (entry.datarepacked and entry.historyrepacked):
91 if entry.gced or (entry.datarepacked and entry.historyrepacked):
92 progress.update(count)
92 progress.update(count)
93 path = self._getfilepath(entry.filename, entry.node)
93 path = self._getfilepath(entry.filename, entry.node)
94 util.tryunlink(path)
94 util.tryunlink(path)
95 count += 1
95 count += 1
96 progress.complete()
96 progress.complete()
97
97
98 # Clean up the repo cache directory.
98 # Clean up the repo cache directory.
99 self._cleanupdirectory(self._getrepocachepath())
99 self._cleanupdirectory(self._getrepocachepath())
100
100
101 # BELOW THIS ARE NON-STANDARD APIS
101 # BELOW THIS ARE NON-STANDARD APIS
102
102
103 def _cleanupdirectory(self, rootdir):
103 def _cleanupdirectory(self, rootdir):
104 """Removes the empty directories and unnecessary files within the root
104 """Removes the empty directories and unnecessary files within the root
105 directory recursively. Note that this method does not remove the root
105 directory recursively. Note that this method does not remove the root
106 directory itself. """
106 directory itself. """
107
107
108 oldfiles = set()
108 oldfiles = set()
109 otherfiles = set()
109 otherfiles = set()
110 # osutil.listdir returns stat information which saves some rmdir/listdir
110 # osutil.listdir returns stat information which saves some rmdir/listdir
111 # syscalls.
111 # syscalls.
112 for name, mode in util.osutil.listdir(rootdir):
112 for name, mode in util.osutil.listdir(rootdir):
113 if stat.S_ISDIR(mode):
113 if stat.S_ISDIR(mode):
114 dirpath = os.path.join(rootdir, name)
114 dirpath = os.path.join(rootdir, name)
115 self._cleanupdirectory(dirpath)
115 self._cleanupdirectory(dirpath)
116
116
117 # Now that the directory specified by dirpath is potentially
117 # Now that the directory specified by dirpath is potentially
118 # empty, try and remove it.
118 # empty, try and remove it.
119 try:
119 try:
120 os.rmdir(dirpath)
120 os.rmdir(dirpath)
121 except OSError:
121 except OSError:
122 pass
122 pass
123
123
124 elif stat.S_ISREG(mode):
124 elif stat.S_ISREG(mode):
125 if name.endswith(b'_old'):
125 if name.endswith(b'_old'):
126 oldfiles.add(name[:-4])
126 oldfiles.add(name[:-4])
127 else:
127 else:
128 otherfiles.add(name)
128 otherfiles.add(name)
129
129
130 # Remove the files which end with suffix '_old' and have no
130 # Remove the files which end with suffix '_old' and have no
131 # corresponding file without the suffix '_old'. See addremotefilelognode
131 # corresponding file without the suffix '_old'. See addremotefilelognode
132 # method for the generation/purpose of files with '_old' suffix.
132 # method for the generation/purpose of files with '_old' suffix.
133 for filename in oldfiles - otherfiles:
133 for filename in oldfiles - otherfiles:
134 filepath = os.path.join(rootdir, filename + b'_old')
134 filepath = os.path.join(rootdir, filename + b'_old')
135 util.tryunlink(filepath)
135 util.tryunlink(filepath)
136
136
137 def _getfiles(self):
137 def _getfiles(self):
138 """Return a list of (filename, [node,...]) for all the revisions that
138 """Return a list of (filename, [node,...]) for all the revisions that
139 exist in the store.
139 exist in the store.
140
140
141 This is useful for obtaining a list of all the contents of the store
141 This is useful for obtaining a list of all the contents of the store
142 when performing a repack to another store, since the store API requires
142 when performing a repack to another store, since the store API requires
143 name+node keys and not namehash+node keys.
143 name+node keys and not namehash+node keys.
144 """
144 """
145 existing = {}
145 existing = {}
146 for filenamehash, node in self._listkeys():
146 for filenamehash, node in self._listkeys():
147 existing.setdefault(filenamehash, []).append(node)
147 existing.setdefault(filenamehash, []).append(node)
148
148
149 filenamemap = self._resolvefilenames(existing.keys())
149 filenamemap = self._resolvefilenames(existing.keys())
150
150
151 for filename, sha in pycompat.iteritems(filenamemap):
151 for filename, sha in pycompat.iteritems(filenamemap):
152 yield (filename, existing[sha])
152 yield (filename, existing[sha])
153
153
154 def _resolvefilenames(self, hashes):
154 def _resolvefilenames(self, hashes):
155 """Given a list of filename hashes that are present in the
155 """Given a list of filename hashes that are present in the
156 remotefilelog store, return a mapping from filename->hash.
156 remotefilelog store, return a mapping from filename->hash.
157
157
158 This is useful when converting remotefilelog blobs into other storage
158 This is useful when converting remotefilelog blobs into other storage
159 formats.
159 formats.
160 """
160 """
161 if not hashes:
161 if not hashes:
162 return {}
162 return {}
163
163
164 filenames = {}
164 filenames = {}
165 missingfilename = set(hashes)
165 missingfilename = set(hashes)
166
166
167 # Start with a full manifest, since it'll cover the majority of files
167 # Start with a full manifest, since it'll cover the majority of files
168 for filename in self.repo[b'tip'].manifest():
168 for filename in self.repo[b'tip'].manifest():
169 sha = hashlib.sha1(filename).digest()
169 sha = hashutil.sha1(filename).digest()
170 if sha in missingfilename:
170 if sha in missingfilename:
171 filenames[filename] = sha
171 filenames[filename] = sha
172 missingfilename.discard(sha)
172 missingfilename.discard(sha)
173
173
174 # Scan the changelog until we've found every file name
174 # Scan the changelog until we've found every file name
175 cl = self.repo.unfiltered().changelog
175 cl = self.repo.unfiltered().changelog
176 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
176 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
177 if not missingfilename:
177 if not missingfilename:
178 break
178 break
179 files = cl.readfiles(cl.node(rev))
179 files = cl.readfiles(cl.node(rev))
180 for filename in files:
180 for filename in files:
181 sha = hashlib.sha1(filename).digest()
181 sha = hashutil.sha1(filename).digest()
182 if sha in missingfilename:
182 if sha in missingfilename:
183 filenames[filename] = sha
183 filenames[filename] = sha
184 missingfilename.discard(sha)
184 missingfilename.discard(sha)
185
185
186 return filenames
186 return filenames
187
187
188 def _getrepocachepath(self):
188 def _getrepocachepath(self):
189 return (
189 return (
190 os.path.join(self._path, self._reponame)
190 os.path.join(self._path, self._reponame)
191 if self._shared
191 if self._shared
192 else self._path
192 else self._path
193 )
193 )
194
194
195 def _listkeys(self):
195 def _listkeys(self):
196 """List all the remotefilelog keys that exist in the store.
196 """List all the remotefilelog keys that exist in the store.
197
197
198 Returns an iterator of (filename hash, filecontent hash) tuples.
198 Returns an iterator of (filename hash, filecontent hash) tuples.
199 """
199 """
200
200
201 for root, dirs, files in os.walk(self._getrepocachepath()):
201 for root, dirs, files in os.walk(self._getrepocachepath()):
202 for filename in files:
202 for filename in files:
203 if len(filename) != 40:
203 if len(filename) != 40:
204 continue
204 continue
205 node = filename
205 node = filename
206 if self._shared:
206 if self._shared:
207 # .../1a/85ffda..be21
207 # .../1a/85ffda..be21
208 filenamehash = root[-41:-39] + root[-38:]
208 filenamehash = root[-41:-39] + root[-38:]
209 else:
209 else:
210 filenamehash = root[-40:]
210 filenamehash = root[-40:]
211 yield (bin(filenamehash), bin(node))
211 yield (bin(filenamehash), bin(node))
212
212
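Editorial aside: the negative slices in _listkeys imply a shared-cache layout of .../<reponame>/<first 2 hex chars of the filename hash>/<remaining 38 chars>/<40-char node>. The sketch below reconstructs both directions of that mapping; it is an illustrative aside inferred from the slicing, not code from the changeset, and plain hashlib.sha1 plus string paths stand in for Mercurial's hashutil.sha1 wrapper and byte paths so the snippet is self-contained.

import hashlib
import os


def sharedcachedir(cachepath, reponame, filename):
    """Directory assumed to hold the blobs for `filename` (bytes) in the
    shared cache."""
    namehash = hashlib.sha1(filename).hexdigest()
    return os.path.join(cachepath, reponame, namehash[:2], namehash[2:])


def filenamehashfromroot(root):
    """Rebuild the 40-char hex filename hash from such a directory, the
    same way _listkeys does with its negative slices."""
    return root[-41:-39] + root[-38:]


# filenamehashfromroot(sharedcachedir(p, r, f)) returns the full hex hash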
213 def _getfilepath(self, name, node):
213 def _getfilepath(self, name, node):
214 node = hex(node)
214 node = hex(node)
215 if self._shared:
215 if self._shared:
216 key = shallowutil.getcachekey(self._reponame, name, node)
216 key = shallowutil.getcachekey(self._reponame, name, node)
217 else:
217 else:
218 key = shallowutil.getlocalkey(name, node)
218 key = shallowutil.getlocalkey(name, node)
219
219
220 return os.path.join(self._path, key)
220 return os.path.join(self._path, key)
221
221
222 def _getdata(self, name, node):
222 def _getdata(self, name, node):
223 filepath = self._getfilepath(name, node)
223 filepath = self._getfilepath(name, node)
224 try:
224 try:
225 data = shallowutil.readfile(filepath)
225 data = shallowutil.readfile(filepath)
226 if self._validatecache and not self._validatedata(data, filepath):
226 if self._validatecache and not self._validatedata(data, filepath):
227 if self._validatecachelog:
227 if self._validatecachelog:
228 with open(self._validatecachelog, b'a+') as f:
228 with open(self._validatecachelog, b'a+') as f:
229 f.write(b"corrupt %s during read\n" % filepath)
229 f.write(b"corrupt %s during read\n" % filepath)
230 os.rename(filepath, filepath + b".corrupt")
230 os.rename(filepath, filepath + b".corrupt")
231 raise KeyError(b"corrupt local cache file %s" % filepath)
231 raise KeyError(b"corrupt local cache file %s" % filepath)
232 except IOError:
232 except IOError:
233 raise KeyError(
233 raise KeyError(
234 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
234 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
235 )
235 )
236
236
237 return data
237 return data
238
238
239 def addremotefilelognode(self, name, node, data):
239 def addremotefilelognode(self, name, node, data):
240 filepath = self._getfilepath(name, node)
240 filepath = self._getfilepath(name, node)
241
241
242 oldumask = os.umask(0o002)
242 oldumask = os.umask(0o002)
243 try:
243 try:
244 # if this node already exists, save the old version for
244 # if this node already exists, save the old version for
245 # recovery/debugging purposes.
245 # recovery/debugging purposes.
246 if os.path.exists(filepath):
246 if os.path.exists(filepath):
247 newfilename = filepath + b'_old'
247 newfilename = filepath + b'_old'
248 # newfilename can be read-only and shutil.copy will fail.
248 # newfilename can be read-only and shutil.copy will fail.
249 # Delete newfilename to avoid that.
249 # Delete newfilename to avoid that.
250 if os.path.exists(newfilename):
250 if os.path.exists(newfilename):
251 shallowutil.unlinkfile(newfilename)
251 shallowutil.unlinkfile(newfilename)
252 shutil.copy(filepath, newfilename)
252 shutil.copy(filepath, newfilename)
253
253
254 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
254 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
255 shallowutil.writefile(filepath, data, readonly=True)
255 shallowutil.writefile(filepath, data, readonly=True)
256
256
257 if self._validatecache:
257 if self._validatecache:
258 if not self._validatekey(filepath, b'write'):
258 if not self._validatekey(filepath, b'write'):
259 raise error.Abort(
259 raise error.Abort(
260 _(b"local cache write was corrupted %s") % filepath
260 _(b"local cache write was corrupted %s") % filepath
261 )
261 )
262 finally:
262 finally:
263 os.umask(oldumask)
263 os.umask(oldumask)
264
264
265 def markrepo(self, path):
265 def markrepo(self, path):
266 """Call this to add the given repo path to the store's list of
266 """Call this to add the given repo path to the store's list of
267 repositories that are using it. This is useful later when doing garbage
267 repositories that are using it. This is useful later when doing garbage
268 collection, since it allows us to inspect the repos to see what nodes
268 collection, since it allows us to inspect the repos to see what nodes
269 they want to be kept alive in the store.
269 they want to be kept alive in the store.
270 """
270 """
271 repospath = os.path.join(self._path, b"repos")
271 repospath = os.path.join(self._path, b"repos")
272 with open(repospath, b'ab') as reposfile:
272 with open(repospath, b'ab') as reposfile:
273 reposfile.write(os.path.dirname(path) + b"\n")
273 reposfile.write(os.path.dirname(path) + b"\n")
274
274
275 repospathstat = os.stat(repospath)
275 repospathstat = os.stat(repospath)
276 if repospathstat.st_uid == self._uid:
276 if repospathstat.st_uid == self._uid:
277 os.chmod(repospath, 0o0664)
277 os.chmod(repospath, 0o0664)
278
278
279 def _validatekey(self, path, action):
279 def _validatekey(self, path, action):
280 with open(path, b'rb') as f:
280 with open(path, b'rb') as f:
281 data = f.read()
281 data = f.read()
282
282
283 if self._validatedata(data, path):
283 if self._validatedata(data, path):
284 return True
284 return True
285
285
286 if self._validatecachelog:
286 if self._validatecachelog:
287 with open(self._validatecachelog, b'ab+') as f:
287 with open(self._validatecachelog, b'ab+') as f:
288 f.write(b"corrupt %s during %s\n" % (path, action))
288 f.write(b"corrupt %s during %s\n" % (path, action))
289
289
290 os.rename(path, path + b".corrupt")
290 os.rename(path, path + b".corrupt")
291 return False
291 return False
292
292
293 def _validatedata(self, data, path):
293 def _validatedata(self, data, path):
294 try:
294 try:
295 if len(data) > 0:
295 if len(data) > 0:
296 # see remotefilelogserver.createfileblob for the format
296 # see remotefilelogserver.createfileblob for the format
297 offset, size, flags = shallowutil.parsesizeflags(data)
297 offset, size, flags = shallowutil.parsesizeflags(data)
298 if len(data) <= size:
298 if len(data) <= size:
299 # it is truncated
299 # it is truncated
300 return False
300 return False
301
301
302 # extract the node from the metadata
302 # extract the node from the metadata
303 offset += size
303 offset += size
304 datanode = data[offset : offset + 20]
304 datanode = data[offset : offset + 20]
305
305
306 # and compare against the path
306 # and compare against the path
307 if os.path.basename(path) == hex(datanode):
307 if os.path.basename(path) == hex(datanode):
308 # Content matches the intended path
308 # Content matches the intended path
309 return True
309 return True
310 return False
310 return False
311 except (ValueError, RuntimeError):
311 except (ValueError, RuntimeError):
312 pass
312 pass
313
313
314 return False
314 return False
315
315
316 def gc(self, keepkeys):
316 def gc(self, keepkeys):
317 ui = self.ui
317 ui = self.ui
318 cachepath = self._path
318 cachepath = self._path
319
319
320 # prune cache
320 # prune cache
321 queue = pycompat.queue.PriorityQueue()
321 queue = pycompat.queue.PriorityQueue()
322 originalsize = 0
322 originalsize = 0
323 size = 0
323 size = 0
324 count = 0
324 count = 0
325 removed = 0
325 removed = 0
326
326
327 # keep files newer than a day even if they aren't needed
327 # keep files newer than a day even if they aren't needed
328 limit = time.time() - (60 * 60 * 24)
328 limit = time.time() - (60 * 60 * 24)
329
329
330 progress = ui.makeprogress(
330 progress = ui.makeprogress(
331 _(b"removing unnecessary files"), unit=b"files"
331 _(b"removing unnecessary files"), unit=b"files"
332 )
332 )
333 progress.update(0)
333 progress.update(0)
334 for root, dirs, files in os.walk(cachepath):
334 for root, dirs, files in os.walk(cachepath):
335 for file in files:
335 for file in files:
336 if file == b'repos':
336 if file == b'repos':
337 continue
337 continue
338
338
339 # Don't delete pack files
339 # Don't delete pack files
340 if b'/packs/' in root:
340 if b'/packs/' in root:
341 continue
341 continue
342
342
343 progress.update(count)
343 progress.update(count)
344 path = os.path.join(root, file)
344 path = os.path.join(root, file)
345 key = os.path.relpath(path, cachepath)
345 key = os.path.relpath(path, cachepath)
346 count += 1
346 count += 1
347 try:
347 try:
348 pathstat = os.stat(path)
348 pathstat = os.stat(path)
349 except OSError as e:
349 except OSError as e:
350 # errno.ENOENT = no such file or directory
350 # errno.ENOENT = no such file or directory
351 if e.errno != errno.ENOENT:
351 if e.errno != errno.ENOENT:
352 raise
352 raise
353 msg = _(
353 msg = _(
354 b"warning: file %s was removed by another process\n"
354 b"warning: file %s was removed by another process\n"
355 )
355 )
356 ui.warn(msg % path)
356 ui.warn(msg % path)
357 continue
357 continue
358
358
359 originalsize += pathstat.st_size
359 originalsize += pathstat.st_size
360
360
361 if key in keepkeys or pathstat.st_atime > limit:
361 if key in keepkeys or pathstat.st_atime > limit:
362 queue.put((pathstat.st_atime, path, pathstat))
362 queue.put((pathstat.st_atime, path, pathstat))
363 size += pathstat.st_size
363 size += pathstat.st_size
364 else:
364 else:
365 try:
365 try:
366 shallowutil.unlinkfile(path)
366 shallowutil.unlinkfile(path)
367 except OSError as e:
367 except OSError as e:
368 # errno.ENOENT = no such file or directory
368 # errno.ENOENT = no such file or directory
369 if e.errno != errno.ENOENT:
369 if e.errno != errno.ENOENT:
370 raise
370 raise
371 msg = _(
371 msg = _(
372 b"warning: file %s was removed by another "
372 b"warning: file %s was removed by another "
373 b"process\n"
373 b"process\n"
374 )
374 )
375 ui.warn(msg % path)
375 ui.warn(msg % path)
376 continue
376 continue
377 removed += 1
377 removed += 1
378 progress.complete()
378 progress.complete()
379
379
380 # remove oldest files until under limit
380 # remove oldest files until under limit
381 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
381 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
382 if size > limit:
382 if size > limit:
383 excess = size - limit
383 excess = size - limit
384 progress = ui.makeprogress(
384 progress = ui.makeprogress(
385 _(b"enforcing cache limit"), unit=b"bytes", total=excess
385 _(b"enforcing cache limit"), unit=b"bytes", total=excess
386 )
386 )
387 removedexcess = 0
387 removedexcess = 0
388 while queue and size > limit and size > 0:
388 while queue and size > limit and size > 0:
389 progress.update(removedexcess)
389 progress.update(removedexcess)
390 atime, oldpath, oldpathstat = queue.get()
390 atime, oldpath, oldpathstat = queue.get()
391 try:
391 try:
392 shallowutil.unlinkfile(oldpath)
392 shallowutil.unlinkfile(oldpath)
393 except OSError as e:
393 except OSError as e:
394 # errno.ENOENT = no such file or directory
394 # errno.ENOENT = no such file or directory
395 if e.errno != errno.ENOENT:
395 if e.errno != errno.ENOENT:
396 raise
396 raise
397 msg = _(
397 msg = _(
398 b"warning: file %s was removed by another process\n"
398 b"warning: file %s was removed by another process\n"
399 )
399 )
400 ui.warn(msg % oldpath)
400 ui.warn(msg % oldpath)
401 size -= oldpathstat.st_size
401 size -= oldpathstat.st_size
402 removed += 1
402 removed += 1
403 removedexcess += oldpathstat.st_size
403 removedexcess += oldpathstat.st_size
404 progress.complete()
404 progress.complete()
405
405
406 ui.status(
406 ui.status(
407 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
407 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
408 % (
408 % (
409 removed,
409 removed,
410 count,
410 count,
411 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
411 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
412 float(size) / 1024.0 / 1024.0 / 1024.0,
412 float(size) / 1024.0 / 1024.0 / 1024.0,
413 )
413 )
414 )
414 )
415
415
416
416
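Editorial aside: gc() above prunes in two passes; it drops files that are neither referenced nor younger than a day, then evicts the least recently used survivors until the cache fits under remotefilelog.cachelimit. A condensed standalone sketch of that policy (returning the would-be victims instead of unlinking them) follows; it is not the extension's code.

import heapq
import os
import time


def prune(paths, keepkeys, limit):
    """Return the set of paths that the policy would evict."""
    dayago = time.time() - 24 * 60 * 60
    keep = []        # (atime, path, size) candidates for later eviction
    size = 0
    evicted = set()
    for path in paths:
        st = os.stat(path)
        if path in keepkeys or st.st_atime > dayago:
            heapq.heappush(keep, (st.st_atime, path, st.st_size))
            size += st.st_size
        else:
            evicted.add(path)
    # evict oldest-accessed files until the remainder fits under the limit
    while keep and size > limit:
        __, path, pathsize = heapq.heappop(keep)
        evicted.add(path)
        size -= pathsize
    return evicted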
417 class baseunionstore(object):
417 class baseunionstore(object):
418 def __init__(self, *args, **kwargs):
418 def __init__(self, *args, **kwargs):
419 # If one of the functions that iterates all of the stores is about to
419 # If one of the functions that iterates all of the stores is about to
420 # throw a KeyError, try this many times with a full refresh between
420 # throw a KeyError, try this many times with a full refresh between
421 # attempts. A repack operation may have moved data from one store to
421 # attempts. A repack operation may have moved data from one store to
422 # another while we were running.
422 # another while we were running.
423 self.numattempts = kwargs.get('numretries', 0) + 1
423 self.numattempts = kwargs.get('numretries', 0) + 1
424 # If not-None, call this function on every retry and if the attempts are
424 # If not-None, call this function on every retry and if the attempts are
425 # exhausted.
425 # exhausted.
426 self.retrylog = kwargs.get('retrylog', None)
426 self.retrylog = kwargs.get('retrylog', None)
427
427
428 def markforrefresh(self):
428 def markforrefresh(self):
429 for store in self.stores:
429 for store in self.stores:
430 if util.safehasattr(store, b'markforrefresh'):
430 if util.safehasattr(store, b'markforrefresh'):
431 store.markforrefresh()
431 store.markforrefresh()
432
432
433 @staticmethod
433 @staticmethod
434 def retriable(fn):
434 def retriable(fn):
435 def noop(*args):
435 def noop(*args):
436 pass
436 pass
437
437
438 def wrapped(self, *args, **kwargs):
438 def wrapped(self, *args, **kwargs):
439 retrylog = self.retrylog or noop
439 retrylog = self.retrylog or noop
440 funcname = fn.__name__
440 funcname = fn.__name__
441 i = 0
441 i = 0
442 while i < self.numattempts:
442 while i < self.numattempts:
443 if i > 0:
443 if i > 0:
444 retrylog(
444 retrylog(
445 b're-attempting (n=%d) %s\n'
445 b're-attempting (n=%d) %s\n'
446 % (i, pycompat.sysbytes(funcname))
446 % (i, pycompat.sysbytes(funcname))
447 )
447 )
448 self.markforrefresh()
448 self.markforrefresh()
449 i += 1
449 i += 1
450 try:
450 try:
451 return fn(self, *args, **kwargs)
451 return fn(self, *args, **kwargs)
452 except KeyError:
452 except KeyError:
453 if i == self.numattempts:
453 if i == self.numattempts:
454 # retries exhausted
454 # retries exhausted
455 retrylog(
455 retrylog(
456 b'retries exhausted in %s, raising KeyError\n'
456 b'retries exhausted in %s, raising KeyError\n'
457 % pycompat.sysbytes(funcname)
457 % pycompat.sysbytes(funcname)
458 )
458 )
459 raise
459 raise
460
460
461 return wrapped
461 return wrapped
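Editorial aside: a hedged usage sketch for the retriable decorator. A union store would wrap lookups that can race with a background repack, so a KeyError triggers markforrefresh() and a bounded number of retries. The class and method names below (unionexample, get) are invented for illustration and assume the baseunionstore definition above.

class unionexample(baseunionstore):
    def __init__(self, *stores, **kwargs):
        super(unionexample, self).__init__(**kwargs)
        self.stores = stores

    @baseunionstore.retriable
    def get(self, name, node):
        # Try each backing store; a KeyError here makes the decorator
        # refresh the stores and retry up to numattempts times.
        for store in self.stores:
            try:
                return store.get(name, node)
            except KeyError:
                continue
        raise KeyError((name, node))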
@@ -1,477 +1,477 b''
1 # debugcommands.py - debug logic for remotefilelog
1 # debugcommands.py - debug logic for remotefilelog
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import hashlib
10 import os
9 import os
11 import zlib
10 import zlib
12
11
13 from mercurial.node import bin, hex, nullid, short
12 from mercurial.node import bin, hex, nullid, short
14 from mercurial.i18n import _
13 from mercurial.i18n import _
15 from mercurial.pycompat import open
14 from mercurial.pycompat import open
16 from mercurial import (
15 from mercurial import (
17 error,
16 error,
18 filelog,
17 filelog,
19 lock as lockmod,
18 lock as lockmod,
20 node as nodemod,
19 node as nodemod,
21 pycompat,
20 pycompat,
22 revlog,
21 revlog,
23 )
22 )
23 from mercurial.utils import hashutil
24 from . import (
24 from . import (
25 constants,
25 constants,
26 datapack,
26 datapack,
27 fileserverclient,
27 fileserverclient,
28 historypack,
28 historypack,
29 repack,
29 repack,
30 shallowutil,
30 shallowutil,
31 )
31 )
32
32
33
33
34 def debugremotefilelog(ui, path, **opts):
34 def debugremotefilelog(ui, path, **opts):
35 decompress = opts.get('decompress')
35 decompress = opts.get('decompress')
36
36
37 size, firstnode, mapping = parsefileblob(path, decompress)
37 size, firstnode, mapping = parsefileblob(path, decompress)
38
38
39 ui.status(_(b"size: %d bytes\n") % size)
39 ui.status(_(b"size: %d bytes\n") % size)
40 ui.status(_(b"path: %s \n") % path)
40 ui.status(_(b"path: %s \n") % path)
41 ui.status(_(b"key: %s \n") % (short(firstnode)))
41 ui.status(_(b"key: %s \n") % (short(firstnode)))
42 ui.status(_(b"\n"))
42 ui.status(_(b"\n"))
43 ui.status(
43 ui.status(
44 _(b"%12s => %12s %13s %13s %12s\n")
44 _(b"%12s => %12s %13s %13s %12s\n")
45 % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
45 % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
46 )
46 )
47
47
48 queue = [firstnode]
48 queue = [firstnode]
49 while queue:
49 while queue:
50 node = queue.pop(0)
50 node = queue.pop(0)
51 p1, p2, linknode, copyfrom = mapping[node]
51 p1, p2, linknode, copyfrom = mapping[node]
52 ui.status(
52 ui.status(
53 _(b"%s => %s %s %s %s\n")
53 _(b"%s => %s %s %s %s\n")
54 % (short(node), short(p1), short(p2), short(linknode), copyfrom)
54 % (short(node), short(p1), short(p2), short(linknode), copyfrom)
55 )
55 )
56 if p1 != nullid:
56 if p1 != nullid:
57 queue.append(p1)
57 queue.append(p1)
58 if p2 != nullid:
58 if p2 != nullid:
59 queue.append(p2)
59 queue.append(p2)
60
60
61
61
62 def buildtemprevlog(repo, file):
62 def buildtemprevlog(repo, file):
63 # get filename key
63 # get filename key
64 filekey = nodemod.hex(hashlib.sha1(file).digest())
64 filekey = nodemod.hex(hashutil.sha1(file).digest())
65 filedir = os.path.join(repo.path, b'store/data', filekey)
65 filedir = os.path.join(repo.path, b'store/data', filekey)
66
66
67 # sort all entries based on linkrev
67 # sort all entries based on linkrev
68 fctxs = []
68 fctxs = []
69 for filenode in os.listdir(filedir):
69 for filenode in os.listdir(filedir):
70 if b'_old' not in filenode:
70 if b'_old' not in filenode:
71 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
71 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
72
72
73 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
73 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
74
74
75 # add to revlog
75 # add to revlog
76 temppath = repo.sjoin(b'data/temprevlog.i')
76 temppath = repo.sjoin(b'data/temprevlog.i')
77 if os.path.exists(temppath):
77 if os.path.exists(temppath):
78 os.remove(temppath)
78 os.remove(temppath)
79 r = filelog.filelog(repo.svfs, b'temprevlog')
79 r = filelog.filelog(repo.svfs, b'temprevlog')
80
80
81 class faket(object):
81 class faket(object):
82 def add(self, a, b, c):
82 def add(self, a, b, c):
83 pass
83 pass
84
84
85 t = faket()
85 t = faket()
86 for fctx in fctxs:
86 for fctx in fctxs:
87 if fctx.node() not in repo:
87 if fctx.node() not in repo:
88 continue
88 continue
89
89
90 p = fctx.filelog().parents(fctx.filenode())
90 p = fctx.filelog().parents(fctx.filenode())
91 meta = {}
91 meta = {}
92 if fctx.renamed():
92 if fctx.renamed():
93 meta[b'copy'] = fctx.renamed()[0]
93 meta[b'copy'] = fctx.renamed()[0]
94 meta[b'copyrev'] = hex(fctx.renamed()[1])
94 meta[b'copyrev'] = hex(fctx.renamed()[1])
95
95
96 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
96 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
97
97
98 return r
98 return r
99
99
100
100
101 def debugindex(orig, ui, repo, file_=None, **opts):
101 def debugindex(orig, ui, repo, file_=None, **opts):
102 """dump the contents of an index file"""
102 """dump the contents of an index file"""
103 if (
103 if (
104 opts.get('changelog')
104 opts.get('changelog')
105 or opts.get('manifest')
105 or opts.get('manifest')
106 or opts.get('dir')
106 or opts.get('dir')
107 or not shallowutil.isenabled(repo)
107 or not shallowutil.isenabled(repo)
108 or not repo.shallowmatch(file_)
108 or not repo.shallowmatch(file_)
109 ):
109 ):
110 return orig(ui, repo, file_, **opts)
110 return orig(ui, repo, file_, **opts)
111
111
112 r = buildtemprevlog(repo, file_)
112 r = buildtemprevlog(repo, file_)
113
113
114 # debugindex like normal
114 # debugindex like normal
115 format = opts.get(b'format', 0)
115 format = opts.get(b'format', 0)
116 if format not in (0, 1):
116 if format not in (0, 1):
117 raise error.Abort(_(b"unknown format %d") % format)
117 raise error.Abort(_(b"unknown format %d") % format)
118
118
119 generaldelta = r.version & revlog.FLAG_GENERALDELTA
119 generaldelta = r.version & revlog.FLAG_GENERALDELTA
120 if generaldelta:
120 if generaldelta:
121 basehdr = b' delta'
121 basehdr = b' delta'
122 else:
122 else:
123 basehdr = b' base'
123 basehdr = b' base'
124
124
125 if format == 0:
125 if format == 0:
126 ui.write(
126 ui.write(
127 (
127 (
128 b" rev offset length " + basehdr + b" linkrev"
128 b" rev offset length " + basehdr + b" linkrev"
129 b" nodeid p1 p2\n"
129 b" nodeid p1 p2\n"
130 )
130 )
131 )
131 )
132 elif format == 1:
132 elif format == 1:
133 ui.write(
133 ui.write(
134 (
134 (
135 b" rev flag offset length"
135 b" rev flag offset length"
136 b" size " + basehdr + b" link p1 p2"
136 b" size " + basehdr + b" link p1 p2"
137 b" nodeid\n"
137 b" nodeid\n"
138 )
138 )
139 )
139 )
140
140
141 for i in r:
141 for i in r:
142 node = r.node(i)
142 node = r.node(i)
143 if generaldelta:
143 if generaldelta:
144 base = r.deltaparent(i)
144 base = r.deltaparent(i)
145 else:
145 else:
146 base = r.chainbase(i)
146 base = r.chainbase(i)
147 if format == 0:
147 if format == 0:
148 try:
148 try:
149 pp = r.parents(node)
149 pp = r.parents(node)
150 except Exception:
150 except Exception:
151 pp = [nullid, nullid]
151 pp = [nullid, nullid]
152 ui.write(
152 ui.write(
153 b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
153 b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
154 % (
154 % (
155 i,
155 i,
156 r.start(i),
156 r.start(i),
157 r.length(i),
157 r.length(i),
158 base,
158 base,
159 r.linkrev(i),
159 r.linkrev(i),
160 short(node),
160 short(node),
161 short(pp[0]),
161 short(pp[0]),
162 short(pp[1]),
162 short(pp[1]),
163 )
163 )
164 )
164 )
165 elif format == 1:
165 elif format == 1:
166 pr = r.parentrevs(i)
166 pr = r.parentrevs(i)
167 ui.write(
167 ui.write(
168 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
168 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
169 % (
169 % (
170 i,
170 i,
171 r.flags(i),
171 r.flags(i),
172 r.start(i),
172 r.start(i),
173 r.length(i),
173 r.length(i),
174 r.rawsize(i),
174 r.rawsize(i),
175 base,
175 base,
176 r.linkrev(i),
176 r.linkrev(i),
177 pr[0],
177 pr[0],
178 pr[1],
178 pr[1],
179 short(node),
179 short(node),
180 )
180 )
181 )
181 )
182
182
183
183
184 def debugindexdot(orig, ui, repo, file_):
184 def debugindexdot(orig, ui, repo, file_):
185 """dump an index DAG as a graphviz dot file"""
185 """dump an index DAG as a graphviz dot file"""
186 if not shallowutil.isenabled(repo):
186 if not shallowutil.isenabled(repo):
187 return orig(ui, repo, file_)
187 return orig(ui, repo, file_)
188
188
189 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
189 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
190
190
191 ui.writenoi18n(b"digraph G {\n")
191 ui.writenoi18n(b"digraph G {\n")
192 for i in r:
192 for i in r:
193 node = r.node(i)
193 node = r.node(i)
194 pp = r.parents(node)
194 pp = r.parents(node)
195 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
195 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
196 if pp[1] != nullid:
196 if pp[1] != nullid:
197 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
197 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
198 ui.write(b"}\n")
198 ui.write(b"}\n")
199
199
200
200
201 def verifyremotefilelog(ui, path, **opts):
201 def verifyremotefilelog(ui, path, **opts):
202 decompress = opts.get('decompress')
202 decompress = opts.get('decompress')
203
203
204 for root, dirs, files in os.walk(path):
204 for root, dirs, files in os.walk(path):
205 for file in files:
205 for file in files:
206 if file == b"repos":
206 if file == b"repos":
207 continue
207 continue
208 filepath = os.path.join(root, file)
208 filepath = os.path.join(root, file)
209 size, firstnode, mapping = parsefileblob(filepath, decompress)
209 size, firstnode, mapping = parsefileblob(filepath, decompress)
210 for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping):
210 for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping):
211 if linknode == nullid:
211 if linknode == nullid:
212 actualpath = os.path.relpath(root, path)
212 actualpath = os.path.relpath(root, path)
213 key = fileserverclient.getcachekey(
213 key = fileserverclient.getcachekey(
214 b"reponame", actualpath, file
214 b"reponame", actualpath, file
215 )
215 )
216 ui.status(
216 ui.status(
217 b"%s %s\n" % (key, os.path.relpath(filepath, path))
217 b"%s %s\n" % (key, os.path.relpath(filepath, path))
218 )
218 )
219
219
220
220
221 def _decompressblob(raw):
221 def _decompressblob(raw):
222 return zlib.decompress(raw)
222 return zlib.decompress(raw)
223
223
224
224
225 def parsefileblob(path, decompress):
225 def parsefileblob(path, decompress):
226 f = open(path, b"rb")
226 f = open(path, b"rb")
227 try:
227 try:
228 raw = f.read()
228 raw = f.read()
229 finally:
229 finally:
230 f.close()
230 f.close()
231
231
232 if decompress:
232 if decompress:
233 raw = _decompressblob(raw)
233 raw = _decompressblob(raw)
234
234
235 offset, size, flags = shallowutil.parsesizeflags(raw)
235 offset, size, flags = shallowutil.parsesizeflags(raw)
236 start = offset + size
236 start = offset + size
237
237
238 firstnode = None
238 firstnode = None
239
239
240 mapping = {}
240 mapping = {}
241 while start < len(raw):
241 while start < len(raw):
242 divider = raw.index(b'\0', start + 80)
242 divider = raw.index(b'\0', start + 80)
243
243
244 currentnode = raw[start : (start + 20)]
244 currentnode = raw[start : (start + 20)]
245 if not firstnode:
245 if not firstnode:
246 firstnode = currentnode
246 firstnode = currentnode
247
247
248 p1 = raw[(start + 20) : (start + 40)]
248 p1 = raw[(start + 20) : (start + 40)]
249 p2 = raw[(start + 40) : (start + 60)]
249 p2 = raw[(start + 40) : (start + 60)]
250 linknode = raw[(start + 60) : (start + 80)]
250 linknode = raw[(start + 60) : (start + 80)]
251 copyfrom = raw[(start + 80) : divider]
251 copyfrom = raw[(start + 80) : divider]
252
252
253 mapping[currentnode] = (p1, p2, linknode, copyfrom)
253 mapping[currentnode] = (p1, p2, linknode, copyfrom)
254 start = divider + 1
254 start = divider + 1
255
255
256 return size, firstnode, mapping
256 return size, firstnode, mapping
257
257
258
258
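Editorial aside: parsefileblob above walks ancestor records laid out as four fixed 20-byte hashes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path. A small companion sketch for producing such a record, inferred from that parser rather than taken from the changeset:

def packancestorentry(node, p1, p2, linknode, copyfrom=b''):
    """Build one ancestor record as parsefileblob expects to read it:
    four 20-byte hashes, then copyfrom terminated by a NUL divider."""
    for field in (node, p1, p2, linknode):
        assert len(field) == 20
    return node + p1 + p2 + linknode + copyfrom + b'\0'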
259 def debugdatapack(ui, *paths, **opts):
259 def debugdatapack(ui, *paths, **opts):
260 for path in paths:
260 for path in paths:
261 if b'.data' in path:
261 if b'.data' in path:
262 path = path[: path.index(b'.data')]
262 path = path[: path.index(b'.data')]
263 ui.write(b"%s:\n" % path)
263 ui.write(b"%s:\n" % path)
264 dpack = datapack.datapack(path)
264 dpack = datapack.datapack(path)
265 node = opts.get('node')
265 node = opts.get('node')
266 if node:
266 if node:
267 deltachain = dpack.getdeltachain(b'', bin(node))
267 deltachain = dpack.getdeltachain(b'', bin(node))
268 dumpdeltachain(ui, deltachain, **opts)
268 dumpdeltachain(ui, deltachain, **opts)
269 return
269 return
270
270
271 if opts.get('long'):
271 if opts.get('long'):
272 hashformatter = hex
272 hashformatter = hex
273 hashlen = 42
273 hashlen = 42
274 else:
274 else:
275 hashformatter = short
275 hashformatter = short
276 hashlen = 14
276 hashlen = 14
277
277
278 lastfilename = None
278 lastfilename = None
279 totaldeltasize = 0
279 totaldeltasize = 0
280 totalblobsize = 0
280 totalblobsize = 0
281
281
282 def printtotals():
282 def printtotals():
283 if lastfilename is not None:
283 if lastfilename is not None:
284 ui.write(b"\n")
284 ui.write(b"\n")
285 if not totaldeltasize or not totalblobsize:
285 if not totaldeltasize or not totalblobsize:
286 return
286 return
287 difference = totalblobsize - totaldeltasize
287 difference = totalblobsize - totaldeltasize
288 deltastr = b"%0.1f%% %s" % (
288 deltastr = b"%0.1f%% %s" % (
289 (100.0 * abs(difference) / totalblobsize),
289 (100.0 * abs(difference) / totalblobsize),
290 (b"smaller" if difference > 0 else b"bigger"),
290 (b"smaller" if difference > 0 else b"bigger"),
291 )
291 )
292
292
293 ui.writenoi18n(
293 ui.writenoi18n(
294 b"Total:%s%s %s (%s)\n"
294 b"Total:%s%s %s (%s)\n"
295 % (
295 % (
296 b"".ljust(2 * hashlen - len(b"Total:")),
296 b"".ljust(2 * hashlen - len(b"Total:")),
297 (b'%d' % totaldeltasize).ljust(12),
297 (b'%d' % totaldeltasize).ljust(12),
298 (b'%d' % totalblobsize).ljust(9),
298 (b'%d' % totalblobsize).ljust(9),
299 deltastr,
299 deltastr,
300 )
300 )
301 )
301 )
302
302
303 bases = {}
303 bases = {}
304 nodes = set()
304 nodes = set()
305 failures = 0
305 failures = 0
306 for filename, node, deltabase, deltalen in dpack.iterentries():
306 for filename, node, deltabase, deltalen in dpack.iterentries():
307 bases[node] = deltabase
307 bases[node] = deltabase
308 if node in nodes:
308 if node in nodes:
309 ui.write((b"Bad entry: %s appears twice\n" % short(node)))
309 ui.write((b"Bad entry: %s appears twice\n" % short(node)))
310 failures += 1
310 failures += 1
311 nodes.add(node)
311 nodes.add(node)
312 if filename != lastfilename:
312 if filename != lastfilename:
313 printtotals()
313 printtotals()
314 name = b'(empty name)' if filename == b'' else filename
314 name = b'(empty name)' if filename == b'' else filename
315 ui.write(b"%s:\n" % name)
315 ui.write(b"%s:\n" % name)
316 ui.write(
316 ui.write(
317 b"%s%s%s%s\n"
317 b"%s%s%s%s\n"
318 % (
318 % (
319 b"Node".ljust(hashlen),
319 b"Node".ljust(hashlen),
320 b"Delta Base".ljust(hashlen),
320 b"Delta Base".ljust(hashlen),
321 b"Delta Length".ljust(14),
321 b"Delta Length".ljust(14),
322 b"Blob Size".ljust(9),
322 b"Blob Size".ljust(9),
323 )
323 )
324 )
324 )
325 lastfilename = filename
325 lastfilename = filename
326 totalblobsize = 0
326 totalblobsize = 0
327 totaldeltasize = 0
327 totaldeltasize = 0
328
328
329 # Metadata could be missing, in which case it will be an empty dict.
329 # Metadata could be missing, in which case it will be an empty dict.
330 meta = dpack.getmeta(filename, node)
330 meta = dpack.getmeta(filename, node)
331 if constants.METAKEYSIZE in meta:
331 if constants.METAKEYSIZE in meta:
332 blobsize = meta[constants.METAKEYSIZE]
332 blobsize = meta[constants.METAKEYSIZE]
333 totaldeltasize += deltalen
333 totaldeltasize += deltalen
334 totalblobsize += blobsize
334 totalblobsize += blobsize
335 else:
335 else:
336 blobsize = b"(missing)"
336 blobsize = b"(missing)"
337 ui.write(
337 ui.write(
338 b"%s %s %s%s\n"
338 b"%s %s %s%s\n"
339 % (
339 % (
340 hashformatter(node),
340 hashformatter(node),
341 hashformatter(deltabase),
341 hashformatter(deltabase),
342 (b'%d' % deltalen).ljust(14),
342 (b'%d' % deltalen).ljust(14),
343 pycompat.bytestr(blobsize),
343 pycompat.bytestr(blobsize),
344 )
344 )
345 )
345 )
346
346
347 if filename is not None:
347 if filename is not None:
348 printtotals()
348 printtotals()
349
349
350 failures += _sanitycheck(ui, set(nodes), bases)
350 failures += _sanitycheck(ui, set(nodes), bases)
351 if failures > 1:
351 if failures > 1:
352 ui.warn((b"%d failures\n" % failures))
352 ui.warn((b"%d failures\n" % failures))
353 return 1
353 return 1
354
354
355
355
356 def _sanitycheck(ui, nodes, bases):
356 def _sanitycheck(ui, nodes, bases):
357 """
357 """
358 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
358 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
359 mapping of node->base):
359 mapping of node->base):
360
360
361 - Each deltabase must itself be a node elsewhere in the pack
361 - Each deltabase must itself be a node elsewhere in the pack
362 - There must be no cycles
362 - There must be no cycles
363 """
363 """
364 failures = 0
364 failures = 0
365 for node in nodes:
365 for node in nodes:
366 seen = set()
366 seen = set()
367 current = node
367 current = node
368 deltabase = bases[current]
368 deltabase = bases[current]
369
369
370 while deltabase != nullid:
370 while deltabase != nullid:
371 if deltabase not in nodes:
371 if deltabase not in nodes:
372 ui.warn(
372 ui.warn(
373 (
373 (
374 b"Bad entry: %s has an unknown deltabase (%s)\n"
374 b"Bad entry: %s has an unknown deltabase (%s)\n"
375 % (short(node), short(deltabase))
375 % (short(node), short(deltabase))
376 )
376 )
377 )
377 )
378 failures += 1
378 failures += 1
379 break
379 break
380
380
381 if deltabase in seen:
381 if deltabase in seen:
382 ui.warn(
382 ui.warn(
383 (
383 (
384 b"Bad entry: %s has a cycle (at %s)\n"
384 b"Bad entry: %s has a cycle (at %s)\n"
385 % (short(node), short(deltabase))
385 % (short(node), short(deltabase))
386 )
386 )
387 )
387 )
388 failures += 1
388 failures += 1
389 break
389 break
390
390
391 current = deltabase
391 current = deltabase
392 seen.add(current)
392 seen.add(current)
393 deltabase = bases[current]
393 deltabase = bases[current]
394 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
394 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
395 # so we don't traverse it again.
395 # so we don't traverse it again.
396 bases[node] = nullid
396 bases[node] = nullid
397 return failures
397 return failures
398
398
399
399
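To make the two failure modes listed in the docstring concrete, here is a tiny self-contained illustration with toy two-byte "nodes" rather than real hashes; the walk is a simplified paraphrase of the loop above, not the function itself.

# Toy illustration of the two bad shapes _sanitycheck rejects.
NULL = b'\0\0'

nodes_missing_base = {b'aa'}
bases_missing_base = {b'aa': b'zz'}         # b'zz' is not in the pack at all

nodes_cycle = {b'aa', b'bb'}
bases_cycle = {b'aa': b'bb', b'bb': b'aa'}  # delta cycle, never reaches NULL

def walks_to_null(node, nodes, bases):
    # Simplified version of the walk above: follow delta bases until NULL,
    # an unknown base, or a repeated node.
    seen = set()
    base = bases[node]
    while base != NULL:
        if base not in nodes or base in seen:
            return False
        seen.add(base)
        base = bases[base]
    return True

assert not walks_to_null(b'aa', nodes_missing_base, bases_missing_base)
assert not walks_to_null(b'aa', nodes_cycle, bases_cycle)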
400 def dumpdeltachain(ui, deltachain, **opts):
400 def dumpdeltachain(ui, deltachain, **opts):
401 hashformatter = hex
401 hashformatter = hex
402 hashlen = 40
402 hashlen = 40
403
403
404 lastfilename = None
404 lastfilename = None
405 for filename, node, filename, deltabasenode, delta in deltachain:
405 for filename, node, filename, deltabasenode, delta in deltachain:
406 if filename != lastfilename:
406 if filename != lastfilename:
407 ui.write(b"\n%s\n" % filename)
407 ui.write(b"\n%s\n" % filename)
408 lastfilename = filename
408 lastfilename = filename
409 ui.write(
409 ui.write(
410 b"%s %s %s %s\n"
410 b"%s %s %s %s\n"
411 % (
411 % (
412 b"Node".ljust(hashlen),
412 b"Node".ljust(hashlen),
413 b"Delta Base".ljust(hashlen),
413 b"Delta Base".ljust(hashlen),
414 b"Delta SHA1".ljust(hashlen),
414 b"Delta SHA1".ljust(hashlen),
415 b"Delta Length".ljust(6),
415 b"Delta Length".ljust(6),
416 )
416 )
417 )
417 )
418
418
419 ui.write(
419 ui.write(
420 b"%s %s %s %d\n"
420 b"%s %s %s %d\n"
421 % (
421 % (
422 hashformatter(node),
422 hashformatter(node),
423 hashformatter(deltabasenode),
423 hashformatter(deltabasenode),
424 nodemod.hex(hashlib.sha1(delta).digest()),
424 nodemod.hex(hashutil.sha1(delta).digest()),
425 len(delta),
425 len(delta),
426 )
426 )
427 )
427 )
428
428
429
429
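Editorial note on the change in this hunk: the only functional difference is hashlib.sha1 becoming hashutil.sha1. As far as I can tell, mercurial.utils.hashutil is a thin indirection that lets SHA-1 hashing go through a collision-detecting implementation (sha1dc) when one is bundled, and falls back to hashlib.sha1 otherwise; digests are identical either way, so call sites like the one above only rely on the familiar .digest()/.hexdigest() interface. A rough sketch of that shape (my paraphrase, not the module's verbatim source):

import hashlib

try:
    # Collision-detecting SHA-1 shipped in Mercurial's thirdparty tree,
    # if this copy of Mercurial has it; the import path is an assumption.
    from mercurial.thirdparty import sha1dc
    sha1 = sha1dc.sha1
except (ImportError, AttributeError):
    sha1 = hashlib.sha1

assert sha1(b'some delta bytes').digest() == hashlib.sha1(b'some delta bytes').digest()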
430 def debughistorypack(ui, path):
430 def debughistorypack(ui, path):
431 if b'.hist' in path:
431 if b'.hist' in path:
432 path = path[: path.index(b'.hist')]
432 path = path[: path.index(b'.hist')]
433 hpack = historypack.historypack(path)
433 hpack = historypack.historypack(path)
434
434
435 lastfilename = None
435 lastfilename = None
436 for entry in hpack.iterentries():
436 for entry in hpack.iterentries():
437 filename, node, p1node, p2node, linknode, copyfrom = entry
437 filename, node, p1node, p2node, linknode, copyfrom = entry
438 if filename != lastfilename:
438 if filename != lastfilename:
439 ui.write(b"\n%s\n" % filename)
439 ui.write(b"\n%s\n" % filename)
440 ui.write(
440 ui.write(
441 b"%s%s%s%s%s\n"
441 b"%s%s%s%s%s\n"
442 % (
442 % (
443 b"Node".ljust(14),
443 b"Node".ljust(14),
444 b"P1 Node".ljust(14),
444 b"P1 Node".ljust(14),
445 b"P2 Node".ljust(14),
445 b"P2 Node".ljust(14),
446 b"Link Node".ljust(14),
446 b"Link Node".ljust(14),
447 b"Copy From",
447 b"Copy From",
448 )
448 )
449 )
449 )
450 lastfilename = filename
450 lastfilename = filename
451 ui.write(
451 ui.write(
452 b"%s %s %s %s %s\n"
452 b"%s %s %s %s %s\n"
453 % (
453 % (
454 short(node),
454 short(node),
455 short(p1node),
455 short(p1node),
456 short(p2node),
456 short(p2node),
457 short(linknode),
457 short(linknode),
458 copyfrom,
458 copyfrom,
459 )
459 )
460 )
460 )
461
461
462
462
463 def debugwaitonrepack(repo):
463 def debugwaitonrepack(repo):
464 with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
464 with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
465 return
465 return
466
466
467
467
468 def debugwaitonprefetch(repo):
468 def debugwaitonprefetch(repo):
469 with repo._lock(
469 with repo._lock(
470 repo.svfs,
470 repo.svfs,
471 b"prefetchlock",
471 b"prefetchlock",
472 True,
472 True,
473 None,
473 None,
474 None,
474 None,
475 _(b'prefetching in %s') % repo.origroot,
475 _(b'prefetching in %s') % repo.origroot,
476 ):
476 ):
477 pass
477 pass
@@ -1,667 +1,669 b''
1 # fileserverclient.py - client for communicating with the cache process
1 # fileserverclient.py - client for communicating with the cache process
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import hashlib
11 import io
10 import io
12 import os
11 import os
13 import threading
12 import threading
14 import time
13 import time
15 import zlib
14 import zlib
16
15
17 from mercurial.i18n import _
16 from mercurial.i18n import _
18 from mercurial.node import bin, hex, nullid
17 from mercurial.node import bin, hex, nullid
19 from mercurial import (
18 from mercurial import (
20 error,
19 error,
21 node,
20 node,
22 pycompat,
21 pycompat,
23 revlog,
22 revlog,
24 sshpeer,
23 sshpeer,
25 util,
24 util,
26 wireprotov1peer,
25 wireprotov1peer,
27 )
26 )
28 from mercurial.utils import procutil
27 from mercurial.utils import (
28 hashutil,
29 procutil,
30 )
29
31
30 from . import (
32 from . import (
31 constants,
33 constants,
32 contentstore,
34 contentstore,
33 metadatastore,
35 metadatastore,
34 )
36 )
35
37
36 _sshv1peer = sshpeer.sshv1peer
38 _sshv1peer = sshpeer.sshv1peer
37
39
38 # Statistics for debugging
40 # Statistics for debugging
39 fetchcost = 0
41 fetchcost = 0
40 fetches = 0
42 fetches = 0
41 fetched = 0
43 fetched = 0
42 fetchmisses = 0
44 fetchmisses = 0
43
45
44 _lfsmod = None
46 _lfsmod = None
45
47
46
48
47 def getcachekey(reponame, file, id):
49 def getcachekey(reponame, file, id):
48 pathhash = node.hex(hashlib.sha1(file).digest())
50 pathhash = node.hex(hashutil.sha1(file).digest())
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
51 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50
52
51
53
52 def getlocalkey(file, id):
54 def getlocalkey(file, id):
53 pathhash = node.hex(hashlib.sha1(file).digest())
55 pathhash = node.hex(hashutil.sha1(file).digest())
54 return os.path.join(pathhash, id)
56 return os.path.join(pathhash, id)
55
57
56
58
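For readers skimming the two helpers above: the cache key is the hex SHA-1 of the file path, split after two characters so entries fan out across subdirectories, with the file node appended as the leaf name. A standalone illustration, using plain hashlib as a digest-compatible stand-in for hashutil and made-up inputs:

import hashlib
import os

reponame = b'reponame'                # made-up values for illustration only
filename = b'dir/file.py'
fileid = b'1' * 40                    # hex file node

pathhash = hashlib.sha1(filename).hexdigest().encode('ascii')
cachekey = os.path.join(reponame, pathhash[:2], pathhash[2:], fileid)
localkey = os.path.join(pathhash, fileid)

# cachekey looks like: reponame/<2 hex chars>/<38 hex chars>/<40-char node>
print(cachekey)
print(localkey)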
57 def peersetup(ui, peer):
59 def peersetup(ui, peer):
58 class remotefilepeer(peer.__class__):
60 class remotefilepeer(peer.__class__):
59 @wireprotov1peer.batchable
61 @wireprotov1peer.batchable
60 def x_rfl_getfile(self, file, node):
62 def x_rfl_getfile(self, file, node):
61 if not self.capable(b'x_rfl_getfile'):
63 if not self.capable(b'x_rfl_getfile'):
62 raise error.Abort(
64 raise error.Abort(
63 b'configured remotefile server does not support getfile'
65 b'configured remotefile server does not support getfile'
64 )
66 )
65 f = wireprotov1peer.future()
67 f = wireprotov1peer.future()
66 yield {b'file': file, b'node': node}, f
68 yield {b'file': file, b'node': node}, f
67 code, data = f.value.split(b'\0', 1)
69 code, data = f.value.split(b'\0', 1)
68 if int(code):
70 if int(code):
69 raise error.LookupError(file, node, data)
71 raise error.LookupError(file, node, data)
70 yield data
72 yield data
71
73
72 @wireprotov1peer.batchable
74 @wireprotov1peer.batchable
73 def x_rfl_getflogheads(self, path):
75 def x_rfl_getflogheads(self, path):
74 if not self.capable(b'x_rfl_getflogheads'):
76 if not self.capable(b'x_rfl_getflogheads'):
75 raise error.Abort(
77 raise error.Abort(
76 b'configured remotefile server does not '
78 b'configured remotefile server does not '
77 b'support getflogheads'
79 b'support getflogheads'
78 )
80 )
79 f = wireprotov1peer.future()
81 f = wireprotov1peer.future()
80 yield {b'path': path}, f
82 yield {b'path': path}, f
81 heads = f.value.split(b'\n') if f.value else []
83 heads = f.value.split(b'\n') if f.value else []
82 yield heads
84 yield heads
83
85
84 def _updatecallstreamopts(self, command, opts):
86 def _updatecallstreamopts(self, command, opts):
85 if command != b'getbundle':
87 if command != b'getbundle':
86 return
88 return
87 if (
89 if (
88 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
90 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
89 not in self.capabilities()
91 not in self.capabilities()
90 ):
92 ):
91 return
93 return
92 if not util.safehasattr(self, '_localrepo'):
94 if not util.safehasattr(self, '_localrepo'):
93 return
95 return
94 if (
96 if (
95 constants.SHALLOWREPO_REQUIREMENT
97 constants.SHALLOWREPO_REQUIREMENT
96 not in self._localrepo.requirements
98 not in self._localrepo.requirements
97 ):
99 ):
98 return
100 return
99
101
100 bundlecaps = opts.get(b'bundlecaps')
102 bundlecaps = opts.get(b'bundlecaps')
101 if bundlecaps:
103 if bundlecaps:
102 bundlecaps = [bundlecaps]
104 bundlecaps = [bundlecaps]
103 else:
105 else:
104 bundlecaps = []
106 bundlecaps = []
105
107
106 # shallow, includepattern, and excludepattern are a hacky way of
108 # shallow, includepattern, and excludepattern are a hacky way of
107 # carrying over data from the local repo to this getbundle
109 # carrying over data from the local repo to this getbundle
108 # command. We need to do it this way because bundle1 getbundle
110 # command. We need to do it this way because bundle1 getbundle
109 # doesn't provide any other place we can hook in to manipulate
111 # doesn't provide any other place we can hook in to manipulate
110 # getbundle args before it goes across the wire. Once we get rid
112 # getbundle args before it goes across the wire. Once we get rid
111 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
113 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
112 # do this more cleanly.
114 # do this more cleanly.
113 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
115 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
114 if self._localrepo.includepattern:
116 if self._localrepo.includepattern:
115 patterns = b'\0'.join(self._localrepo.includepattern)
117 patterns = b'\0'.join(self._localrepo.includepattern)
116 includecap = b"includepattern=" + patterns
118 includecap = b"includepattern=" + patterns
117 bundlecaps.append(includecap)
119 bundlecaps.append(includecap)
118 if self._localrepo.excludepattern:
120 if self._localrepo.excludepattern:
119 patterns = b'\0'.join(self._localrepo.excludepattern)
121 patterns = b'\0'.join(self._localrepo.excludepattern)
120 excludecap = b"excludepattern=" + patterns
122 excludecap = b"excludepattern=" + patterns
121 bundlecaps.append(excludecap)
123 bundlecaps.append(excludecap)
122 opts[b'bundlecaps'] = b','.join(bundlecaps)
124 opts[b'bundlecaps'] = b','.join(bundlecaps)
123
125
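Because the capability string assembled above is easy to misread, here is the shape opts[b'bundlecaps'] ends up with for a hypothetical shallow pull that carries two include patterns; the pattern values are invented and the BUNDLE2_CAPABLITY token is shown as a placeholder since its exact value lives in remotefilelog's constants module:

# Sketch of the bundlecaps string built in _updatecallstreamopts above.
BUNDLE2_CAPABLITY = b'<bundle2-capability-token>'   # placeholder value

includepattern = [b'path:foo', b'path:bar']         # invented patterns
excludepattern = []

bundlecaps = [BUNDLE2_CAPABLITY]
if includepattern:
    bundlecaps.append(b'includepattern=' + b'\0'.join(includepattern))
if excludepattern:
    bundlecaps.append(b'excludepattern=' + b'\0'.join(excludepattern))

opts = {b'bundlecaps': b','.join(bundlecaps)}
# => b'<bundle2-capability-token>,includepattern=path:foo\x00path:bar'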
124 def _sendrequest(self, command, args, **opts):
126 def _sendrequest(self, command, args, **opts):
125 self._updatecallstreamopts(command, args)
127 self._updatecallstreamopts(command, args)
126 return super(remotefilepeer, self)._sendrequest(
128 return super(remotefilepeer, self)._sendrequest(
127 command, args, **opts
129 command, args, **opts
128 )
130 )
129
131
130 def _callstream(self, command, **opts):
132 def _callstream(self, command, **opts):
131 supertype = super(remotefilepeer, self)
133 supertype = super(remotefilepeer, self)
132 if not util.safehasattr(supertype, '_sendrequest'):
134 if not util.safehasattr(supertype, '_sendrequest'):
133 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
135 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
134 return super(remotefilepeer, self)._callstream(command, **opts)
136 return super(remotefilepeer, self)._callstream(command, **opts)
135
137
136 peer.__class__ = remotefilepeer
138 peer.__class__ = remotefilepeer
137
139
138
140
139 class cacheconnection(object):
141 class cacheconnection(object):
140 """The connection for communicating with the remote cache. Performs
142 """The connection for communicating with the remote cache. Performs
141 gets and sets by communicating with an external process that has the
143 gets and sets by communicating with an external process that has the
142 cache-specific implementation.
144 cache-specific implementation.
143 """
145 """
144
146
145 def __init__(self):
147 def __init__(self):
146 self.pipeo = self.pipei = self.pipee = None
148 self.pipeo = self.pipei = self.pipee = None
147 self.subprocess = None
149 self.subprocess = None
148 self.connected = False
150 self.connected = False
149
151
150 def connect(self, cachecommand):
152 def connect(self, cachecommand):
151 if self.pipeo:
153 if self.pipeo:
152 raise error.Abort(_(b"cache connection already open"))
154 raise error.Abort(_(b"cache connection already open"))
153 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
155 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
154 cachecommand
156 cachecommand
155 )
157 )
156 self.connected = True
158 self.connected = True
157
159
158 def close(self):
160 def close(self):
159 def tryclose(pipe):
161 def tryclose(pipe):
160 try:
162 try:
161 pipe.close()
163 pipe.close()
162 except Exception:
164 except Exception:
163 pass
165 pass
164
166
165 if self.connected:
167 if self.connected:
166 try:
168 try:
167 self.pipei.write(b"exit\n")
169 self.pipei.write(b"exit\n")
168 except Exception:
170 except Exception:
169 pass
171 pass
170 tryclose(self.pipei)
172 tryclose(self.pipei)
171 self.pipei = None
173 self.pipei = None
172 tryclose(self.pipeo)
174 tryclose(self.pipeo)
173 self.pipeo = None
175 self.pipeo = None
174 tryclose(self.pipee)
176 tryclose(self.pipee)
175 self.pipee = None
177 self.pipee = None
176 try:
178 try:
177 # Wait for process to terminate, making sure to avoid deadlock.
179 # Wait for process to terminate, making sure to avoid deadlock.
178 # See https://docs.python.org/2/library/subprocess.html for
180 # See https://docs.python.org/2/library/subprocess.html for
179 # warnings about wait() and deadlocking.
181 # warnings about wait() and deadlocking.
180 self.subprocess.communicate()
182 self.subprocess.communicate()
181 except Exception:
183 except Exception:
182 pass
184 pass
183 self.subprocess = None
185 self.subprocess = None
184 self.connected = False
186 self.connected = False
185
187
186 def request(self, request, flush=True):
188 def request(self, request, flush=True):
187 if self.connected:
189 if self.connected:
188 try:
190 try:
189 self.pipei.write(request)
191 self.pipei.write(request)
190 if flush:
192 if flush:
191 self.pipei.flush()
193 self.pipei.flush()
192 except IOError:
194 except IOError:
193 self.close()
195 self.close()
194
196
195 def receiveline(self):
197 def receiveline(self):
196 if not self.connected:
198 if not self.connected:
197 return None
199 return None
198 try:
200 try:
199 result = self.pipeo.readline()[:-1]
201 result = self.pipeo.readline()[:-1]
200 if not result:
202 if not result:
201 self.close()
203 self.close()
202 except IOError:
204 except IOError:
203 self.close()
205 self.close()
204
206
205 return result
207 return result
206
208
207
209
208 def _getfilesbatch(
210 def _getfilesbatch(
209 remote, receivemissing, progresstick, missed, idmap, batchsize
211 remote, receivemissing, progresstick, missed, idmap, batchsize
210 ):
212 ):
211 # Over http(s), iterbatch is a streamy method and we can start
213 # Over http(s), iterbatch is a streamy method and we can start
212 # looking at results early. This means we send one (potentially
214 # looking at results early. This means we send one (potentially
213 # large) request, but then we show nice progress as we process
215 # large) request, but then we show nice progress as we process
214 # file results, rather than showing chunks of $batchsize in
216 # file results, rather than showing chunks of $batchsize in
215 # progress.
217 # progress.
216 #
218 #
217 # Over ssh, iterbatch isn't streamy because batch() wasn't
219 # Over ssh, iterbatch isn't streamy because batch() wasn't
218 # explicitly designed as a streaming method. In the future we
220 # explicitly designed as a streaming method. In the future we
219 # should probably introduce a streambatch() method upstream and
221 # should probably introduce a streambatch() method upstream and
220 # use that for this.
222 # use that for this.
221 with remote.commandexecutor() as e:
223 with remote.commandexecutor() as e:
222 futures = []
224 futures = []
223 for m in missed:
225 for m in missed:
224 futures.append(
226 futures.append(
225 e.callcommand(
227 e.callcommand(
226 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
228 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
227 )
229 )
228 )
230 )
229
231
230 for i, m in enumerate(missed):
232 for i, m in enumerate(missed):
231 r = futures[i].result()
233 r = futures[i].result()
232 futures[i] = None # release memory
234 futures[i] = None # release memory
233 file_ = idmap[m]
235 file_ = idmap[m]
234 node = m[-40:]
236 node = m[-40:]
235 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
237 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
236 progresstick()
238 progresstick()
237
239
238
240
239 def _getfiles_optimistic(
241 def _getfiles_optimistic(
240 remote, receivemissing, progresstick, missed, idmap, step
242 remote, receivemissing, progresstick, missed, idmap, step
241 ):
243 ):
242 remote._callstream(b"x_rfl_getfiles")
244 remote._callstream(b"x_rfl_getfiles")
243 i = 0
245 i = 0
244 pipeo = remote._pipeo
246 pipeo = remote._pipeo
245 pipei = remote._pipei
247 pipei = remote._pipei
246 while i < len(missed):
248 while i < len(missed):
247 # issue a batch of requests
249 # issue a batch of requests
248 start = i
250 start = i
249 end = min(len(missed), start + step)
251 end = min(len(missed), start + step)
250 i = end
252 i = end
251 for missingid in missed[start:end]:
253 for missingid in missed[start:end]:
252 # issue new request
254 # issue new request
253 versionid = missingid[-40:]
255 versionid = missingid[-40:]
254 file = idmap[missingid]
256 file = idmap[missingid]
255 sshrequest = b"%s%s\n" % (versionid, file)
257 sshrequest = b"%s%s\n" % (versionid, file)
256 pipeo.write(sshrequest)
258 pipeo.write(sshrequest)
257 pipeo.flush()
259 pipeo.flush()
258
260
259 # receive batch results
261 # receive batch results
260 for missingid in missed[start:end]:
262 for missingid in missed[start:end]:
261 versionid = missingid[-40:]
263 versionid = missingid[-40:]
262 file = idmap[missingid]
264 file = idmap[missingid]
263 receivemissing(pipei, file, versionid)
265 receivemissing(pipei, file, versionid)
264 progresstick()
266 progresstick()
265
267
266 # End the command
268 # End the command
267 pipeo.write(b'\n')
269 pipeo.write(b'\n')
268 pipeo.flush()
270 pipeo.flush()
269
271
270
272
271 def _getfiles_threaded(
273 def _getfiles_threaded(
272 remote, receivemissing, progresstick, missed, idmap, step
274 remote, receivemissing, progresstick, missed, idmap, step
273 ):
275 ):
274 remote._callstream(b"getfiles")
276 remote._callstream(b"getfiles")
275 pipeo = remote._pipeo
277 pipeo = remote._pipeo
276 pipei = remote._pipei
278 pipei = remote._pipei
277
279
278 def writer():
280 def writer():
279 for missingid in missed:
281 for missingid in missed:
280 versionid = missingid[-40:]
282 versionid = missingid[-40:]
281 file = idmap[missingid]
283 file = idmap[missingid]
282 sshrequest = b"%s%s\n" % (versionid, file)
284 sshrequest = b"%s%s\n" % (versionid, file)
283 pipeo.write(sshrequest)
285 pipeo.write(sshrequest)
284 pipeo.flush()
286 pipeo.flush()
285
287
286 writerthread = threading.Thread(target=writer)
288 writerthread = threading.Thread(target=writer)
287 writerthread.daemon = True
289 writerthread.daemon = True
288 writerthread.start()
290 writerthread.start()
289
291
290 for missingid in missed:
292 for missingid in missed:
291 versionid = missingid[-40:]
293 versionid = missingid[-40:]
292 file = idmap[missingid]
294 file = idmap[missingid]
293 receivemissing(pipei, file, versionid)
295 receivemissing(pipei, file, versionid)
294 progresstick()
296 progresstick()
295
297
296 writerthread.join()
298 writerthread.join()
297 # End the command
299 # End the command
298 pipeo.write(b'\n')
300 pipeo.write(b'\n')
299 pipeo.flush()
301 pipeo.flush()
300
302
301
303
302 class fileserverclient(object):
304 class fileserverclient(object):
303 """A client for requesting files from the remote file server.
305 """A client for requesting files from the remote file server.
304 """
306 """
305
307
306 def __init__(self, repo):
308 def __init__(self, repo):
307 ui = repo.ui
309 ui = repo.ui
308 self.repo = repo
310 self.repo = repo
309 self.ui = ui
311 self.ui = ui
310 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
312 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
311 if self.cacheprocess:
313 if self.cacheprocess:
312 self.cacheprocess = util.expandpath(self.cacheprocess)
314 self.cacheprocess = util.expandpath(self.cacheprocess)
313
315
314 # This option causes remotefilelog to pass the full file path to the
316 # This option causes remotefilelog to pass the full file path to the
315 # cacheprocess instead of a hashed key.
317 # cacheprocess instead of a hashed key.
316 self.cacheprocesspasspath = ui.configbool(
318 self.cacheprocesspasspath = ui.configbool(
317 b"remotefilelog", b"cacheprocess.includepath"
319 b"remotefilelog", b"cacheprocess.includepath"
318 )
320 )
319
321
320 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
322 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
321
323
322 self.remotecache = cacheconnection()
324 self.remotecache = cacheconnection()
323
325
324 def setstore(self, datastore, historystore, writedata, writehistory):
326 def setstore(self, datastore, historystore, writedata, writehistory):
325 self.datastore = datastore
327 self.datastore = datastore
326 self.historystore = historystore
328 self.historystore = historystore
327 self.writedata = writedata
329 self.writedata = writedata
328 self.writehistory = writehistory
330 self.writehistory = writehistory
329
331
330 def _connect(self):
332 def _connect(self):
331 return self.repo.connectionpool.get(self.repo.fallbackpath)
333 return self.repo.connectionpool.get(self.repo.fallbackpath)
332
334
333 def request(self, fileids):
335 def request(self, fileids):
334 """Takes a list of filename/node pairs and fetches them from the
336 """Takes a list of filename/node pairs and fetches them from the
335 server. Files are stored in the local cache.
337 server. Files are stored in the local cache.
336 A list of nodes that the server couldn't find is returned.
338 A list of nodes that the server couldn't find is returned.
337 If the connection fails, an exception is raised.
339 If the connection fails, an exception is raised.
338 """
340 """
339 if not self.remotecache.connected:
341 if not self.remotecache.connected:
340 self.connect()
342 self.connect()
341 cache = self.remotecache
343 cache = self.remotecache
342 writedata = self.writedata
344 writedata = self.writedata
343
345
344 repo = self.repo
346 repo = self.repo
345 total = len(fileids)
347 total = len(fileids)
346 request = b"get\n%d\n" % total
348 request = b"get\n%d\n" % total
347 idmap = {}
349 idmap = {}
348 reponame = repo.name
350 reponame = repo.name
349 for file, id in fileids:
351 for file, id in fileids:
350 fullid = getcachekey(reponame, file, id)
352 fullid = getcachekey(reponame, file, id)
351 if self.cacheprocesspasspath:
353 if self.cacheprocesspasspath:
352 request += file + b'\0'
354 request += file + b'\0'
353 request += fullid + b"\n"
355 request += fullid + b"\n"
354 idmap[fullid] = file
356 idmap[fullid] = file
355
357
356 cache.request(request)
358 cache.request(request)
357
359
358 progress = self.ui.makeprogress(_(b'downloading'), total=total)
360 progress = self.ui.makeprogress(_(b'downloading'), total=total)
359 progress.update(0)
361 progress.update(0)
360
362
361 missed = []
363 missed = []
362 while True:
364 while True:
363 missingid = cache.receiveline()
365 missingid = cache.receiveline()
364 if not missingid:
366 if not missingid:
365 missedset = set(missed)
367 missedset = set(missed)
366 for missingid in idmap:
368 for missingid in idmap:
367 if not missingid in missedset:
369 if not missingid in missedset:
368 missed.append(missingid)
370 missed.append(missingid)
369 self.ui.warn(
371 self.ui.warn(
370 _(
372 _(
371 b"warning: cache connection closed early - "
373 b"warning: cache connection closed early - "
372 + b"falling back to server\n"
374 + b"falling back to server\n"
373 )
375 )
374 )
376 )
375 break
377 break
376 if missingid == b"0":
378 if missingid == b"0":
377 break
379 break
378 if missingid.startswith(b"_hits_"):
380 if missingid.startswith(b"_hits_"):
379 # receive progress reports
381 # receive progress reports
380 parts = missingid.split(b"_")
382 parts = missingid.split(b"_")
381 progress.increment(int(parts[2]))
383 progress.increment(int(parts[2]))
382 continue
384 continue
383
385
384 missed.append(missingid)
386 missed.append(missingid)
385
387
386 global fetchmisses
388 global fetchmisses
387 fetchmisses += len(missed)
389 fetchmisses += len(missed)
388
390
389 fromcache = total - len(missed)
391 fromcache = total - len(missed)
390 progress.update(fromcache, total=total)
392 progress.update(fromcache, total=total)
391 self.ui.log(
393 self.ui.log(
392 b"remotefilelog",
394 b"remotefilelog",
393 b"remote cache hit rate is %r of %r\n",
395 b"remote cache hit rate is %r of %r\n",
394 fromcache,
396 fromcache,
395 total,
397 total,
396 hit=fromcache,
398 hit=fromcache,
397 total=total,
399 total=total,
398 )
400 )
399
401
400 oldumask = os.umask(0o002)
402 oldumask = os.umask(0o002)
401 try:
403 try:
402 # receive cache misses from master
404 # receive cache misses from master
403 if missed:
405 if missed:
404 # When verbose is true, sshpeer prints 'running ssh...'
406 # When verbose is true, sshpeer prints 'running ssh...'
405 # to stdout, which can interfere with some command
407 # to stdout, which can interfere with some command
406 # outputs
408 # outputs
407 verbose = self.ui.verbose
409 verbose = self.ui.verbose
408 self.ui.verbose = False
410 self.ui.verbose = False
409 try:
411 try:
410 with self._connect() as conn:
412 with self._connect() as conn:
411 remote = conn.peer
413 remote = conn.peer
412 if remote.capable(
414 if remote.capable(
413 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
415 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
414 ):
416 ):
415 if not isinstance(remote, _sshv1peer):
417 if not isinstance(remote, _sshv1peer):
416 raise error.Abort(
418 raise error.Abort(
417 b'remotefilelog requires ssh servers'
419 b'remotefilelog requires ssh servers'
418 )
420 )
419 step = self.ui.configint(
421 step = self.ui.configint(
420 b'remotefilelog', b'getfilesstep'
422 b'remotefilelog', b'getfilesstep'
421 )
423 )
422 getfilestype = self.ui.config(
424 getfilestype = self.ui.config(
423 b'remotefilelog', b'getfilestype'
425 b'remotefilelog', b'getfilestype'
424 )
426 )
425 if getfilestype == b'threaded':
427 if getfilestype == b'threaded':
426 _getfiles = _getfiles_threaded
428 _getfiles = _getfiles_threaded
427 else:
429 else:
428 _getfiles = _getfiles_optimistic
430 _getfiles = _getfiles_optimistic
429 _getfiles(
431 _getfiles(
430 remote,
432 remote,
431 self.receivemissing,
433 self.receivemissing,
432 progress.increment,
434 progress.increment,
433 missed,
435 missed,
434 idmap,
436 idmap,
435 step,
437 step,
436 )
438 )
437 elif remote.capable(b"x_rfl_getfile"):
439 elif remote.capable(b"x_rfl_getfile"):
438 if remote.capable(b'batch'):
440 if remote.capable(b'batch'):
439 batchdefault = 100
441 batchdefault = 100
440 else:
442 else:
441 batchdefault = 10
443 batchdefault = 10
442 batchsize = self.ui.configint(
444 batchsize = self.ui.configint(
443 b'remotefilelog', b'batchsize', batchdefault
445 b'remotefilelog', b'batchsize', batchdefault
444 )
446 )
445 self.ui.debug(
447 self.ui.debug(
446 b'requesting %d files from '
448 b'requesting %d files from '
447 b'remotefilelog server...\n' % len(missed)
449 b'remotefilelog server...\n' % len(missed)
448 )
450 )
449 _getfilesbatch(
451 _getfilesbatch(
450 remote,
452 remote,
451 self.receivemissing,
453 self.receivemissing,
452 progress.increment,
454 progress.increment,
453 missed,
455 missed,
454 idmap,
456 idmap,
455 batchsize,
457 batchsize,
456 )
458 )
457 else:
459 else:
458 raise error.Abort(
460 raise error.Abort(
459 b"configured remotefilelog server"
461 b"configured remotefilelog server"
460 b" does not support remotefilelog"
462 b" does not support remotefilelog"
461 )
463 )
462
464
463 self.ui.log(
465 self.ui.log(
464 b"remotefilefetchlog",
466 b"remotefilefetchlog",
465 b"Success\n",
467 b"Success\n",
466 fetched_files=progress.pos - fromcache,
468 fetched_files=progress.pos - fromcache,
467 total_to_fetch=total - fromcache,
469 total_to_fetch=total - fromcache,
468 )
470 )
469 except Exception:
471 except Exception:
470 self.ui.log(
472 self.ui.log(
471 b"remotefilefetchlog",
473 b"remotefilefetchlog",
472 b"Fail\n",
474 b"Fail\n",
473 fetched_files=progress.pos - fromcache,
475 fetched_files=progress.pos - fromcache,
474 total_to_fetch=total - fromcache,
476 total_to_fetch=total - fromcache,
475 )
477 )
476 raise
478 raise
477 finally:
479 finally:
478 self.ui.verbose = verbose
480 self.ui.verbose = verbose
479 # send to memcache
481 # send to memcache
480 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
482 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
481 cache.request(request)
483 cache.request(request)
482
484
483 progress.complete()
485 progress.complete()
484
486
485 # mark ourselves as a user of this cache
487 # mark ourselves as a user of this cache
486 writedata.markrepo(self.repo.path)
488 writedata.markrepo(self.repo.path)
487 finally:
489 finally:
488 os.umask(oldumask)
490 os.umask(oldumask)
489
491
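Stepping back from request() as a whole: the exchange with the cache process is line-oriented, and the shapes below summarize one round trip. The progress-line format is inferred from the parser above and the keys are placeholders, so treat this as a sketch rather than a protocol spec.

# Client -> cache process: a "get" request for two cache keys.
request = b"get\n2\n<key1>\n<key2>\n"     # <keyN> stand in for real cache keys

# Cache process -> client, one line at a time:
responses = [
    b"_hits_1_",    # progress report: 1 key was served from the cache
    b"<key2>",      # a key the cache does not have, i.e. a miss
    b"0",           # end-of-response marker
]

# After fetching the misses from the server, the client stores them back:
store = b"set\n1\n<key2>\n"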
490 def receivemissing(self, pipe, filename, node):
492 def receivemissing(self, pipe, filename, node):
491 line = pipe.readline()[:-1]
493 line = pipe.readline()[:-1]
492 if not line:
494 if not line:
493 raise error.ResponseError(
495 raise error.ResponseError(
494 _(b"error downloading file contents:"),
496 _(b"error downloading file contents:"),
495 _(b"connection closed early"),
497 _(b"connection closed early"),
496 )
498 )
497 size = int(line)
499 size = int(line)
498 data = pipe.read(size)
500 data = pipe.read(size)
499 if len(data) != size:
501 if len(data) != size:
500 raise error.ResponseError(
502 raise error.ResponseError(
501 _(b"error downloading file contents:"),
503 _(b"error downloading file contents:"),
502 _(b"only received %s of %s bytes") % (len(data), size),
504 _(b"only received %s of %s bytes") % (len(data), size),
503 )
505 )
504
506
505 self.writedata.addremotefilelognode(
507 self.writedata.addremotefilelognode(
506 filename, bin(node), zlib.decompress(data)
508 filename, bin(node), zlib.decompress(data)
507 )
509 )
508
510
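For completeness, the per-file payload consumed by receivemissing() above is a decimal size line followed by exactly that many bytes of zlib-compressed remotefilelog data. A standalone sketch that builds and re-parses one such frame (the payload content is arbitrary):

import io
import zlib

payload = zlib.compress(b'file contents go here')   # arbitrary stand-in blob
frame = b'%d\n%s' % (len(payload), payload)

pipe = io.BytesIO(frame)
size = int(pipe.readline()[:-1])     # same parsing as receivemissing()
data = pipe.read(size)
assert len(data) == size
assert zlib.decompress(data) == b'file contents go here'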
509 def connect(self):
511 def connect(self):
510 if self.cacheprocess:
512 if self.cacheprocess:
511 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
513 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
512 self.remotecache.connect(cmd)
514 self.remotecache.connect(cmd)
513 else:
515 else:
514 # If no cache process is specified, we fake one that always
516 # If no cache process is specified, we fake one that always
515 # returns cache misses. This enables tests to run easily
517 # returns cache misses. This enables tests to run easily
516 # and may eventually allow us to be a drop in replacement
518 # and may eventually allow us to be a drop in replacement
517 # for the largefiles extension.
519 # for the largefiles extension.
518 class simplecache(object):
520 class simplecache(object):
519 def __init__(self):
521 def __init__(self):
520 self.missingids = []
522 self.missingids = []
521 self.connected = True
523 self.connected = True
522
524
523 def close(self):
525 def close(self):
524 pass
526 pass
525
527
526 def request(self, value, flush=True):
528 def request(self, value, flush=True):
527 lines = value.split(b"\n")
529 lines = value.split(b"\n")
528 if lines[0] != b"get":
530 if lines[0] != b"get":
529 return
531 return
530 self.missingids = lines[2:-1]
532 self.missingids = lines[2:-1]
531 self.missingids.append(b'0')
533 self.missingids.append(b'0')
532
534
533 def receiveline(self):
535 def receiveline(self):
534 if len(self.missingids) > 0:
536 if len(self.missingids) > 0:
535 return self.missingids.pop(0)
537 return self.missingids.pop(0)
536 return None
538 return None
537
539
538 self.remotecache = simplecache()
540 self.remotecache = simplecache()
539
541
540 def close(self):
542 def close(self):
541 if fetches:
543 if fetches:
542 msg = (
544 msg = (
543 b"%d files fetched over %d fetches - "
545 b"%d files fetched over %d fetches - "
544 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
546 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
545 ) % (
547 ) % (
546 fetched,
548 fetched,
547 fetches,
549 fetches,
548 fetchmisses,
550 fetchmisses,
549 float(fetched - fetchmisses) / float(fetched) * 100.0,
551 float(fetched - fetchmisses) / float(fetched) * 100.0,
550 fetchcost,
552 fetchcost,
551 )
553 )
552 if self.debugoutput:
554 if self.debugoutput:
553 self.ui.warn(msg)
555 self.ui.warn(msg)
554 self.ui.log(
556 self.ui.log(
555 b"remotefilelog.prefetch",
557 b"remotefilelog.prefetch",
556 msg.replace(b"%", b"%%"),
558 msg.replace(b"%", b"%%"),
557 remotefilelogfetched=fetched,
559 remotefilelogfetched=fetched,
558 remotefilelogfetches=fetches,
560 remotefilelogfetches=fetches,
559 remotefilelogfetchmisses=fetchmisses,
561 remotefilelogfetchmisses=fetchmisses,
560 remotefilelogfetchtime=fetchcost * 1000,
562 remotefilelogfetchtime=fetchcost * 1000,
561 )
563 )
562
564
563 if self.remotecache.connected:
565 if self.remotecache.connected:
564 self.remotecache.close()
566 self.remotecache.close()
565
567
566 def prefetch(
568 def prefetch(
567 self, fileids, force=False, fetchdata=True, fetchhistory=False
569 self, fileids, force=False, fetchdata=True, fetchhistory=False
568 ):
570 ):
569 """downloads the given file versions to the cache
571 """downloads the given file versions to the cache
570 """
572 """
571 repo = self.repo
573 repo = self.repo
572 idstocheck = []
574 idstocheck = []
573 for file, id in fileids:
575 for file, id in fileids:
574 # hack
576 # hack
575 # - we don't use .hgtags
577 # - we don't use .hgtags
576 # - workingctx produces ids with length 42,
578 # - workingctx produces ids with length 42,
577 # which we skip since they aren't in any cache
579 # which we skip since they aren't in any cache
578 if (
580 if (
579 file == b'.hgtags'
581 file == b'.hgtags'
580 or len(id) == 42
582 or len(id) == 42
581 or not repo.shallowmatch(file)
583 or not repo.shallowmatch(file)
582 ):
584 ):
583 continue
585 continue
584
586
585 idstocheck.append((file, bin(id)))
587 idstocheck.append((file, bin(id)))
586
588
587 datastore = self.datastore
589 datastore = self.datastore
588 historystore = self.historystore
590 historystore = self.historystore
589 if force:
591 if force:
590 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
592 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
591 historystore = metadatastore.unionmetadatastore(
593 historystore = metadatastore.unionmetadatastore(
592 *repo.sharedhistorystores
594 *repo.sharedhistorystores
593 )
595 )
594
596
595 missingids = set()
597 missingids = set()
596 if fetchdata:
598 if fetchdata:
597 missingids.update(datastore.getmissing(idstocheck))
599 missingids.update(datastore.getmissing(idstocheck))
598 if fetchhistory:
600 if fetchhistory:
599 missingids.update(historystore.getmissing(idstocheck))
601 missingids.update(historystore.getmissing(idstocheck))
600
602
601 # partition missing nodes into nullid and not-nullid so we can
603 # partition missing nodes into nullid and not-nullid so we can
602 # warn about this filtering potentially shadowing bugs.
604 # warn about this filtering potentially shadowing bugs.
603 nullids = len([None for unused, id in missingids if id == nullid])
605 nullids = len([None for unused, id in missingids if id == nullid])
604 if nullids:
606 if nullids:
605 missingids = [(f, id) for f, id in missingids if id != nullid]
607 missingids = [(f, id) for f, id in missingids if id != nullid]
606 repo.ui.develwarn(
608 repo.ui.develwarn(
607 (
609 (
608 b'remotefilelog not fetching %d null revs'
610 b'remotefilelog not fetching %d null revs'
609 b' - this is likely hiding bugs' % nullids
611 b' - this is likely hiding bugs' % nullids
610 ),
612 ),
611 config=b'remotefilelog-ext',
613 config=b'remotefilelog-ext',
612 )
614 )
613 if missingids:
615 if missingids:
614 global fetches, fetched, fetchcost
616 global fetches, fetched, fetchcost
615 fetches += 1
617 fetches += 1
616
618
617 # We want to be able to detect excess individual file downloads, so
619 # We want to be able to detect excess individual file downloads, so
618 # let's log that information for debugging.
620 # let's log that information for debugging.
619 if fetches >= 15 and fetches < 18:
621 if fetches >= 15 and fetches < 18:
620 if fetches == 15:
622 if fetches == 15:
621 fetchwarning = self.ui.config(
623 fetchwarning = self.ui.config(
622 b'remotefilelog', b'fetchwarning'
624 b'remotefilelog', b'fetchwarning'
623 )
625 )
624 if fetchwarning:
626 if fetchwarning:
625 self.ui.warn(fetchwarning + b'\n')
627 self.ui.warn(fetchwarning + b'\n')
626 self.logstacktrace()
628 self.logstacktrace()
627 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
629 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
628 fetched += len(missingids)
630 fetched += len(missingids)
629 start = time.time()
631 start = time.time()
630 missingids = self.request(missingids)
632 missingids = self.request(missingids)
631 if missingids:
633 if missingids:
632 raise error.Abort(
634 raise error.Abort(
633 _(b"unable to download %d files") % len(missingids)
635 _(b"unable to download %d files") % len(missingids)
634 )
636 )
635 fetchcost += time.time() - start
637 fetchcost += time.time() - start
636 self._lfsprefetch(fileids)
638 self._lfsprefetch(fileids)
637
639
638 def _lfsprefetch(self, fileids):
640 def _lfsprefetch(self, fileids):
639 if not _lfsmod or not util.safehasattr(
641 if not _lfsmod or not util.safehasattr(
640 self.repo.svfs, b'lfslocalblobstore'
642 self.repo.svfs, b'lfslocalblobstore'
641 ):
643 ):
642 return
644 return
643 if not _lfsmod.wrapper.candownload(self.repo):
645 if not _lfsmod.wrapper.candownload(self.repo):
644 return
646 return
645 pointers = []
647 pointers = []
646 store = self.repo.svfs.lfslocalblobstore
648 store = self.repo.svfs.lfslocalblobstore
647 for file, id in fileids:
649 for file, id in fileids:
648 node = bin(id)
650 node = bin(id)
649 rlog = self.repo.file(file)
651 rlog = self.repo.file(file)
650 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
652 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
651 text = rlog.rawdata(node)
653 text = rlog.rawdata(node)
652 p = _lfsmod.pointer.deserialize(text)
654 p = _lfsmod.pointer.deserialize(text)
653 oid = p.oid()
655 oid = p.oid()
654 if not store.has(oid):
656 if not store.has(oid):
655 pointers.append(p)
657 pointers.append(p)
656 if len(pointers) > 0:
658 if len(pointers) > 0:
657 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
659 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
658 assert all(store.has(p.oid()) for p in pointers)
660 assert all(store.has(p.oid()) for p in pointers)
659
661
660 def logstacktrace(self):
662 def logstacktrace(self):
661 import traceback
663 import traceback
662
664
663 self.ui.log(
665 self.ui.log(
664 b'remotefilelog',
666 b'remotefilelog',
665 b'excess remotefilelog fetching:\n%s\n',
667 b'excess remotefilelog fetching:\n%s\n',
666 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
668 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
667 )
669 )
@@ -1,572 +1,572 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import hashlib
4 import struct
3 import struct
5
4
6 from mercurial.node import hex, nullid
5 from mercurial.node import hex, nullid
7 from mercurial import (
6 from mercurial import (
8 pycompat,
7 pycompat,
9 util,
8 util,
10 )
9 )
10 from mercurial.utils import hashutil
11 from . import (
11 from . import (
12 basepack,
12 basepack,
13 constants,
13 constants,
14 shallowutil,
14 shallowutil,
15 )
15 )
16
16
17 # (filename hash, offset, size)
17 # (filename hash, offset, size)
18 INDEXFORMAT2 = b'!20sQQII'
18 INDEXFORMAT2 = b'!20sQQII'
19 INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2)
19 INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2)
20 NODELENGTH = 20
20 NODELENGTH = 20
21
21
22 NODEINDEXFORMAT = b'!20sQ'
22 NODEINDEXFORMAT = b'!20sQ'
23 NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)
23 NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)
24
24
25 # (node, p1, p2, linknode)
25 # (node, p1, p2, linknode)
26 PACKFORMAT = b"!20s20s20s20sH"
26 PACKFORMAT = b"!20s20s20s20sH"
27 PACKENTRYLENGTH = 82
27 PACKENTRYLENGTH = 82
28
28
29 ENTRYCOUNTSIZE = 4
29 ENTRYCOUNTSIZE = 4
30
30
31 INDEXSUFFIX = b'.histidx'
31 INDEXSUFFIX = b'.histidx'
32 PACKSUFFIX = b'.histpack'
32 PACKSUFFIX = b'.histpack'
33
33
34 ANC_NODE = 0
34 ANC_NODE = 0
35 ANC_P1NODE = 1
35 ANC_P1NODE = 1
36 ANC_P2NODE = 2
36 ANC_P2NODE = 2
37 ANC_LINKNODE = 3
37 ANC_LINKNODE = 3
38 ANC_COPYFROM = 4
38 ANC_COPYFROM = 4
39
39
40
40
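A quick sanity check on the constants above: a history entry really is 82 bytes, four 20-byte nodes plus a two-byte big-endian copyfrom length, with the copyfrom path (if any) trailing the fixed part. Synthetic values throughout:

import struct

PACKFORMAT = b"!20s20s20s20sH"
assert struct.calcsize(PACKFORMAT) == 82          # matches PACKENTRYLENGTH

node, p1, p2, linknode = b'N' * 20, b'A' * 20, b'\0' * 20, b'L' * 20
copyfrom = b'renamed/from/path'

entry = struct.pack(PACKFORMAT, node, p1, p2, linknode, len(copyfrom)) + copyfrom
unpacked = struct.unpack(PACKFORMAT, entry[:82])
assert unpacked[:4] == (node, p1, p2, linknode)
assert entry[82:82 + unpacked[4]] == copyfrom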
41 class historypackstore(basepack.basepackstore):
41 class historypackstore(basepack.basepackstore):
42 INDEXSUFFIX = INDEXSUFFIX
42 INDEXSUFFIX = INDEXSUFFIX
43 PACKSUFFIX = PACKSUFFIX
43 PACKSUFFIX = PACKSUFFIX
44
44
45 def getpack(self, path):
45 def getpack(self, path):
46 return historypack(path)
46 return historypack(path)
47
47
48 def getancestors(self, name, node, known=None):
48 def getancestors(self, name, node, known=None):
49 for pack in self.packs:
49 for pack in self.packs:
50 try:
50 try:
51 return pack.getancestors(name, node, known=known)
51 return pack.getancestors(name, node, known=known)
52 except KeyError:
52 except KeyError:
53 pass
53 pass
54
54
55 for pack in self.refresh():
55 for pack in self.refresh():
56 try:
56 try:
57 return pack.getancestors(name, node, known=known)
57 return pack.getancestors(name, node, known=known)
58 except KeyError:
58 except KeyError:
59 pass
59 pass
60
60
61 raise KeyError((name, node))
61 raise KeyError((name, node))
62
62
63 def getnodeinfo(self, name, node):
63 def getnodeinfo(self, name, node):
64 for pack in self.packs:
64 for pack in self.packs:
65 try:
65 try:
66 return pack.getnodeinfo(name, node)
66 return pack.getnodeinfo(name, node)
67 except KeyError:
67 except KeyError:
68 pass
68 pass
69
69
70 for pack in self.refresh():
70 for pack in self.refresh():
71 try:
71 try:
72 return pack.getnodeinfo(name, node)
72 return pack.getnodeinfo(name, node)
73 except KeyError:
73 except KeyError:
74 pass
74 pass
75
75
76 raise KeyError((name, node))
76 raise KeyError((name, node))
77
77
78 def add(self, filename, node, p1, p2, linknode, copyfrom):
78 def add(self, filename, node, p1, p2, linknode, copyfrom):
79 raise RuntimeError(
79 raise RuntimeError(
80 b"cannot add to historypackstore (%s:%s)" % (filename, hex(node))
80 b"cannot add to historypackstore (%s:%s)" % (filename, hex(node))
81 )
81 )
82
82
83
83
84 class historypack(basepack.basepack):
84 class historypack(basepack.basepack):
85 INDEXSUFFIX = INDEXSUFFIX
85 INDEXSUFFIX = INDEXSUFFIX
86 PACKSUFFIX = PACKSUFFIX
86 PACKSUFFIX = PACKSUFFIX
87
87
88 SUPPORTED_VERSIONS = [2]
88 SUPPORTED_VERSIONS = [2]
89
89
90 def __init__(self, path):
90 def __init__(self, path):
91 super(historypack, self).__init__(path)
91 super(historypack, self).__init__(path)
92 self.INDEXFORMAT = INDEXFORMAT2
92 self.INDEXFORMAT = INDEXFORMAT2
93 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
93 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
94
94
95 def getmissing(self, keys):
95 def getmissing(self, keys):
96 missing = []
96 missing = []
97 for name, node in keys:
97 for name, node in keys:
98 try:
98 try:
99 self._findnode(name, node)
99 self._findnode(name, node)
100 except KeyError:
100 except KeyError:
101 missing.append((name, node))
101 missing.append((name, node))
102
102
103 return missing
103 return missing
104
104
105 def getancestors(self, name, node, known=None):
105 def getancestors(self, name, node, known=None):
106 """Returns as many ancestors as we're aware of.
106 """Returns as many ancestors as we're aware of.
107
107
108 return value: {
108 return value: {
109 node: (p1, p2, linknode, copyfrom),
109 node: (p1, p2, linknode, copyfrom),
110 ...
110 ...
111 }
111 }
112 """
112 """
113 if known and node in known:
113 if known and node in known:
114 return []
114 return []
115
115
116 ancestors = self._getancestors(name, node, known=known)
116 ancestors = self._getancestors(name, node, known=known)
117 results = {}
117 results = {}
118 for ancnode, p1, p2, linknode, copyfrom in ancestors:
118 for ancnode, p1, p2, linknode, copyfrom in ancestors:
119 results[ancnode] = (p1, p2, linknode, copyfrom)
119 results[ancnode] = (p1, p2, linknode, copyfrom)
120
120
121 if not results:
121 if not results:
122 raise KeyError((name, node))
122 raise KeyError((name, node))
123 return results
123 return results
124
124
125 def getnodeinfo(self, name, node):
125 def getnodeinfo(self, name, node):
126 # Drop the node from the tuple before returning, since the result should
126 # Drop the node from the tuple before returning, since the result should
127 # just be (p1, p2, linknode, copyfrom)
127 # just be (p1, p2, linknode, copyfrom)
128 return self._findnode(name, node)[1:]
128 return self._findnode(name, node)[1:]
129
129
130 def _getancestors(self, name, node, known=None):
130 def _getancestors(self, name, node, known=None):
131 if known is None:
131 if known is None:
132 known = set()
132 known = set()
133 section = self._findsection(name)
133 section = self._findsection(name)
134 filename, offset, size, nodeindexoffset, nodeindexsize = section
134 filename, offset, size, nodeindexoffset, nodeindexsize = section
135 pending = set((node,))
135 pending = set((node,))
136 o = 0
136 o = 0
137 while o < size:
137 while o < size:
138 if not pending:
138 if not pending:
139 break
139 break
140 entry, copyfrom = self._readentry(offset + o)
140 entry, copyfrom = self._readentry(offset + o)
141 o += PACKENTRYLENGTH
141 o += PACKENTRYLENGTH
142 if copyfrom:
142 if copyfrom:
143 o += len(copyfrom)
143 o += len(copyfrom)
144
144
145 ancnode = entry[ANC_NODE]
145 ancnode = entry[ANC_NODE]
146 if ancnode in pending:
146 if ancnode in pending:
147 pending.remove(ancnode)
147 pending.remove(ancnode)
148 p1node = entry[ANC_P1NODE]
148 p1node = entry[ANC_P1NODE]
149 p2node = entry[ANC_P2NODE]
149 p2node = entry[ANC_P2NODE]
150 if p1node != nullid and p1node not in known:
150 if p1node != nullid and p1node not in known:
151 pending.add(p1node)
151 pending.add(p1node)
152 if p2node != nullid and p2node not in known:
152 if p2node != nullid and p2node not in known:
153 pending.add(p2node)
153 pending.add(p2node)
154
154
155 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
155 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
156
156
157 def _readentry(self, offset):
157 def _readentry(self, offset):
158 data = self._data
158 data = self._data
159 entry = struct.unpack(
159 entry = struct.unpack(
160 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
160 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
161 )
161 )
162 copyfrom = None
162 copyfrom = None
163 copyfromlen = entry[ANC_COPYFROM]
163 copyfromlen = entry[ANC_COPYFROM]
164 if copyfromlen != 0:
164 if copyfromlen != 0:
165 offset += PACKENTRYLENGTH
165 offset += PACKENTRYLENGTH
166 copyfrom = data[offset : offset + copyfromlen]
166 copyfrom = data[offset : offset + copyfromlen]
167 return entry, copyfrom
167 return entry, copyfrom
168
168
169 def add(self, filename, node, p1, p2, linknode, copyfrom):
169 def add(self, filename, node, p1, p2, linknode, copyfrom):
170 raise RuntimeError(
170 raise RuntimeError(
171 b"cannot add to historypack (%s:%s)" % (filename, hex(node))
171 b"cannot add to historypack (%s:%s)" % (filename, hex(node))
172 )
172 )
173
173
174 def _findnode(self, name, node):
174 def _findnode(self, name, node):
175 if self.VERSION == 0:
175 if self.VERSION == 0:
176 ancestors = self._getancestors(name, node)
176 ancestors = self._getancestors(name, node)
177 for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
177 for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
178 if ancnode == node:
178 if ancnode == node:
179 return (ancnode, p1node, p2node, linknode, copyfrom)
179 return (ancnode, p1node, p2node, linknode, copyfrom)
180 else:
180 else:
181 section = self._findsection(name)
181 section = self._findsection(name)
182 nodeindexoffset, nodeindexsize = section[3:]
182 nodeindexoffset, nodeindexsize = section[3:]
183 entry = self._bisect(
183 entry = self._bisect(
184 node,
184 node,
185 nodeindexoffset,
185 nodeindexoffset,
186 nodeindexoffset + nodeindexsize,
186 nodeindexoffset + nodeindexsize,
187 NODEINDEXENTRYLENGTH,
187 NODEINDEXENTRYLENGTH,
188 )
188 )
189 if entry is not None:
189 if entry is not None:
190 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
190 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
191 entry, copyfrom = self._readentry(offset)
191 entry, copyfrom = self._readentry(offset)
192 # Drop the copyfromlen from the end of entry, and replace it
192 # Drop the copyfromlen from the end of entry, and replace it
193 # with the copyfrom string.
193 # with the copyfrom string.
194 return entry[:4] + (copyfrom,)
194 return entry[:4] + (copyfrom,)
195
195
196 raise KeyError(b"unable to find history for %s:%s" % (name, hex(node)))
196 raise KeyError(b"unable to find history for %s:%s" % (name, hex(node)))
197
197
198 def _findsection(self, name):
198 def _findsection(self, name):
199 params = self.params
199 params = self.params
200 namehash = hashlib.sha1(name).digest()
200 namehash = hashutil.sha1(name).digest()
201 fanoutkey = struct.unpack(
201 fanoutkey = struct.unpack(
202 params.fanoutstruct, namehash[: params.fanoutprefix]
202 params.fanoutstruct, namehash[: params.fanoutprefix]
203 )[0]
203 )[0]
204 fanout = self._fanouttable
204 fanout = self._fanouttable
205
205
206 start = fanout[fanoutkey] + params.indexstart
206 start = fanout[fanoutkey] + params.indexstart
207 indexend = self._indexend
207 indexend = self._indexend
208
208
209 for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
209 for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
210 end = fanout[i] + params.indexstart
210 end = fanout[i] + params.indexstart
211 if end != start:
211 if end != start:
212 break
212 break
213 else:
213 else:
214 end = indexend
214 end = indexend
215
215
216 entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
216 entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
217 if not entry:
217 if not entry:
218 raise KeyError(name)
218 raise KeyError(name)
219
219
220 rawentry = struct.unpack(self.INDEXFORMAT, entry)
220 rawentry = struct.unpack(self.INDEXFORMAT, entry)
221 x, offset, size, nodeindexoffset, nodeindexsize = rawentry
221 x, offset, size, nodeindexoffset, nodeindexsize = rawentry
222 rawnamelen = self._index[
222 rawnamelen = self._index[
223 nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
223 nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
224 ]
224 ]
225 actualnamelen = struct.unpack(b'!H', rawnamelen)[0]
225 actualnamelen = struct.unpack(b'!H', rawnamelen)[0]
226 nodeindexoffset += constants.FILENAMESIZE
226 nodeindexoffset += constants.FILENAMESIZE
227 actualname = self._index[
227 actualname = self._index[
228 nodeindexoffset : nodeindexoffset + actualnamelen
228 nodeindexoffset : nodeindexoffset + actualnamelen
229 ]
229 ]
230 if actualname != name:
230 if actualname != name:
231 raise KeyError(
231 raise KeyError(
232 b"found file name %s when looking for %s" % (actualname, name)
232 b"found file name %s when looking for %s" % (actualname, name)
233 )
233 )
234 nodeindexoffset += actualnamelen
234 nodeindexoffset += actualnamelen
235
235
236 filenamelength = struct.unpack(
236 filenamelength = struct.unpack(
237 b'!H', self._data[offset : offset + constants.FILENAMESIZE]
237 b'!H', self._data[offset : offset + constants.FILENAMESIZE]
238 )[0]
238 )[0]
239 offset += constants.FILENAMESIZE
239 offset += constants.FILENAMESIZE
240
240
241 actualname = self._data[offset : offset + filenamelength]
241 actualname = self._data[offset : offset + filenamelength]
242 offset += filenamelength
242 offset += filenamelength
243
243
244 if name != actualname:
244 if name != actualname:
245 raise KeyError(
245 raise KeyError(
246 b"found file name %s when looking for %s" % (actualname, name)
246 b"found file name %s when looking for %s" % (actualname, name)
247 )
247 )
248
248
249 # Skip entry list size
249 # Skip entry list size
250 offset += ENTRYCOUNTSIZE
250 offset += ENTRYCOUNTSIZE
251
251
252 nodelistoffset = offset
252 nodelistoffset = offset
253 nodelistsize = (
253 nodelistsize = (
254 size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
254 size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
255 )
255 )
256 return (
256 return (
257 name,
257 name,
258 nodelistoffset,
258 nodelistoffset,
259 nodelistsize,
259 nodelistsize,
260 nodeindexoffset,
260 nodeindexoffset,
261 nodeindexsize,
261 nodeindexsize,
262 )
262 )
263
263
264 def _bisect(self, node, start, end, entrylen):
264 def _bisect(self, node, start, end, entrylen):
265 # Bisect between start and end to find node
265 # Bisect between start and end to find node
266 origstart = start
266 origstart = start
267 startnode = self._index[start : start + NODELENGTH]
267 startnode = self._index[start : start + NODELENGTH]
268 endnode = self._index[end : end + NODELENGTH]
268 endnode = self._index[end : end + NODELENGTH]
269
269
270 if startnode == node:
270 if startnode == node:
271 return self._index[start : start + entrylen]
271 return self._index[start : start + entrylen]
272 elif endnode == node:
272 elif endnode == node:
273 return self._index[end : end + entrylen]
273 return self._index[end : end + entrylen]
274 else:
274 else:
275 while start < end - entrylen:
275 while start < end - entrylen:
276 mid = start + (end - start) // 2
276 mid = start + (end - start) // 2
277 mid = mid - ((mid - origstart) % entrylen)
277 mid = mid - ((mid - origstart) % entrylen)
278 midnode = self._index[mid : mid + NODELENGTH]
278 midnode = self._index[mid : mid + NODELENGTH]
279 if midnode == node:
279 if midnode == node:
280 return self._index[mid : mid + entrylen]
280 return self._index[mid : mid + entrylen]
281 if node > midnode:
281 if node > midnode:
282 start = mid
282 start = mid
283 elif node < midnode:
283 elif node < midnode:
284 end = mid
284 end = mid
285 return None
285 return None
286
286
287 def markledger(self, ledger, options=None):
287 def markledger(self, ledger, options=None):
288 for filename, node in self:
288 for filename, node in self:
289 ledger.markhistoryentry(self, filename, node)
289 ledger.markhistoryentry(self, filename, node)
290
290
291 def cleanup(self, ledger):
291 def cleanup(self, ledger):
292 entries = ledger.sources.get(self, [])
292 entries = ledger.sources.get(self, [])
293 allkeys = set(self)
293 allkeys = set(self)
294 repackedkeys = set(
294 repackedkeys = set(
295 (e.filename, e.node) for e in entries if e.historyrepacked
295 (e.filename, e.node) for e in entries if e.historyrepacked
296 )
296 )
297
297
298 if len(allkeys - repackedkeys) == 0:
298 if len(allkeys - repackedkeys) == 0:
299 if self.path not in ledger.created:
299 if self.path not in ledger.created:
300 util.unlinkpath(self.indexpath, ignoremissing=True)
300 util.unlinkpath(self.indexpath, ignoremissing=True)
301 util.unlinkpath(self.packpath, ignoremissing=True)
301 util.unlinkpath(self.packpath, ignoremissing=True)
302
302
303 def __iter__(self):
303 def __iter__(self):
304 for f, n, x, x, x, x in self.iterentries():
304 for f, n, x, x, x, x in self.iterentries():
305 yield f, n
305 yield f, n
306
306
307 def iterentries(self):
307 def iterentries(self):
308 # Start at 1 to skip the header
308 # Start at 1 to skip the header
309 offset = 1
309 offset = 1
310 while offset < self.datasize:
310 while offset < self.datasize:
311 data = self._data
311 data = self._data
312 # <2 byte len> + <filename>
312 # <2 byte len> + <filename>
313 filenamelen = struct.unpack(
313 filenamelen = struct.unpack(
314 b'!H', data[offset : offset + constants.FILENAMESIZE]
314 b'!H', data[offset : offset + constants.FILENAMESIZE]
315 )[0]
315 )[0]
316 offset += constants.FILENAMESIZE
316 offset += constants.FILENAMESIZE
317 filename = data[offset : offset + filenamelen]
317 filename = data[offset : offset + filenamelen]
318 offset += filenamelen
318 offset += filenamelen
319
319
320 revcount = struct.unpack(
320 revcount = struct.unpack(
321 b'!I', data[offset : offset + ENTRYCOUNTSIZE]
321 b'!I', data[offset : offset + ENTRYCOUNTSIZE]
322 )[0]
322 )[0]
323 offset += ENTRYCOUNTSIZE
323 offset += ENTRYCOUNTSIZE
324
324
325 for i in pycompat.xrange(revcount):
325 for i in pycompat.xrange(revcount):
326 entry = struct.unpack(
326 entry = struct.unpack(
327 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
327 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
328 )
328 )
329 offset += PACKENTRYLENGTH
329 offset += PACKENTRYLENGTH
330
330
331 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
331 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
332 offset += entry[ANC_COPYFROM]
332 offset += entry[ANC_COPYFROM]
333
333
334 yield (
334 yield (
335 filename,
335 filename,
336 entry[ANC_NODE],
336 entry[ANC_NODE],
337 entry[ANC_P1NODE],
337 entry[ANC_P1NODE],
338 entry[ANC_P2NODE],
338 entry[ANC_P2NODE],
339 entry[ANC_LINKNODE],
339 entry[ANC_LINKNODE],
340 copyfrom,
340 copyfrom,
341 )
341 )
342
342
343 self._pagedin += PACKENTRYLENGTH
343 self._pagedin += PACKENTRYLENGTH
344
344
345 # If we've read a lot of data from the mmap, free some memory.
345 # If we've read a lot of data from the mmap, free some memory.
346 self.freememory()
346 self.freememory()
347
347
348
348
349 class mutablehistorypack(basepack.mutablebasepack):
349 class mutablehistorypack(basepack.mutablebasepack):
350 """A class for constructing and serializing a histpack file and index.
350 """A class for constructing and serializing a histpack file and index.
351
351
352 A history pack is a pair of files that contain the revision history for
352 A history pack is a pair of files that contain the revision history for
353 various file revisions in Mercurial. It contains only revision history (like
353 various file revisions in Mercurial. It contains only revision history (like
354 parent pointers and linknodes), not any revision content information.
354 parent pointers and linknodes), not any revision content information.
355
355
356 It consists of two files, with the following format:
356 It consists of two files, with the following format:
357
357
358 .histpack
358 .histpack
359 The pack itself is a series of file revisions with some basic header
359 The pack itself is a series of file revisions with some basic header
360 information on each.
360 information on each.
361
361
362 histpack = <version: 1 byte>
362 histpack = <version: 1 byte>
363 [<filesection>,...]
363 [<filesection>,...]
364 filesection = <filename len: 2 byte unsigned int>
364 filesection = <filename len: 2 byte unsigned int>
365 <filename>
365 <filename>
366 <revision count: 4 byte unsigned int>
366 <revision count: 4 byte unsigned int>
367 [<revision>,...]
367 [<revision>,...]
368 revision = <node: 20 byte>
368 revision = <node: 20 byte>
369 <p1node: 20 byte>
369 <p1node: 20 byte>
370 <p2node: 20 byte>
370 <p2node: 20 byte>
371 <linknode: 20 byte>
371 <linknode: 20 byte>
372 <copyfromlen: 2 byte>
372 <copyfromlen: 2 byte>
373 <copyfrom>
373 <copyfrom>
374
374
375 The revisions within each filesection are stored in topological order
375 The revisions within each filesection are stored in topological order
376 (newest first). If a given entry has a parent from another file (a copy)
376 (newest first). If a given entry has a parent from another file (a copy)
377 then p1node is the node from the other file, and copyfrom is the
377 then p1node is the node from the other file, and copyfrom is the
378 filepath of the other file.
378 filepath of the other file.
379
379
380 .histidx
380 .histidx
381 The index file provides a mapping from filename to the file section in
381 The index file provides a mapping from filename to the file section in
382 the histpack. In V1 it also contains sub-indexes for specific nodes
382 the histpack. In V1 it also contains sub-indexes for specific nodes
383 within each file. It consists of three parts, the fanout, the file index
383 within each file. It consists of three parts, the fanout, the file index
384 and the node indexes.
384 and the node indexes.
385
385
386 The file index is a list of index entries, sorted by filename hash (one
386 The file index is a list of index entries, sorted by filename hash (one
387 per file section in the pack). Each entry has:
387 per file section in the pack). Each entry has:
388
388
389 - node (The 20 byte hash of the filename)
389 - node (The 20 byte hash of the filename)
390 - pack entry offset (The location of this file section in the histpack)
390 - pack entry offset (The location of this file section in the histpack)
391 - pack content size (The on-disk length of this file section's pack
391 - pack content size (The on-disk length of this file section's pack
392 data)
392 data)
393 - node index offset (The location of the file's node index in the index
393 - node index offset (The location of the file's node index in the index
394 file) [1]
394 file) [1]
395 - node index size (the on-disk length of this file's node index) [1]
395 - node index size (the on-disk length of this file's node index) [1]
396
396
397 The fanout is a quick lookup table to reduce the number of steps for
397 The fanout is a quick lookup table to reduce the number of steps for
398 bisecting the index. It is a series of 4 byte pointers to positions
398 bisecting the index. It is a series of 4 byte pointers to positions
399 within the index. It has 2^8 or 2^16 entries, depending on the pack's fanout
399 within the index. It has 2^8 or 2^16 entries, depending on the pack's fanout
400 prefix size, corresponding to one- or two-byte hash prefixes. Example: with a
400 prefix size, corresponding to one- or two-byte hash prefixes. Example: with a
401 one-byte prefix, the pointer in slot 4F points to the index position of the
401 one-byte prefix, the pointer in slot 4F points to the index position of the
402 first revision whose node starts with 4F, saving up to 16 bisect steps.
402 first revision whose node starts with 4F, saving up to 16 bisect steps.
403
403
404 histidx = <fanouttable>
404 histidx = <fanouttable>
405 <file count: 8 byte unsigned> [1]
405 <file count: 8 byte unsigned> [1]
406 <fileindex>
406 <fileindex>
407 <node count: 8 byte unsigned> [1]
407 <node count: 8 byte unsigned> [1]
408 [<nodeindex>,...] [1]
408 [<nodeindex>,...] [1]
409 fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
409 fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
410
410
411 fileindex = [<file index entry>,...]
411 fileindex = [<file index entry>,...]
412 fileindexentry = <node: 20 byte>
412 fileindexentry = <node: 20 byte>
413 <pack file section offset: 8 byte unsigned int>
413 <pack file section offset: 8 byte unsigned int>
414 <pack file section size: 8 byte unsigned int>
414 <pack file section size: 8 byte unsigned int>
415 <node index offset: 4 byte unsigned int> [1]
415 <node index offset: 4 byte unsigned int> [1]
416 <node index size: 4 byte unsigned int> [1]
416 <node index size: 4 byte unsigned int> [1]
417 nodeindex = <filename>[<node index entry>,...] [1]
417 nodeindex = <filename>[<node index entry>,...] [1]
418 filename = <filename len : 2 byte unsigned int><filename value> [1]
418 filename = <filename len : 2 byte unsigned int><filename value> [1]
419 nodeindexentry = <node: 20 byte> [1]
419 nodeindexentry = <node: 20 byte> [1]
420 <pack file node offset: 8 byte unsigned int> [1]
420 <pack file node offset: 8 byte unsigned int> [1]
421
421
422 [1]: new in version 1.
422 [1]: new in version 1.
423 """
423 """
424
424
425 INDEXSUFFIX = INDEXSUFFIX
425 INDEXSUFFIX = INDEXSUFFIX
426 PACKSUFFIX = PACKSUFFIX
426 PACKSUFFIX = PACKSUFFIX
427
427
428 SUPPORTED_VERSIONS = [2]
428 SUPPORTED_VERSIONS = [2]
429
429
430 def __init__(self, ui, packpath, version=2):
430 def __init__(self, ui, packpath, version=2):
431 super(mutablehistorypack, self).__init__(ui, packpath, version=version)
431 super(mutablehistorypack, self).__init__(ui, packpath, version=version)
432 self.files = {}
432 self.files = {}
433 self.entrylocations = {}
433 self.entrylocations = {}
434 self.fileentries = {}
434 self.fileentries = {}
435
435
436 self.INDEXFORMAT = INDEXFORMAT2
436 self.INDEXFORMAT = INDEXFORMAT2
437 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
437 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
438
438
439 self.NODEINDEXFORMAT = NODEINDEXFORMAT
439 self.NODEINDEXFORMAT = NODEINDEXFORMAT
440 self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
440 self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
441
441
442 def add(self, filename, node, p1, p2, linknode, copyfrom):
442 def add(self, filename, node, p1, p2, linknode, copyfrom):
443 copyfrom = copyfrom or b''
443 copyfrom = copyfrom or b''
444 copyfromlen = struct.pack(b'!H', len(copyfrom))
444 copyfromlen = struct.pack(b'!H', len(copyfrom))
445 self.fileentries.setdefault(filename, []).append(
445 self.fileentries.setdefault(filename, []).append(
446 (node, p1, p2, linknode, copyfromlen, copyfrom)
446 (node, p1, p2, linknode, copyfromlen, copyfrom)
447 )
447 )
448
448
449 def _write(self):
449 def _write(self):
450 for filename in sorted(self.fileentries):
450 for filename in sorted(self.fileentries):
451 entries = self.fileentries[filename]
451 entries = self.fileentries[filename]
452 sectionstart = self.packfp.tell()
452 sectionstart = self.packfp.tell()
453
453
454 # Write the file section content
454 # Write the file section content
455 entrymap = dict((e[0], e) for e in entries)
455 entrymap = dict((e[0], e) for e in entries)
456
456
457 def parentfunc(node):
457 def parentfunc(node):
458 x, p1, p2, x, x, x = entrymap[node]
458 x, p1, p2, x, x, x = entrymap[node]
459 parents = []
459 parents = []
460 if p1 != nullid:
460 if p1 != nullid:
461 parents.append(p1)
461 parents.append(p1)
462 if p2 != nullid:
462 if p2 != nullid:
463 parents.append(p2)
463 parents.append(p2)
464 return parents
464 return parents
465
465
466 sortednodes = list(
466 sortednodes = list(
467 reversed(
467 reversed(
468 shallowutil.sortnodes((e[0] for e in entries), parentfunc)
468 shallowutil.sortnodes((e[0] for e in entries), parentfunc)
469 )
469 )
470 )
470 )
471
471
472 # Write the file section header
472 # Write the file section header
473 self.writeraw(
473 self.writeraw(
474 b"%s%s%s"
474 b"%s%s%s"
475 % (
475 % (
476 struct.pack(b'!H', len(filename)),
476 struct.pack(b'!H', len(filename)),
477 filename,
477 filename,
478 struct.pack(b'!I', len(sortednodes)),
478 struct.pack(b'!I', len(sortednodes)),
479 )
479 )
480 )
480 )
481
481
482 sectionlen = constants.FILENAMESIZE + len(filename) + 4
482 sectionlen = constants.FILENAMESIZE + len(filename) + 4
483
483
484 rawstrings = []
484 rawstrings = []
485
485
486 # Record the node locations for the index
486 # Record the node locations for the index
487 locations = self.entrylocations.setdefault(filename, {})
487 locations = self.entrylocations.setdefault(filename, {})
488 offset = sectionstart + sectionlen
488 offset = sectionstart + sectionlen
489 for node in sortednodes:
489 for node in sortednodes:
490 locations[node] = offset
490 locations[node] = offset
491 raw = b'%s%s%s%s%s%s' % entrymap[node]
491 raw = b'%s%s%s%s%s%s' % entrymap[node]
492 rawstrings.append(raw)
492 rawstrings.append(raw)
493 offset += len(raw)
493 offset += len(raw)
494
494
495 rawdata = b''.join(rawstrings)
495 rawdata = b''.join(rawstrings)
496 sectionlen += len(rawdata)
496 sectionlen += len(rawdata)
497
497
498 self.writeraw(rawdata)
498 self.writeraw(rawdata)
499
499
500 # Record metadata for the index
500 # Record metadata for the index
501 self.files[filename] = (sectionstart, sectionlen)
501 self.files[filename] = (sectionstart, sectionlen)
502 node = hashlib.sha1(filename).digest()
502 node = hashutil.sha1(filename).digest()
503 self.entries[node] = node
503 self.entries[node] = node
504
504
505 def close(self, ledger=None):
505 def close(self, ledger=None):
506 if self._closed:
506 if self._closed:
507 return
507 return
508
508
509 self._write()
509 self._write()
510
510
511 return super(mutablehistorypack, self).close(ledger=ledger)
511 return super(mutablehistorypack, self).close(ledger=ledger)
512
512
513 def createindex(self, nodelocations, indexoffset):
513 def createindex(self, nodelocations, indexoffset):
514 fileindexformat = self.INDEXFORMAT
514 fileindexformat = self.INDEXFORMAT
515 fileindexlength = self.INDEXENTRYLENGTH
515 fileindexlength = self.INDEXENTRYLENGTH
516 nodeindexformat = self.NODEINDEXFORMAT
516 nodeindexformat = self.NODEINDEXFORMAT
517 nodeindexlength = self.NODEINDEXENTRYLENGTH
517 nodeindexlength = self.NODEINDEXENTRYLENGTH
518
518
519 files = (
519 files = (
520 (hashlib.sha1(filename).digest(), filename, offset, size)
520 (hashutil.sha1(filename).digest(), filename, offset, size)
521 for filename, (offset, size) in pycompat.iteritems(self.files)
521 for filename, (offset, size) in pycompat.iteritems(self.files)
522 )
522 )
523 files = sorted(files)
523 files = sorted(files)
524
524
525 # node index is after file index size, file index, and node index size
525 # node index is after file index size, file index, and node index size
526 indexlensize = struct.calcsize(b'!Q')
526 indexlensize = struct.calcsize(b'!Q')
527 nodeindexoffset = (
527 nodeindexoffset = (
528 indexoffset
528 indexoffset
529 + indexlensize
529 + indexlensize
530 + (len(files) * fileindexlength)
530 + (len(files) * fileindexlength)
531 + indexlensize
531 + indexlensize
532 )
532 )
533
533
534 fileindexentries = []
534 fileindexentries = []
535 nodeindexentries = []
535 nodeindexentries = []
536 nodecount = 0
536 nodecount = 0
537 for namehash, filename, offset, size in files:
537 for namehash, filename, offset, size in files:
538 # File section index
538 # File section index
539 nodelocations = self.entrylocations[filename]
539 nodelocations = self.entrylocations[filename]
540
540
541 nodeindexsize = len(nodelocations) * nodeindexlength
541 nodeindexsize = len(nodelocations) * nodeindexlength
542
542
543 rawentry = struct.pack(
543 rawentry = struct.pack(
544 fileindexformat,
544 fileindexformat,
545 namehash,
545 namehash,
546 offset,
546 offset,
547 size,
547 size,
548 nodeindexoffset,
548 nodeindexoffset,
549 nodeindexsize,
549 nodeindexsize,
550 )
550 )
551 # Node index
551 # Node index
552 nodeindexentries.append(
552 nodeindexentries.append(
553 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
553 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
554 )
554 )
555 nodeindexoffset += constants.FILENAMESIZE + len(filename)
555 nodeindexoffset += constants.FILENAMESIZE + len(filename)
556
556
557 for node, location in sorted(pycompat.iteritems(nodelocations)):
557 for node, location in sorted(pycompat.iteritems(nodelocations)):
558 nodeindexentries.append(
558 nodeindexentries.append(
559 struct.pack(nodeindexformat, node, location)
559 struct.pack(nodeindexformat, node, location)
560 )
560 )
561 nodecount += 1
561 nodecount += 1
562
562
563 nodeindexoffset += len(nodelocations) * nodeindexlength
563 nodeindexoffset += len(nodelocations) * nodeindexlength
564
564
565 fileindexentries.append(rawentry)
565 fileindexentries.append(rawentry)
566
566
567 nodecountraw = struct.pack(b'!Q', nodecount)
567 nodecountraw = struct.pack(b'!Q', nodecount)
568 return (
568 return (
569 b''.join(fileindexentries)
569 b''.join(fileindexentries)
570 + nodecountraw
570 + nodecountraw
571 + b''.join(nodeindexentries)
571 + b''.join(nodeindexentries)
572 )
572 )
@@ -1,536 +1,536 b''
1 # shallowutil.py -- remotefilelog utilities
1 # shallowutil.py -- remotefilelog utilities
2 #
2 #
3 # Copyright 2014 Facebook, Inc.
3 # Copyright 2014 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import collections
9 import collections
10 import errno
10 import errno
11 import hashlib
12 import os
11 import os
13 import stat
12 import stat
14 import struct
13 import struct
15 import tempfile
14 import tempfile
16
15
17 from mercurial.i18n import _
16 from mercurial.i18n import _
18 from mercurial.pycompat import open
17 from mercurial.pycompat import open
19 from mercurial import (
18 from mercurial import (
20 error,
19 error,
21 node,
20 node,
22 pycompat,
21 pycompat,
23 revlog,
22 revlog,
24 util,
23 util,
25 )
24 )
26 from mercurial.utils import (
25 from mercurial.utils import (
26 hashutil,
27 storageutil,
27 storageutil,
28 stringutil,
28 stringutil,
29 )
29 )
30 from . import constants
30 from . import constants
31
31
32 if not pycompat.iswindows:
32 if not pycompat.iswindows:
33 import grp
33 import grp
34
34
35
35
36 def isenabled(repo):
36 def isenabled(repo):
37 """returns whether the repository is remotefilelog enabled or not"""
37 """returns whether the repository is remotefilelog enabled or not"""
38 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
38 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
39
39
40
40
41 def getcachekey(reponame, file, id):
41 def getcachekey(reponame, file, id):
42 pathhash = node.hex(hashlib.sha1(file).digest())
42 pathhash = node.hex(hashutil.sha1(file).digest())
43 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
43 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
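For illustration, the cache key is the hex SHA-1 of the file path split after its first two characters (a SHA-1 hex digest is 40 characters long); the arguments below are made-up placeholders:

key = getcachekey(b'myrepo', b'path/to/file.py', b'<node hex>')
# key has the shape b'myrepo/<2 hex chars>/<38 hex chars>/<node hex>'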
44
44
45
45
46 def getlocalkey(file, id):
46 def getlocalkey(file, id):
47 pathhash = node.hex(hashlib.sha1(file).digest())
47 pathhash = node.hex(hashutil.sha1(file).digest())
48 return os.path.join(pathhash, id)
48 return os.path.join(pathhash, id)
49
49
50
50
51 def getcachepath(ui, allowempty=False):
51 def getcachepath(ui, allowempty=False):
52 cachepath = ui.config(b"remotefilelog", b"cachepath")
52 cachepath = ui.config(b"remotefilelog", b"cachepath")
53 if not cachepath:
53 if not cachepath:
54 if allowempty:
54 if allowempty:
55 return None
55 return None
56 else:
56 else:
57 raise error.Abort(
57 raise error.Abort(
58 _(b"could not find config option remotefilelog.cachepath")
58 _(b"could not find config option remotefilelog.cachepath")
59 )
59 )
60 return util.expandpath(cachepath)
60 return util.expandpath(cachepath)
61
61
62
62
63 def getcachepackpath(repo, category):
63 def getcachepackpath(repo, category):
64 cachepath = getcachepath(repo.ui)
64 cachepath = getcachepath(repo.ui)
65 if category != constants.FILEPACK_CATEGORY:
65 if category != constants.FILEPACK_CATEGORY:
66 return os.path.join(cachepath, repo.name, b'packs', category)
66 return os.path.join(cachepath, repo.name, b'packs', category)
67 else:
67 else:
68 return os.path.join(cachepath, repo.name, b'packs')
68 return os.path.join(cachepath, repo.name, b'packs')
69
69
70
70
71 def getlocalpackpath(base, category):
71 def getlocalpackpath(base, category):
72 return os.path.join(base, b'packs', category)
72 return os.path.join(base, b'packs', category)
73
73
74
74
75 def createrevlogtext(text, copyfrom=None, copyrev=None):
75 def createrevlogtext(text, copyfrom=None, copyrev=None):
76 """returns a string that matches the revlog contents in a
76 """returns a string that matches the revlog contents in a
77 traditional revlog
77 traditional revlog
78 """
78 """
79 meta = {}
79 meta = {}
80 if copyfrom or text.startswith(b'\1\n'):
80 if copyfrom or text.startswith(b'\1\n'):
81 if copyfrom:
81 if copyfrom:
82 meta[b'copy'] = copyfrom
82 meta[b'copy'] = copyfrom
83 meta[b'copyrev'] = copyrev
83 meta[b'copyrev'] = copyrev
84 text = storageutil.packmeta(meta, text)
84 text = storageutil.packmeta(meta, text)
85
85
86 return text
86 return text
87
87
88
88
89 def parsemeta(text):
89 def parsemeta(text):
90 """parse mercurial filelog metadata"""
90 """parse mercurial filelog metadata"""
91 meta, size = storageutil.parsemeta(text)
91 meta, size = storageutil.parsemeta(text)
92 if text.startswith(b'\1\n'):
92 if text.startswith(b'\1\n'):
93 s = text.index(b'\1\n', 2)
93 s = text.index(b'\1\n', 2)
94 text = text[s + 2 :]
94 text = text[s + 2 :]
95 return meta or {}, text
95 return meta or {}, text
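A small usage sketch tying createrevlogtext and parsemeta together; it assumes storageutil.packmeta and storageutil.parsemeta are inverses, which is what the code above relies on:

text = createrevlogtext(b'file contents', copyfrom=b'old/name', copyrev=b'0' * 40)
meta, body = parsemeta(text)
# meta should come back as {b'copy': b'old/name', b'copyrev': b'0' * 40}
# and body as b'file contents'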
96
96
97
97
98 def sumdicts(*dicts):
98 def sumdicts(*dicts):
99 """Adds all the values of *dicts together into one dictionary. This assumes
99 """Adds all the values of *dicts together into one dictionary. This assumes
100 the values in *dicts are all summable.
100 the values in *dicts are all summable.
101
101
102 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
102 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
103 """
103 """
104 result = collections.defaultdict(lambda: 0)
104 result = collections.defaultdict(lambda: 0)
105 for dict in dicts:
105 for dict in dicts:
106 for k, v in pycompat.iteritems(dict):
106 for k, v in pycompat.iteritems(dict):
107 result[k] += v
107 result[k] += v
108 return result
108 return result
109
109
110
110
111 def prefixkeys(dict, prefix):
111 def prefixkeys(dict, prefix):
112 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
112 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
113 result = {}
113 result = {}
114 for k, v in pycompat.iteritems(dict):
114 for k, v in pycompat.iteritems(dict):
115 result[prefix + k] = v
115 result[prefix + k] = v
116 return result
116 return result
117
117
118
118
119 def reportpackmetrics(ui, prefix, *stores):
119 def reportpackmetrics(ui, prefix, *stores):
120 dicts = [s.getmetrics() for s in stores]
120 dicts = [s.getmetrics() for s in stores]
121 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
121 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
122 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
122 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
123
123
124
124
125 def _parsepackmeta(metabuf):
125 def _parsepackmeta(metabuf):
126 """parse datapack meta, bytes (<metadata-list>) -> dict
126 """parse datapack meta, bytes (<metadata-list>) -> dict
127
127
128 The dict contains raw content - both keys and values are strings.
128 The dict contains raw content - both keys and values are strings.
129 Upper-level business may want to convert some of them to other types like
129 Upper-level business may want to convert some of them to other types like
130 integers, on their own.
130 integers, on their own.
131
131
132 raise ValueError if the data is corrupted
132 raise ValueError if the data is corrupted
133 """
133 """
134 metadict = {}
134 metadict = {}
135 offset = 0
135 offset = 0
136 buflen = len(metabuf)
136 buflen = len(metabuf)
137 while buflen - offset >= 3:
137 while buflen - offset >= 3:
138 key = metabuf[offset : offset + 1]
138 key = metabuf[offset : offset + 1]
139 offset += 1
139 offset += 1
140 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
140 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
141 offset += 2
141 offset += 2
142 if offset + metalen > buflen:
142 if offset + metalen > buflen:
143 raise ValueError(b'corrupted metadata: incomplete buffer')
143 raise ValueError(b'corrupted metadata: incomplete buffer')
144 value = metabuf[offset : offset + metalen]
144 value = metabuf[offset : offset + metalen]
145 metadict[key] = value
145 metadict[key] = value
146 offset += metalen
146 offset += metalen
147 if offset != buflen:
147 if offset != buflen:
148 raise ValueError(b'corrupted metadata: redundant data')
148 raise ValueError(b'corrupted metadata: redundant data')
149 return metadict
149 return metadict
150
150
151
151
152 def _buildpackmeta(metadict):
152 def _buildpackmeta(metadict):
153 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
153 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
154
154
155 The dict contains raw content - both keys and values are strings.
155 The dict contains raw content - both keys and values are strings.
156 Upper-level business may want to serialize values of other types (like
156 Upper-level business may want to serialize values of other types (like
157 integers) to strings before calling this function.
157 integers) to strings before calling this function.
158
158
159 raise ProgrammingError when metadata key is illegal, or ValueError if
159 raise ProgrammingError when metadata key is illegal, or ValueError if
160 length limit is exceeded
160 length limit is exceeded
161 """
161 """
162 metabuf = b''
162 metabuf = b''
163 for k, v in sorted(pycompat.iteritems((metadict or {}))):
163 for k, v in sorted(pycompat.iteritems((metadict or {}))):
164 if len(k) != 1:
164 if len(k) != 1:
165 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
165 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
166 if len(v) > 0xFFFE:
166 if len(v) > 0xFFFE:
167 raise ValueError(
167 raise ValueError(
168 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
168 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
169 )
169 )
170 metabuf += k
170 metabuf += k
171 metabuf += struct.pack(b'!H', len(v))
171 metabuf += struct.pack(b'!H', len(v))
172 metabuf += v
172 metabuf += v
173 # len(metabuf) is guaranteed representable in 4 bytes, because there are
173 # len(metabuf) is guaranteed representable in 4 bytes, because there are
174 # only 256 keys, and for each value, len(value) <= 0xfffe.
174 # only 256 keys, and for each value, len(value) <= 0xfffe.
175 return metabuf
175 return metabuf
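A round-trip example of the encoding described above (one-byte key, two-byte big-endian length, raw value), using the two helpers in this module:

metabuf = _buildpackmeta({b's': b'1234', b'f': b'\x01'})
assert _parsepackmeta(metabuf) == {b's': b'1234', b'f': b'\x01'}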
176
176
177
177
178 _metaitemtypes = {
178 _metaitemtypes = {
179 constants.METAKEYFLAG: (int, pycompat.long),
179 constants.METAKEYFLAG: (int, pycompat.long),
180 constants.METAKEYSIZE: (int, pycompat.long),
180 constants.METAKEYSIZE: (int, pycompat.long),
181 }
181 }
182
182
183
183
184 def buildpackmeta(metadict):
184 def buildpackmeta(metadict):
185 """like _buildpackmeta, but typechecks metadict and normalize it.
185 """like _buildpackmeta, but typechecks metadict and normalize it.
186
186
187 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
187 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
188 and METAKEYFLAG will be dropped if its value is 0.
188 and METAKEYFLAG will be dropped if its value is 0.
189 """
189 """
190 newmeta = {}
190 newmeta = {}
191 for k, v in pycompat.iteritems(metadict or {}):
191 for k, v in pycompat.iteritems(metadict or {}):
192 expectedtype = _metaitemtypes.get(k, (bytes,))
192 expectedtype = _metaitemtypes.get(k, (bytes,))
193 if not isinstance(v, expectedtype):
193 if not isinstance(v, expectedtype):
194 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
194 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
195 # normalize int to binary buffer
195 # normalize int to binary buffer
196 if int in expectedtype:
196 if int in expectedtype:
197 # optimization: remove flag if it's 0 to save space
197 # optimization: remove flag if it's 0 to save space
198 if k == constants.METAKEYFLAG and v == 0:
198 if k == constants.METAKEYFLAG and v == 0:
199 continue
199 continue
200 v = int2bin(v)
200 v = int2bin(v)
201 newmeta[k] = v
201 newmeta[k] = v
202 return _buildpackmeta(newmeta)
202 return _buildpackmeta(newmeta)
203
203
204
204
205 def parsepackmeta(metabuf):
205 def parsepackmeta(metabuf):
206 """like _parsepackmeta, but convert fields to desired types automatically.
206 """like _parsepackmeta, but convert fields to desired types automatically.
207
207
208 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
208 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
209 integers.
209 integers.
210 """
210 """
211 metadict = _parsepackmeta(metabuf)
211 metadict = _parsepackmeta(metabuf)
212 for k, v in pycompat.iteritems(metadict):
212 for k, v in pycompat.iteritems(metadict):
213 if k in _metaitemtypes and int in _metaitemtypes[k]:
213 if k in _metaitemtypes and int in _metaitemtypes[k]:
214 metadict[k] = bin2int(v)
214 metadict[k] = bin2int(v)
215 return metadict
215 return metadict
216
216
217
217
218 def int2bin(n):
218 def int2bin(n):
219 """convert a non-negative integer to raw binary buffer"""
219 """convert a non-negative integer to raw binary buffer"""
220 buf = bytearray()
220 buf = bytearray()
221 while n > 0:
221 while n > 0:
222 buf.insert(0, n & 0xFF)
222 buf.insert(0, n & 0xFF)
223 n >>= 8
223 n >>= 8
224 return bytes(buf)
224 return bytes(buf)
225
225
226
226
227 def bin2int(buf):
227 def bin2int(buf):
228 """the reverse of int2bin, convert a binary buffer to an integer"""
228 """the reverse of int2bin, convert a binary buffer to an integer"""
229 x = 0
229 x = 0
230 for b in bytearray(buf):
230 for b in bytearray(buf):
231 x <<= 8
231 x <<= 8
232 x |= b
232 x |= b
233 return x
233 return x
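For example, the two helpers above round-trip any non-negative integer (0 encodes to the empty buffer):

assert int2bin(0x1234) == b'\x12\x34'
assert bin2int(int2bin(0x1234)) == 0x1234
assert int2bin(0) == b'' and bin2int(b'') == 0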
234
234
235
235
236 def parsesizeflags(raw):
236 def parsesizeflags(raw):
237 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
237 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
238
238
239 see remotefilelogserver.createfileblob for the format.
239 see remotefilelogserver.createfileblob for the format.
240 raise RuntimeError if the content is ill-formed.
240 raise RuntimeError if the content is ill-formed.
241 """
241 """
242 flags = revlog.REVIDX_DEFAULT_FLAGS
242 flags = revlog.REVIDX_DEFAULT_FLAGS
243 size = None
243 size = None
244 try:
244 try:
245 index = raw.index(b'\0')
245 index = raw.index(b'\0')
246 header = raw[:index]
246 header = raw[:index]
247 if header.startswith(b'v'):
247 if header.startswith(b'v'):
248 # v1 and above, header starts with 'v'
248 # v1 and above, header starts with 'v'
249 if header.startswith(b'v1\n'):
249 if header.startswith(b'v1\n'):
250 for s in header.split(b'\n'):
250 for s in header.split(b'\n'):
251 if s.startswith(constants.METAKEYSIZE):
251 if s.startswith(constants.METAKEYSIZE):
252 size = int(s[len(constants.METAKEYSIZE) :])
252 size = int(s[len(constants.METAKEYSIZE) :])
253 elif s.startswith(constants.METAKEYFLAG):
253 elif s.startswith(constants.METAKEYFLAG):
254 flags = int(s[len(constants.METAKEYFLAG) :])
254 flags = int(s[len(constants.METAKEYFLAG) :])
255 else:
255 else:
256 raise RuntimeError(
256 raise RuntimeError(
257 b'unsupported remotefilelog header: %s' % header
257 b'unsupported remotefilelog header: %s' % header
258 )
258 )
259 else:
259 else:
260 # v0, str(int(size)) is the header
260 # v0, str(int(size)) is the header
261 size = int(header)
261 size = int(header)
262 except ValueError:
262 except ValueError:
263 raise RuntimeError("unexpected remotefilelog header: illegal format")
263 raise RuntimeError("unexpected remotefilelog header: illegal format")
264 if size is None:
264 if size is None:
265 raise RuntimeError("unexpected remotefilelog header: no size found")
265 raise RuntimeError("unexpected remotefilelog header: no size found")
266 return index + 1, size, flags
266 return index + 1, size, flags
267
267
268
268
269 def buildfileblobheader(size, flags, version=None):
269 def buildfileblobheader(size, flags, version=None):
270 """return the header of a remotefilelog blob.
270 """return the header of a remotefilelog blob.
271
271
272 see remotefilelogserver.createfileblob for the format.
272 see remotefilelogserver.createfileblob for the format.
273 approximately the reverse of parsesizeflags.
273 approximately the reverse of parsesizeflags.
274
274
275 version could be 0 or 1, or None (auto decide).
275 version could be 0 or 1, or None (auto decide).
276 """
276 """
277 # choose v0 if flags is empty, otherwise v1
277 # choose v0 if flags is empty, otherwise v1
278 if version is None:
278 if version is None:
279 version = int(bool(flags))
279 version = int(bool(flags))
280 if version == 1:
280 if version == 1:
281 header = b'v1\n%s%d\n%s%d' % (
281 header = b'v1\n%s%d\n%s%d' % (
282 constants.METAKEYSIZE,
282 constants.METAKEYSIZE,
283 size,
283 size,
284 constants.METAKEYFLAG,
284 constants.METAKEYFLAG,
285 flags,
285 flags,
286 )
286 )
287 elif version == 0:
287 elif version == 0:
288 if flags:
288 if flags:
289 raise error.ProgrammingError(b'fileblob v0 does not support flag')
289 raise error.ProgrammingError(b'fileblob v0 does not support flag')
290 header = b'%d' % size
290 header = b'%d' % size
291 else:
291 else:
292 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
292 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
293 return header
293 return header
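A usage sketch tying buildfileblobheader and parsesizeflags together; appending the b'\0' separator and the text is the caller's job (see remotefilelogserver.createfileblob):

text = b'hello'
raw = buildfileblobheader(len(text), 0, version=1) + b'\0' + text
offset, size, flags = parsesizeflags(raw)
assert size == len(text) and raw[offset:offset + size] == text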
294
294
295
295
296 def ancestormap(raw):
296 def ancestormap(raw):
297 offset, size, flags = parsesizeflags(raw)
297 offset, size, flags = parsesizeflags(raw)
298 start = offset + size
298 start = offset + size
299
299
300 mapping = {}
300 mapping = {}
301 while start < len(raw):
301 while start < len(raw):
302 divider = raw.index(b'\0', start + 80)
302 divider = raw.index(b'\0', start + 80)
303
303
304 currentnode = raw[start : (start + 20)]
304 currentnode = raw[start : (start + 20)]
305 p1 = raw[(start + 20) : (start + 40)]
305 p1 = raw[(start + 20) : (start + 40)]
306 p2 = raw[(start + 40) : (start + 60)]
306 p2 = raw[(start + 40) : (start + 60)]
307 linknode = raw[(start + 60) : (start + 80)]
307 linknode = raw[(start + 60) : (start + 80)]
308 copyfrom = raw[(start + 80) : divider]
308 copyfrom = raw[(start + 80) : divider]
309
309
310 mapping[currentnode] = (p1, p2, linknode, copyfrom)
310 mapping[currentnode] = (p1, p2, linknode, copyfrom)
311 start = divider + 1
311 start = divider + 1
312
312
313 return mapping
313 return mapping
314
314
315
315
316 def readfile(path):
316 def readfile(path):
317 f = open(path, b'rb')
317 f = open(path, b'rb')
318 try:
318 try:
319 result = f.read()
319 result = f.read()
320
320
321 # we should never have empty files
321 # we should never have empty files
322 if not result:
322 if not result:
323 os.remove(path)
323 os.remove(path)
324 raise IOError(b"empty file: %s" % path)
324 raise IOError(b"empty file: %s" % path)
325
325
326 return result
326 return result
327 finally:
327 finally:
328 f.close()
328 f.close()
329
329
330
330
331 def unlinkfile(filepath):
331 def unlinkfile(filepath):
332 if pycompat.iswindows:
332 if pycompat.iswindows:
333 # On Windows, os.unlink cannot delete readonly files
333 # On Windows, os.unlink cannot delete readonly files
334 os.chmod(filepath, stat.S_IWUSR)
334 os.chmod(filepath, stat.S_IWUSR)
335 os.unlink(filepath)
335 os.unlink(filepath)
336
336
337
337
338 def renamefile(source, destination):
338 def renamefile(source, destination):
339 if pycompat.iswindows:
339 if pycompat.iswindows:
340 # On Windows, os.rename cannot rename readonly files
340 # On Windows, os.rename cannot rename readonly files
341 # and cannot overwrite destination if it exists
341 # and cannot overwrite destination if it exists
342 os.chmod(source, stat.S_IWUSR)
342 os.chmod(source, stat.S_IWUSR)
343 if os.path.isfile(destination):
343 if os.path.isfile(destination):
344 os.chmod(destination, stat.S_IWUSR)
344 os.chmod(destination, stat.S_IWUSR)
345 os.unlink(destination)
345 os.unlink(destination)
346
346
347 os.rename(source, destination)
347 os.rename(source, destination)
348
348
349
349
350 def writefile(path, content, readonly=False):
350 def writefile(path, content, readonly=False):
351 dirname, filename = os.path.split(path)
351 dirname, filename = os.path.split(path)
352 if not os.path.exists(dirname):
352 if not os.path.exists(dirname):
353 try:
353 try:
354 os.makedirs(dirname)
354 os.makedirs(dirname)
355 except OSError as ex:
355 except OSError as ex:
356 if ex.errno != errno.EEXIST:
356 if ex.errno != errno.EEXIST:
357 raise
357 raise
358
358
359 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
359 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
360 os.close(fd)
360 os.close(fd)
361
361
362 try:
362 try:
363 f = util.posixfile(temp, b'wb')
363 f = util.posixfile(temp, b'wb')
364 f.write(content)
364 f.write(content)
365 f.close()
365 f.close()
366
366
367 if readonly:
367 if readonly:
368 mode = 0o444
368 mode = 0o444
369 else:
369 else:
370 # tempfiles are created with 0o600, so we need to manually set the
370 # tempfiles are created with 0o600, so we need to manually set the
371 # mode.
371 # mode.
372 oldumask = os.umask(0)
372 oldumask = os.umask(0)
373 # there's no way to get the umask without modifying it, so set it
373 # there's no way to get the umask without modifying it, so set it
374 # back
374 # back
375 os.umask(oldumask)
375 os.umask(oldumask)
376 mode = ~oldumask
376 mode = ~oldumask
377
377
378 renamefile(temp, path)
378 renamefile(temp, path)
379 os.chmod(path, mode)
379 os.chmod(path, mode)
380 except Exception:
380 except Exception:
381 try:
381 try:
382 unlinkfile(temp)
382 unlinkfile(temp)
383 except OSError:
383 except OSError:
384 pass
384 pass
385 raise
385 raise
386
386
387
387
388 def sortnodes(nodes, parentfunc):
388 def sortnodes(nodes, parentfunc):
389 """Topologically sorts the nodes, using the parentfunc to find
389 """Topologically sorts the nodes, using the parentfunc to find
390 the parents of nodes."""
390 the parents of nodes."""
391 nodes = set(nodes)
391 nodes = set(nodes)
392 childmap = {}
392 childmap = {}
393 parentmap = {}
393 parentmap = {}
394 roots = []
394 roots = []
395
395
396 # Build a child and parent map
396 # Build a child and parent map
397 for n in nodes:
397 for n in nodes:
398 parents = [p for p in parentfunc(n) if p in nodes]
398 parents = [p for p in parentfunc(n) if p in nodes]
399 parentmap[n] = set(parents)
399 parentmap[n] = set(parents)
400 for p in parents:
400 for p in parents:
401 childmap.setdefault(p, set()).add(n)
401 childmap.setdefault(p, set()).add(n)
402 if not parents:
402 if not parents:
403 roots.append(n)
403 roots.append(n)
404
404
405 roots.sort()
405 roots.sort()
406 # Process roots, adding children to the queue as they become roots
406 # Process roots, adding children to the queue as they become roots
407 results = []
407 results = []
408 while roots:
408 while roots:
409 n = roots.pop(0)
409 n = roots.pop(0)
410 results.append(n)
410 results.append(n)
411 if n in childmap:
411 if n in childmap:
412 children = childmap[n]
412 children = childmap[n]
413 for c in children:
413 for c in children:
414 childparents = parentmap[c]
414 childparents = parentmap[c]
415 childparents.remove(n)
415 childparents.remove(n)
416 if len(childparents) == 0:
416 if len(childparents) == 0:
417 # insert at the beginning, that way child nodes
417 # insert at the beginning, that way child nodes
418 # are likely to be output immediately after their
418 # are likely to be output immediately after their
419 # parents. This gives better compression results.
419 # parents. This gives better compression results.
420 roots.insert(0, c)
420 roots.insert(0, c)
421
421
422 return results
422 return results
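A usage sketch of sortnodes on a three-node chain (single-byte stand-ins for real nodes); parents always come out before their children:

parents = {b'a': [], b'b': [b'a'], b'c': [b'b']}
assert sortnodes([b'b', b'c', b'a'], lambda n: parents[n]) == [b'a', b'b', b'c']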
423
423
424
424
425 def readexactly(stream, n):
425 def readexactly(stream, n):
426 '''read n bytes from stream.read and abort if less was available'''
426 '''read n bytes from stream.read and abort if less was available'''
427 s = stream.read(n)
427 s = stream.read(n)
428 if len(s) < n:
428 if len(s) < n:
429 raise error.Abort(
429 raise error.Abort(
430 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
430 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
431 % (len(s), n)
431 % (len(s), n)
432 )
432 )
433 return s
433 return s
434
434
435
435
436 def readunpack(stream, fmt):
436 def readunpack(stream, fmt):
437 data = readexactly(stream, struct.calcsize(fmt))
437 data = readexactly(stream, struct.calcsize(fmt))
438 return struct.unpack(fmt, data)
438 return struct.unpack(fmt, data)
439
439
440
440
441 def readpath(stream):
441 def readpath(stream):
442 rawlen = readexactly(stream, constants.FILENAMESIZE)
442 rawlen = readexactly(stream, constants.FILENAMESIZE)
443 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
443 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
444 return readexactly(stream, pathlen)
444 return readexactly(stream, pathlen)
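For illustration, these stream helpers accept any object exposing read(); the sketch below assumes constants.FILENAMESTRUCT is the 2-byte b'!H' length prefix used elsewhere in this file:

import io
import struct

stream = io.BytesIO(struct.pack(b'!H', 7) + b'foo/bar')
assert readpath(stream) == b'foo/bar'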
445
445
446
446
447 def readnodelist(stream):
447 def readnodelist(stream):
448 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
448 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
449 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
449 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
450 for i in pycompat.xrange(nodecount):
450 for i in pycompat.xrange(nodecount):
451 yield readexactly(stream, constants.NODESIZE)
451 yield readexactly(stream, constants.NODESIZE)
452
452
453
453
454 def readpathlist(stream):
454 def readpathlist(stream):
455 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
455 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
456 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
456 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
457 for i in pycompat.xrange(pathcount):
457 for i in pycompat.xrange(pathcount):
458 yield readpath(stream)
458 yield readpath(stream)
459
459
460
460
461 def getgid(groupname):
461 def getgid(groupname):
462 try:
462 try:
463 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
463 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
464 return gid
464 return gid
465 except KeyError:
465 except KeyError:
466 return None
466 return None
467
467
468
468
469 def setstickygroupdir(path, gid, warn=None):
469 def setstickygroupdir(path, gid, warn=None):
470 if gid is None:
470 if gid is None:
471 return
471 return
472 try:
472 try:
473 os.chown(path, -1, gid)
473 os.chown(path, -1, gid)
474 os.chmod(path, 0o2775)
474 os.chmod(path, 0o2775)
475 except (IOError, OSError) as ex:
475 except (IOError, OSError) as ex:
476 if warn:
476 if warn:
477 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
477 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
478
478
479
479
480 def mkstickygroupdir(ui, path):
480 def mkstickygroupdir(ui, path):
481 """Creates the given directory (if it doesn't exist) and give it a
481 """Creates the given directory (if it doesn't exist) and give it a
482 particular group with setgid enabled."""
482 particular group with setgid enabled."""
483 gid = None
483 gid = None
484 groupname = ui.config(b"remotefilelog", b"cachegroup")
484 groupname = ui.config(b"remotefilelog", b"cachegroup")
485 if groupname:
485 if groupname:
486 gid = getgid(groupname)
486 gid = getgid(groupname)
487 if gid is None:
487 if gid is None:
488 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
488 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
489
489
490 # we use a single stat syscall to test the existence and mode / group bit
490 # we use a single stat syscall to test the existence and mode / group bit
491 st = None
491 st = None
492 try:
492 try:
493 st = os.stat(path)
493 st = os.stat(path)
494 except OSError:
494 except OSError:
495 pass
495 pass
496
496
497 if st:
497 if st:
498 # exists
498 # exists
499 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
499 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
500 # permission needs to be fixed
500 # permission needs to be fixed
501 setstickygroupdir(path, gid, ui.warn)
501 setstickygroupdir(path, gid, ui.warn)
502 return
502 return
503
503
504 oldumask = os.umask(0o002)
504 oldumask = os.umask(0o002)
505 try:
505 try:
506 missingdirs = [path]
506 missingdirs = [path]
507 path = os.path.dirname(path)
507 path = os.path.dirname(path)
508 while path and not os.path.exists(path):
508 while path and not os.path.exists(path):
509 missingdirs.append(path)
509 missingdirs.append(path)
510 path = os.path.dirname(path)
510 path = os.path.dirname(path)
511
511
512 for path in reversed(missingdirs):
512 for path in reversed(missingdirs):
513 try:
513 try:
514 os.mkdir(path)
514 os.mkdir(path)
515 except OSError as ex:
515 except OSError as ex:
516 if ex.errno != errno.EEXIST:
516 if ex.errno != errno.EEXIST:
517 raise
517 raise
518
518
519 for path in missingdirs:
519 for path in missingdirs:
520 setstickygroupdir(path, gid, ui.warn)
520 setstickygroupdir(path, gid, ui.warn)
521 finally:
521 finally:
522 os.umask(oldumask)
522 os.umask(oldumask)
523
523
524
524
525 def getusername(ui):
525 def getusername(ui):
526 try:
526 try:
527 return stringutil.shortuser(ui.username())
527 return stringutil.shortuser(ui.username())
528 except Exception:
528 except Exception:
529 return b'unknown'
529 return b'unknown'
530
530
531
531
532 def getreponame(ui):
532 def getreponame(ui):
533 reponame = ui.config(b'paths', b'default')
533 reponame = ui.config(b'paths', b'default')
534 if reponame:
534 if reponame:
535 return os.path.basename(reponame)
535 return os.path.basename(reponame)
536 return b"unknown"
536 return b"unknown"
@@ -1,1293 +1,1295 b''
1 # sqlitestore.py - Storage backend that uses SQLite
1 # sqlitestore.py - Storage backend that uses SQLite
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """store repository data in SQLite (EXPERIMENTAL)
8 """store repository data in SQLite (EXPERIMENTAL)
9
9
10 The sqlitestore extension enables the storage of repository data in SQLite.
10 The sqlitestore extension enables the storage of repository data in SQLite.
11
11
12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 GUARANTEES. This means that repositories created with this extension may
13 GUARANTEES. This means that repositories created with this extension may
14 only be usable with the exact version of this extension/Mercurial that was
14 only be usable with the exact version of this extension/Mercurial that was
15 used. The extension attempts to enforce this in order to prevent repository
15 used. The extension attempts to enforce this in order to prevent repository
16 corruption.
16 corruption.
17
17
18 In addition, several features are not yet supported or have known bugs:
18 In addition, several features are not yet supported or have known bugs:
19
19
20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 data is not yet stored in SQLite.
21 data is not yet stored in SQLite.
22 * Transactions are not robust. If the process is aborted at the right time
22 * Transactions are not robust. If the process is aborted at the right time
23 during transaction close/rollback, the repository could be in an inconsistent
23 during transaction close/rollback, the repository could be in an inconsistent
24 state. This problem will diminish once all repository data is tracked by
24 state. This problem will diminish once all repository data is tracked by
25 SQLite.
25 SQLite.
26 * Bundle repositories do not work (the ability to use e.g.
26 * Bundle repositories do not work (the ability to use e.g.
27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 existing repository).
28 existing repository).
29 * Various other features don't work.
29 * Various other features don't work.
30
30
31 This extension should work for basic clone/pull, update, and commit workflows.
31 This extension should work for basic clone/pull, update, and commit workflows.
32 Some history rewriting operations may fail due to lack of support for bundle
32 Some history rewriting operations may fail due to lack of support for bundle
33 repositories.
33 repositories.
34
34
35 To use, activate the extension and set the ``storage.new-repo-backend`` config
35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 """
37 """
38
38
39 # To run the test suite with repos using SQLite by default, execute the
39 # To run the test suite with repos using SQLite by default, execute the
40 # following:
40 # following:
41 #
41 #
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 # --extra-config-opt extensions.sqlitestore= \
43 # --extra-config-opt extensions.sqlitestore= \
44 # --extra-config-opt storage.new-repo-backend=sqlite
44 # --extra-config-opt storage.new-repo-backend=sqlite
45
45
46 from __future__ import absolute_import
46 from __future__ import absolute_import
47
47
48 import hashlib
49 import sqlite3
48 import sqlite3
50 import struct
49 import struct
51 import threading
50 import threading
52 import zlib
51 import zlib
53
52
54 from mercurial.i18n import _
53 from mercurial.i18n import _
55 from mercurial.node import (
54 from mercurial.node import (
56 nullid,
55 nullid,
57 nullrev,
56 nullrev,
58 short,
57 short,
59 )
58 )
60 from mercurial.thirdparty import attr
59 from mercurial.thirdparty import attr
61 from mercurial import (
60 from mercurial import (
62 ancestor,
61 ancestor,
63 dagop,
62 dagop,
64 encoding,
63 encoding,
65 error,
64 error,
66 extensions,
65 extensions,
67 localrepo,
66 localrepo,
68 mdiff,
67 mdiff,
69 pycompat,
68 pycompat,
70 registrar,
69 registrar,
71 util,
70 util,
72 verify,
71 verify,
73 )
72 )
74 from mercurial.interfaces import (
73 from mercurial.interfaces import (
75 repository,
74 repository,
76 util as interfaceutil,
75 util as interfaceutil,
77 )
76 )
78 from mercurial.utils import storageutil
77 from mercurial.utils import (
78 hashutil,
79 storageutil,
80 )
79
81
80 try:
82 try:
81 from mercurial import zstd
83 from mercurial import zstd
82
84
83 zstd.__version__
85 zstd.__version__
84 except ImportError:
86 except ImportError:
85 zstd = None
87 zstd = None
86
88
87 configtable = {}
89 configtable = {}
88 configitem = registrar.configitem(configtable)
90 configitem = registrar.configitem(configtable)
89
91
90 # experimental config: storage.sqlite.compression
92 # experimental config: storage.sqlite.compression
91 configitem(
93 configitem(
92 b'storage',
94 b'storage',
93 b'sqlite.compression',
95 b'sqlite.compression',
94 default=b'zstd' if zstd else b'zlib',
96 default=b'zstd' if zstd else b'zlib',
95 experimental=True,
97 experimental=True,
96 )
98 )
97
99
98 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
100 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
99 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
101 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
100 # be specifying the version(s) of Mercurial they are tested with, or
102 # be specifying the version(s) of Mercurial they are tested with, or
101 # leave the attribute unspecified.
103 # leave the attribute unspecified.
102 testedwith = b'ships-with-hg-core'
104 testedwith = b'ships-with-hg-core'
103
105
104 REQUIREMENT = b'exp-sqlite-001'
106 REQUIREMENT = b'exp-sqlite-001'
105 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
107 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
106 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
108 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
107 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
109 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
108 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
110 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
109
111
110 CURRENT_SCHEMA_VERSION = 1
112 CURRENT_SCHEMA_VERSION = 1
111
113
112 COMPRESSION_NONE = 1
114 COMPRESSION_NONE = 1
113 COMPRESSION_ZSTD = 2
115 COMPRESSION_ZSTD = 2
114 COMPRESSION_ZLIB = 3
116 COMPRESSION_ZLIB = 3
115
117
116 FLAG_CENSORED = 1
118 FLAG_CENSORED = 1
117 FLAG_MISSING_P1 = 2
119 FLAG_MISSING_P1 = 2
118 FLAG_MISSING_P2 = 4
120 FLAG_MISSING_P2 = 4
119
121
120 CREATE_SCHEMA = [
122 CREATE_SCHEMA = [
121 # Deltas are stored as content-indexed blobs.
123 # Deltas are stored as content-indexed blobs.
122 # compression column holds COMPRESSION_* constant for how the
124 # compression column holds COMPRESSION_* constant for how the
123 # delta is encoded.
125 # delta is encoded.
124 'CREATE TABLE delta ('
126 'CREATE TABLE delta ('
125 ' id INTEGER PRIMARY KEY, '
127 ' id INTEGER PRIMARY KEY, '
126 ' compression INTEGER NOT NULL, '
128 ' compression INTEGER NOT NULL, '
127 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
129 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
128 ' delta BLOB NOT NULL '
130 ' delta BLOB NOT NULL '
129 ')',
131 ')',
130 # Tracked paths are denormalized to integers to avoid redundant
132 # Tracked paths are denormalized to integers to avoid redundant
131 # storage of the path name.
133 # storage of the path name.
132 'CREATE TABLE filepath ('
134 'CREATE TABLE filepath ('
133 ' id INTEGER PRIMARY KEY, '
135 ' id INTEGER PRIMARY KEY, '
134 ' path BLOB NOT NULL '
136 ' path BLOB NOT NULL '
135 ')',
137 ')',
136 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
138 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
137 # We have a single table for all file revision data.
139 # We have a single table for all file revision data.
138 # Each file revision is uniquely described by a (path, rev) and
140 # Each file revision is uniquely described by a (path, rev) and
139 # (path, node).
141 # (path, node).
140 #
142 #
141 # Revision data is stored as a pointer to the delta producing this
143 # Revision data is stored as a pointer to the delta producing this
142 # revision and the file revision whose delta should be applied before
144 # revision and the file revision whose delta should be applied before
143 # that one. One can reconstruct the delta chain by recursively following
145 # that one. One can reconstruct the delta chain by recursively following
144 # the delta base revision pointers until one encounters NULL.
146 # the delta base revision pointers until one encounters NULL.
145 #
147 #
146 # flags column holds bitwise integer flags controlling storage options.
148 # flags column holds bitwise integer flags controlling storage options.
147 # These flags are defined by the FLAG_* constants.
149 # These flags are defined by the FLAG_* constants.
148 'CREATE TABLE fileindex ('
150 'CREATE TABLE fileindex ('
149 ' id INTEGER PRIMARY KEY, '
151 ' id INTEGER PRIMARY KEY, '
150 ' pathid INTEGER REFERENCES filepath(id), '
152 ' pathid INTEGER REFERENCES filepath(id), '
151 ' revnum INTEGER NOT NULL, '
153 ' revnum INTEGER NOT NULL, '
152 ' p1rev INTEGER NOT NULL, '
154 ' p1rev INTEGER NOT NULL, '
153 ' p2rev INTEGER NOT NULL, '
155 ' p2rev INTEGER NOT NULL, '
154 ' linkrev INTEGER NOT NULL, '
156 ' linkrev INTEGER NOT NULL, '
155 ' flags INTEGER NOT NULL, '
157 ' flags INTEGER NOT NULL, '
156 ' deltaid INTEGER REFERENCES delta(id), '
158 ' deltaid INTEGER REFERENCES delta(id), '
157 ' deltabaseid INTEGER REFERENCES fileindex(id), '
159 ' deltabaseid INTEGER REFERENCES fileindex(id), '
158 ' node BLOB NOT NULL '
160 ' node BLOB NOT NULL '
159 ')',
161 ')',
160 'CREATE UNIQUE INDEX fileindex_pathrevnum '
162 'CREATE UNIQUE INDEX fileindex_pathrevnum '
161 ' ON fileindex (pathid, revnum)',
163 ' ON fileindex (pathid, revnum)',
162 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
164 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
163 # Provide a view over all file data for convenience.
165 # Provide a view over all file data for convenience.
164 'CREATE VIEW filedata AS '
166 'CREATE VIEW filedata AS '
165 'SELECT '
167 'SELECT '
166 ' fileindex.id AS id, '
168 ' fileindex.id AS id, '
167 ' filepath.id AS pathid, '
169 ' filepath.id AS pathid, '
168 ' filepath.path AS path, '
170 ' filepath.path AS path, '
169 ' fileindex.revnum AS revnum, '
171 ' fileindex.revnum AS revnum, '
170 ' fileindex.node AS node, '
172 ' fileindex.node AS node, '
171 ' fileindex.p1rev AS p1rev, '
173 ' fileindex.p1rev AS p1rev, '
172 ' fileindex.p2rev AS p2rev, '
174 ' fileindex.p2rev AS p2rev, '
173 ' fileindex.linkrev AS linkrev, '
175 ' fileindex.linkrev AS linkrev, '
174 ' fileindex.flags AS flags, '
176 ' fileindex.flags AS flags, '
175 ' fileindex.deltaid AS deltaid, '
177 ' fileindex.deltaid AS deltaid, '
176 ' fileindex.deltabaseid AS deltabaseid '
178 ' fileindex.deltabaseid AS deltabaseid '
177 'FROM filepath, fileindex '
179 'FROM filepath, fileindex '
178 'WHERE fileindex.pathid=filepath.id',
180 'WHERE fileindex.pathid=filepath.id',
179 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
181 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
180 ]
182 ]
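
# A small self-contained sketch, not used by the extension itself, showing
# that CREATE_SCHEMA can be exercised against a throwaway in-memory database.
# Handy for poking at how filepath and fileindex rows relate; all names below
# are hypothetical.
def _demoschema():
    db = sqlite3.connect(':memory:')
    db.text_factory = bytes
    for statement in CREATE_SCHEMA:
        db.execute(statement)
    db.execute('INSERT INTO filepath (path) VALUES (?)', (b'demo/file.txt',))
    pathid = db.execute(
        'SELECT id FROM filepath WHERE path=?', (b'demo/file.txt',)
    ).fetchone()[0]
    return db, pathid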
181
183
182
184
183 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
185 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
184 """Resolve a delta chain for a file node."""
186 """Resolve a delta chain for a file node."""
185
187
186 # TODO the "not in ({stops})" here is possibly slowing down the query
188 # TODO the "not in ({stops})" here is possibly slowing down the query
187 # because it needs to perform the lookup on every recursive invocation.
189 # because it needs to perform the lookup on every recursive invocation.
188 # This could possibly be faster if we created a temporary query with
190 # This could possibly be faster if we created a temporary query with
189 # baseid "poisoned" to null and limited the recursive filter to
191 # baseid "poisoned" to null and limited the recursive filter to
190 # "is not null".
192 # "is not null".
191 res = db.execute(
193 res = db.execute(
192 'WITH RECURSIVE '
194 'WITH RECURSIVE '
193 ' deltachain(deltaid, baseid) AS ('
195 ' deltachain(deltaid, baseid) AS ('
194 ' SELECT deltaid, deltabaseid FROM fileindex '
196 ' SELECT deltaid, deltabaseid FROM fileindex '
195 ' WHERE pathid=? AND node=? '
197 ' WHERE pathid=? AND node=? '
196 ' UNION ALL '
198 ' UNION ALL '
197 ' SELECT fileindex.deltaid, deltabaseid '
199 ' SELECT fileindex.deltaid, deltabaseid '
198 ' FROM fileindex, deltachain '
200 ' FROM fileindex, deltachain '
199 ' WHERE '
201 ' WHERE '
200 ' fileindex.id=deltachain.baseid '
202 ' fileindex.id=deltachain.baseid '
201 ' AND deltachain.baseid IS NOT NULL '
203 ' AND deltachain.baseid IS NOT NULL '
202 ' AND fileindex.id NOT IN ({stops}) '
204 ' AND fileindex.id NOT IN ({stops}) '
203 ' ) '
205 ' ) '
204 'SELECT deltachain.baseid, compression, delta '
206 'SELECT deltachain.baseid, compression, delta '
205 'FROM deltachain, delta '
207 'FROM deltachain, delta '
206 'WHERE delta.id=deltachain.deltaid'.format(
208 'WHERE delta.id=deltachain.deltaid'.format(
207 stops=','.join(['?'] * len(stoprids))
209 stops=','.join(['?'] * len(stoprids))
208 ),
210 ),
209 tuple([pathid, node] + list(stoprids.keys())),
211 tuple([pathid, node] + list(stoprids.keys())),
210 )
212 )
211
213
212 deltas = []
214 deltas = []
213 lastdeltabaseid = None
215 lastdeltabaseid = None
214
216
215 for deltabaseid, compression, delta in res:
217 for deltabaseid, compression, delta in res:
216 lastdeltabaseid = deltabaseid
218 lastdeltabaseid = deltabaseid
217
219
218 if compression == COMPRESSION_ZSTD:
220 if compression == COMPRESSION_ZSTD:
219 delta = zstddctx.decompress(delta)
221 delta = zstddctx.decompress(delta)
220 elif compression == COMPRESSION_NONE:
222 elif compression == COMPRESSION_NONE:
221 delta = delta
223 delta = delta
222 elif compression == COMPRESSION_ZLIB:
224 elif compression == COMPRESSION_ZLIB:
223 delta = zlib.decompress(delta)
225 delta = zlib.decompress(delta)
224 else:
226 else:
225 raise SQLiteStoreError(
227 raise SQLiteStoreError(
226 b'unhandled compression type: %d' % compression
228 b'unhandled compression type: %d' % compression
227 )
229 )
228
230
229 deltas.append(delta)
231 deltas.append(delta)
230
232
231 if lastdeltabaseid in stoprids:
233 if lastdeltabaseid in stoprids:
232 basetext = revisioncache[stoprids[lastdeltabaseid]]
234 basetext = revisioncache[stoprids[lastdeltabaseid]]
233 else:
235 else:
234 basetext = deltas.pop()
236 basetext = deltas.pop()
235
237
236 deltas.reverse()
238 deltas.reverse()
237 fulltext = mdiff.patches(basetext, deltas)
239 fulltext = mdiff.patches(basetext, deltas)
238
240
239 # SQLite returns buffer instances for blob columns on Python 2. This
241 # SQLite returns buffer instances for blob columns on Python 2. This
240 # type can propagate through the delta application layer. Because
242 # type can propagate through the delta application layer. Because
241 # downstream callers assume revisions are bytes, cast as needed.
243 # downstream callers assume revisions are bytes, cast as needed.
242 if not isinstance(fulltext, bytes):
244 if not isinstance(fulltext, bytes):
243 fulltext = bytes(fulltext)
245 fulltext = bytes(fulltext)
244
246
245 return fulltext
247 return fulltext
246
248
247
249
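# Usage sketch for resolvedeltachain() when no fulltext cache is warm (the
# same calling convention censorrevision() uses further down): with an empty
# revision cache nothing can short-circuit the walk, so the -1 sentinel is
# passed in stoprids and the recursive query runs down to the chain's base.
def _fulltextwithoutcache(db, pathid, node, zstddctx=None):
    return resolvedeltachain(
        db, pathid, node, {}, {-1: None}, zstddctx=zstddctx
    )
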
248 def insertdelta(db, compression, hash, delta):
250 def insertdelta(db, compression, hash, delta):
249 try:
251 try:
250 return db.execute(
252 return db.execute(
251 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
253 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
252 (compression, hash, delta),
254 (compression, hash, delta),
253 ).lastrowid
255 ).lastrowid
254 except sqlite3.IntegrityError:
256 except sqlite3.IntegrityError:
255 return db.execute(
257 return db.execute(
256 'SELECT id FROM delta WHERE hash=?', (hash,)
258 'SELECT id FROM delta WHERE hash=?', (hash,)
257 ).fetchone()[0]
259 ).fetchone()[0]
258
260
259
261
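# Sketch of insertdelta()'s de-duplication, assuming a database created from
# CREATE_SCHEMA (e.g. via a helper like _demoschema() above): because
# delta.hash is declared UNIQUE, re-inserting identical content hits
# sqlite3.IntegrityError and the id of the existing row is returned instead
# of creating a second copy.
def _demodedup(db):
    delta = b'example delta payload'
    deltahash = hashutil.sha1(delta).digest()
    first = insertdelta(db, COMPRESSION_NONE, deltahash, delta)
    second = insertdelta(db, COMPRESSION_NONE, deltahash, delta)
    assert first == second
    return first
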
260 class SQLiteStoreError(error.StorageError):
262 class SQLiteStoreError(error.StorageError):
261 pass
263 pass
262
264
263
265
264 @attr.s
266 @attr.s
265 class revisionentry(object):
267 class revisionentry(object):
266 rid = attr.ib()
268 rid = attr.ib()
267 rev = attr.ib()
269 rev = attr.ib()
268 node = attr.ib()
270 node = attr.ib()
269 p1rev = attr.ib()
271 p1rev = attr.ib()
270 p2rev = attr.ib()
272 p2rev = attr.ib()
271 p1node = attr.ib()
273 p1node = attr.ib()
272 p2node = attr.ib()
274 p2node = attr.ib()
273 linkrev = attr.ib()
275 linkrev = attr.ib()
274 flags = attr.ib()
276 flags = attr.ib()
275
277
276
278
277 @interfaceutil.implementer(repository.irevisiondelta)
279 @interfaceutil.implementer(repository.irevisiondelta)
278 @attr.s(slots=True)
280 @attr.s(slots=True)
279 class sqliterevisiondelta(object):
281 class sqliterevisiondelta(object):
280 node = attr.ib()
282 node = attr.ib()
281 p1node = attr.ib()
283 p1node = attr.ib()
282 p2node = attr.ib()
284 p2node = attr.ib()
283 basenode = attr.ib()
285 basenode = attr.ib()
284 flags = attr.ib()
286 flags = attr.ib()
285 baserevisionsize = attr.ib()
287 baserevisionsize = attr.ib()
286 revision = attr.ib()
288 revision = attr.ib()
287 delta = attr.ib()
289 delta = attr.ib()
288 linknode = attr.ib(default=None)
290 linknode = attr.ib(default=None)
289
291
290
292
291 @interfaceutil.implementer(repository.iverifyproblem)
293 @interfaceutil.implementer(repository.iverifyproblem)
292 @attr.s(frozen=True)
294 @attr.s(frozen=True)
293 class sqliteproblem(object):
295 class sqliteproblem(object):
294 warning = attr.ib(default=None)
296 warning = attr.ib(default=None)
295 error = attr.ib(default=None)
297 error = attr.ib(default=None)
296 node = attr.ib(default=None)
298 node = attr.ib(default=None)
297
299
298
300
299 @interfaceutil.implementer(repository.ifilestorage)
301 @interfaceutil.implementer(repository.ifilestorage)
300 class sqlitefilestore(object):
302 class sqlitefilestore(object):
301 """Implements storage for an individual tracked path."""
303 """Implements storage for an individual tracked path."""
302
304
303 def __init__(self, db, path, compression):
305 def __init__(self, db, path, compression):
304 self._db = db
306 self._db = db
305 self._path = path
307 self._path = path
306
308
307 self._pathid = None
309 self._pathid = None
308
310
309 # revnum -> node
311 # revnum -> node
310 self._revtonode = {}
312 self._revtonode = {}
311 # node -> revnum
313 # node -> revnum
312 self._nodetorev = {}
314 self._nodetorev = {}
313 # node -> data structure
315 # node -> data structure
314 self._revisions = {}
316 self._revisions = {}
315
317
316 self._revisioncache = util.lrucachedict(10)
318 self._revisioncache = util.lrucachedict(10)
317
319
318 self._compengine = compression
320 self._compengine = compression
319
321
320 if compression == b'zstd':
322 if compression == b'zstd':
321 self._cctx = zstd.ZstdCompressor(level=3)
323 self._cctx = zstd.ZstdCompressor(level=3)
322 self._dctx = zstd.ZstdDecompressor()
324 self._dctx = zstd.ZstdDecompressor()
323 else:
325 else:
324 self._cctx = None
326 self._cctx = None
325 self._dctx = None
327 self._dctx = None
326
328
327 self._refreshindex()
329 self._refreshindex()
328
330
329 def _refreshindex(self):
331 def _refreshindex(self):
330 self._revtonode = {}
332 self._revtonode = {}
331 self._nodetorev = {}
333 self._nodetorev = {}
332 self._revisions = {}
334 self._revisions = {}
333
335
334 res = list(
336 res = list(
335 self._db.execute(
337 self._db.execute(
336 'SELECT id FROM filepath WHERE path=?', (self._path,)
338 'SELECT id FROM filepath WHERE path=?', (self._path,)
337 )
339 )
338 )
340 )
339
341
340 if not res:
342 if not res:
341 self._pathid = None
343 self._pathid = None
342 return
344 return
343
345
344 self._pathid = res[0][0]
346 self._pathid = res[0][0]
345
347
346 res = self._db.execute(
348 res = self._db.execute(
347 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
349 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
348 'FROM fileindex '
350 'FROM fileindex '
349 'WHERE pathid=? '
351 'WHERE pathid=? '
350 'ORDER BY revnum ASC',
352 'ORDER BY revnum ASC',
351 (self._pathid,),
353 (self._pathid,),
352 )
354 )
353
355
354 for i, row in enumerate(res):
356 for i, row in enumerate(res):
355 rid, rev, node, p1rev, p2rev, linkrev, flags = row
357 rid, rev, node, p1rev, p2rev, linkrev, flags = row
356
358
357 if i != rev:
359 if i != rev:
358 raise SQLiteStoreError(
360 raise SQLiteStoreError(
359 _(b'sqlite database has inconsistent revision numbers')
361 _(b'sqlite database has inconsistent revision numbers')
360 )
362 )
361
363
362 if p1rev == nullrev:
364 if p1rev == nullrev:
363 p1node = nullid
365 p1node = nullid
364 else:
366 else:
365 p1node = self._revtonode[p1rev]
367 p1node = self._revtonode[p1rev]
366
368
367 if p2rev == nullrev:
369 if p2rev == nullrev:
368 p2node = nullid
370 p2node = nullid
369 else:
371 else:
370 p2node = self._revtonode[p2rev]
372 p2node = self._revtonode[p2rev]
371
373
372 entry = revisionentry(
374 entry = revisionentry(
373 rid=rid,
375 rid=rid,
374 rev=rev,
376 rev=rev,
375 node=node,
377 node=node,
376 p1rev=p1rev,
378 p1rev=p1rev,
377 p2rev=p2rev,
379 p2rev=p2rev,
378 p1node=p1node,
380 p1node=p1node,
379 p2node=p2node,
381 p2node=p2node,
380 linkrev=linkrev,
382 linkrev=linkrev,
381 flags=flags,
383 flags=flags,
382 )
384 )
383
385
384 self._revtonode[rev] = node
386 self._revtonode[rev] = node
385 self._nodetorev[node] = rev
387 self._nodetorev[node] = rev
386 self._revisions[node] = entry
388 self._revisions[node] = entry
387
389
388 # Start of ifileindex interface.
390 # Start of ifileindex interface.
389
391
390 def __len__(self):
392 def __len__(self):
391 return len(self._revisions)
393 return len(self._revisions)
392
394
393 def __iter__(self):
395 def __iter__(self):
394 return iter(pycompat.xrange(len(self._revisions)))
396 return iter(pycompat.xrange(len(self._revisions)))
395
397
396 def hasnode(self, node):
398 def hasnode(self, node):
397 if node == nullid:
399 if node == nullid:
398 return False
400 return False
399
401
400 return node in self._nodetorev
402 return node in self._nodetorev
401
403
402 def revs(self, start=0, stop=None):
404 def revs(self, start=0, stop=None):
403 return storageutil.iterrevs(
405 return storageutil.iterrevs(
404 len(self._revisions), start=start, stop=stop
406 len(self._revisions), start=start, stop=stop
405 )
407 )
406
408
407 def parents(self, node):
409 def parents(self, node):
408 if node == nullid:
410 if node == nullid:
409 return nullid, nullid
411 return nullid, nullid
410
412
411 if node not in self._revisions:
413 if node not in self._revisions:
412 raise error.LookupError(node, self._path, _(b'no node'))
414 raise error.LookupError(node, self._path, _(b'no node'))
413
415
414 entry = self._revisions[node]
416 entry = self._revisions[node]
415 return entry.p1node, entry.p2node
417 return entry.p1node, entry.p2node
416
418
417 def parentrevs(self, rev):
419 def parentrevs(self, rev):
418 if rev == nullrev:
420 if rev == nullrev:
419 return nullrev, nullrev
421 return nullrev, nullrev
420
422
421 if rev not in self._revtonode:
423 if rev not in self._revtonode:
422 raise IndexError(rev)
424 raise IndexError(rev)
423
425
424 entry = self._revisions[self._revtonode[rev]]
426 entry = self._revisions[self._revtonode[rev]]
425 return entry.p1rev, entry.p2rev
427 return entry.p1rev, entry.p2rev
426
428
427 def rev(self, node):
429 def rev(self, node):
428 if node == nullid:
430 if node == nullid:
429 return nullrev
431 return nullrev
430
432
431 if node not in self._nodetorev:
433 if node not in self._nodetorev:
432 raise error.LookupError(node, self._path, _(b'no node'))
434 raise error.LookupError(node, self._path, _(b'no node'))
433
435
434 return self._nodetorev[node]
436 return self._nodetorev[node]
435
437
436 def node(self, rev):
438 def node(self, rev):
437 if rev == nullrev:
439 if rev == nullrev:
438 return nullid
440 return nullid
439
441
440 if rev not in self._revtonode:
442 if rev not in self._revtonode:
441 raise IndexError(rev)
443 raise IndexError(rev)
442
444
443 return self._revtonode[rev]
445 return self._revtonode[rev]
444
446
445 def lookup(self, node):
447 def lookup(self, node):
446 return storageutil.fileidlookup(self, node, self._path)
448 return storageutil.fileidlookup(self, node, self._path)
447
449
448 def linkrev(self, rev):
450 def linkrev(self, rev):
449 if rev == nullrev:
451 if rev == nullrev:
450 return nullrev
452 return nullrev
451
453
452 if rev not in self._revtonode:
454 if rev not in self._revtonode:
453 raise IndexError(rev)
455 raise IndexError(rev)
454
456
455 entry = self._revisions[self._revtonode[rev]]
457 entry = self._revisions[self._revtonode[rev]]
456 return entry.linkrev
458 return entry.linkrev
457
459
458 def iscensored(self, rev):
460 def iscensored(self, rev):
459 if rev == nullrev:
461 if rev == nullrev:
460 return False
462 return False
461
463
462 if rev not in self._revtonode:
464 if rev not in self._revtonode:
463 raise IndexError(rev)
465 raise IndexError(rev)
464
466
465 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
467 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
466
468
467 def commonancestorsheads(self, node1, node2):
469 def commonancestorsheads(self, node1, node2):
468 rev1 = self.rev(node1)
470 rev1 = self.rev(node1)
469 rev2 = self.rev(node2)
471 rev2 = self.rev(node2)
470
472
471 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
473 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
472 return pycompat.maplist(self.node, ancestors)
474 return pycompat.maplist(self.node, ancestors)
473
475
474 def descendants(self, revs):
476 def descendants(self, revs):
475 # TODO we could implement this using a recursive SQL query, which
477 # TODO we could implement this using a recursive SQL query, which
476 # might be faster.
478 # might be faster.
477 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
479 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
478
480
479 def heads(self, start=None, stop=None):
481 def heads(self, start=None, stop=None):
480 if start is None and stop is None:
482 if start is None and stop is None:
481 if not len(self):
483 if not len(self):
482 return [nullid]
484 return [nullid]
483
485
484 startrev = self.rev(start) if start is not None else nullrev
486 startrev = self.rev(start) if start is not None else nullrev
485 stoprevs = {self.rev(n) for n in stop or []}
487 stoprevs = {self.rev(n) for n in stop or []}
486
488
487 revs = dagop.headrevssubset(
489 revs = dagop.headrevssubset(
488 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
490 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
489 )
491 )
490
492
491 return [self.node(rev) for rev in revs]
493 return [self.node(rev) for rev in revs]
492
494
493 def children(self, node):
495 def children(self, node):
494 rev = self.rev(node)
496 rev = self.rev(node)
495
497
496 res = self._db.execute(
498 res = self._db.execute(
497 'SELECT'
499 'SELECT'
498 ' node '
500 ' node '
499 ' FROM filedata '
501 ' FROM filedata '
500 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
502 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
501 ' ORDER BY revnum ASC',
503 ' ORDER BY revnum ASC',
502 (self._path, rev, rev),
504 (self._path, rev, rev),
503 )
505 )
504
506
505 return [row[0] for row in res]
507 return [row[0] for row in res]
506
508
507 # End of ifileindex interface.
509 # End of ifileindex interface.
508
510
509 # Start of ifiledata interface.
511 # Start of ifiledata interface.
510
512
511 def size(self, rev):
513 def size(self, rev):
512 if rev == nullrev:
514 if rev == nullrev:
513 return 0
515 return 0
514
516
515 if rev not in self._revtonode:
517 if rev not in self._revtonode:
516 raise IndexError(rev)
518 raise IndexError(rev)
517
519
518 node = self._revtonode[rev]
520 node = self._revtonode[rev]
519
521
520 if self.renamed(node):
522 if self.renamed(node):
521 return len(self.read(node))
523 return len(self.read(node))
522
524
523 return len(self.revision(node))
525 return len(self.revision(node))
524
526
525 def revision(self, node, raw=False, _verifyhash=True):
527 def revision(self, node, raw=False, _verifyhash=True):
526 if node in (nullid, nullrev):
528 if node in (nullid, nullrev):
527 return b''
529 return b''
528
530
529 if isinstance(node, int):
531 if isinstance(node, int):
530 node = self.node(node)
532 node = self.node(node)
531
533
532 if node not in self._nodetorev:
534 if node not in self._nodetorev:
533 raise error.LookupError(node, self._path, _(b'no node'))
535 raise error.LookupError(node, self._path, _(b'no node'))
534
536
535 if node in self._revisioncache:
537 if node in self._revisioncache:
536 return self._revisioncache[node]
538 return self._revisioncache[node]
537
539
538 # Because we have a fulltext revision cache, we are able to
540 # Because we have a fulltext revision cache, we are able to
539 # short-circuit delta chain traversal and decompression as soon as
541 # short-circuit delta chain traversal and decompression as soon as
540 # we encounter a revision in the cache.
542 # we encounter a revision in the cache.
541
543
542 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
544 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
543
545
544 if not stoprids:
546 if not stoprids:
545 stoprids[-1] = None
547 stoprids[-1] = None
546
548
547 fulltext = resolvedeltachain(
549 fulltext = resolvedeltachain(
548 self._db,
550 self._db,
549 self._pathid,
551 self._pathid,
550 node,
552 node,
551 self._revisioncache,
553 self._revisioncache,
552 stoprids,
554 stoprids,
553 zstddctx=self._dctx,
555 zstddctx=self._dctx,
554 )
556 )
555
557
556 # Don't verify hashes if parent nodes were rewritten, as the hash
558 # Don't verify hashes if parent nodes were rewritten, as the hash
557 # wouldn't verify.
559 # wouldn't verify.
558 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
560 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
559 _verifyhash = False
561 _verifyhash = False
560
562
561 if _verifyhash:
563 if _verifyhash:
562 self._checkhash(fulltext, node)
564 self._checkhash(fulltext, node)
563 self._revisioncache[node] = fulltext
565 self._revisioncache[node] = fulltext
564
566
565 return fulltext
567 return fulltext
566
568
567 def rawdata(self, *args, **kwargs):
569 def rawdata(self, *args, **kwargs):
568 return self.revision(*args, **kwargs)
570 return self.revision(*args, **kwargs)
569
571
570 def read(self, node):
572 def read(self, node):
571 return storageutil.filtermetadata(self.revision(node))
573 return storageutil.filtermetadata(self.revision(node))
572
574
573 def renamed(self, node):
575 def renamed(self, node):
574 return storageutil.filerevisioncopied(self, node)
576 return storageutil.filerevisioncopied(self, node)
575
577
576 def cmp(self, node, fulltext):
578 def cmp(self, node, fulltext):
577 return not storageutil.filedataequivalent(self, node, fulltext)
579 return not storageutil.filedataequivalent(self, node, fulltext)
578
580
579 def emitrevisions(
581 def emitrevisions(
580 self,
582 self,
581 nodes,
583 nodes,
582 nodesorder=None,
584 nodesorder=None,
583 revisiondata=False,
585 revisiondata=False,
584 assumehaveparentrevisions=False,
586 assumehaveparentrevisions=False,
585 deltamode=repository.CG_DELTAMODE_STD,
587 deltamode=repository.CG_DELTAMODE_STD,
586 ):
588 ):
587 if nodesorder not in (b'nodes', b'storage', b'linear', None):
589 if nodesorder not in (b'nodes', b'storage', b'linear', None):
588 raise error.ProgrammingError(
590 raise error.ProgrammingError(
589 b'unhandled value for nodesorder: %s' % nodesorder
591 b'unhandled value for nodesorder: %s' % nodesorder
590 )
592 )
591
593
592 nodes = [n for n in nodes if n != nullid]
594 nodes = [n for n in nodes if n != nullid]
593
595
594 if not nodes:
596 if not nodes:
595 return
597 return
596
598
597 # TODO perform in a single query.
599 # TODO perform in a single query.
598 res = self._db.execute(
600 res = self._db.execute(
599 'SELECT revnum, deltaid FROM fileindex '
601 'SELECT revnum, deltaid FROM fileindex '
600 'WHERE pathid=? '
602 'WHERE pathid=? '
601 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
603 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
602 tuple([self._pathid] + nodes),
604 tuple([self._pathid] + nodes),
603 )
605 )
604
606
605 deltabases = {}
607 deltabases = {}
606
608
607 for rev, deltaid in res:
609 for rev, deltaid in res:
608 res = self._db.execute(
610 res = self._db.execute(
609 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
611 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
610 (self._pathid, deltaid),
612 (self._pathid, deltaid),
611 )
613 )
612 deltabases[rev] = res.fetchone()[0]
614 deltabases[rev] = res.fetchone()[0]
613
615
614 # TODO define revdifffn so we can use delta from storage.
616 # TODO define revdifffn so we can use delta from storage.
615 for delta in storageutil.emitrevisions(
617 for delta in storageutil.emitrevisions(
616 self,
618 self,
617 nodes,
619 nodes,
618 nodesorder,
620 nodesorder,
619 sqliterevisiondelta,
621 sqliterevisiondelta,
620 deltaparentfn=deltabases.__getitem__,
622 deltaparentfn=deltabases.__getitem__,
621 revisiondata=revisiondata,
623 revisiondata=revisiondata,
622 assumehaveparentrevisions=assumehaveparentrevisions,
624 assumehaveparentrevisions=assumehaveparentrevisions,
623 deltamode=deltamode,
625 deltamode=deltamode,
624 ):
626 ):
625
627
626 yield delta
628 yield delta
627
629
628 # End of ifiledata interface.
630 # End of ifiledata interface.
629
631
630 # Start of ifilemutation interface.
632 # Start of ifilemutation interface.
631
633
632 def add(self, filedata, meta, transaction, linkrev, p1, p2):
634 def add(self, filedata, meta, transaction, linkrev, p1, p2):
633 if meta or filedata.startswith(b'\x01\n'):
635 if meta or filedata.startswith(b'\x01\n'):
634 filedata = storageutil.packmeta(meta, filedata)
636 filedata = storageutil.packmeta(meta, filedata)
635
637
636 return self.addrevision(filedata, transaction, linkrev, p1, p2)
638 return self.addrevision(filedata, transaction, linkrev, p1, p2)
637
639
638 def addrevision(
640 def addrevision(
639 self,
641 self,
640 revisiondata,
642 revisiondata,
641 transaction,
643 transaction,
642 linkrev,
644 linkrev,
643 p1,
645 p1,
644 p2,
646 p2,
645 node=None,
647 node=None,
646 flags=0,
648 flags=0,
647 cachedelta=None,
649 cachedelta=None,
648 ):
650 ):
649 if flags:
651 if flags:
650 raise SQLiteStoreError(_(b'flags not supported on revisions'))
652 raise SQLiteStoreError(_(b'flags not supported on revisions'))
651
653
652 validatehash = node is not None
654 validatehash = node is not None
653 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
655 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
654
656
655 if validatehash:
657 if validatehash:
656 self._checkhash(revisiondata, node, p1, p2)
658 self._checkhash(revisiondata, node, p1, p2)
657
659
658 if node in self._nodetorev:
660 if node in self._nodetorev:
659 return node
661 return node
660
662
661 node = self._addrawrevision(
663 node = self._addrawrevision(
662 node, revisiondata, transaction, linkrev, p1, p2
664 node, revisiondata, transaction, linkrev, p1, p2
663 )
665 )
664
666
665 self._revisioncache[node] = revisiondata
667 self._revisioncache[node] = revisiondata
666 return node
668 return node
667
669
668 def addgroup(
670 def addgroup(
669 self,
671 self,
670 deltas,
672 deltas,
671 linkmapper,
673 linkmapper,
672 transaction,
674 transaction,
673 addrevisioncb=None,
675 addrevisioncb=None,
674 maybemissingparents=False,
676 maybemissingparents=False,
675 ):
677 ):
676 nodes = []
678 nodes = []
677
679
678 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
680 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
679 storeflags = 0
681 storeflags = 0
680
682
681 if wireflags & repository.REVISION_FLAG_CENSORED:
683 if wireflags & repository.REVISION_FLAG_CENSORED:
682 storeflags |= FLAG_CENSORED
684 storeflags |= FLAG_CENSORED
683
685
684 if wireflags & ~repository.REVISION_FLAG_CENSORED:
686 if wireflags & ~repository.REVISION_FLAG_CENSORED:
685 raise SQLiteStoreError(b'unhandled revision flag')
687 raise SQLiteStoreError(b'unhandled revision flag')
686
688
687 if maybemissingparents:
689 if maybemissingparents:
688 if p1 != nullid and not self.hasnode(p1):
690 if p1 != nullid and not self.hasnode(p1):
689 p1 = nullid
691 p1 = nullid
690 storeflags |= FLAG_MISSING_P1
692 storeflags |= FLAG_MISSING_P1
691
693
692 if p2 != nullid and not self.hasnode(p2):
694 if p2 != nullid and not self.hasnode(p2):
693 p2 = nullid
695 p2 = nullid
694 storeflags |= FLAG_MISSING_P2
696 storeflags |= FLAG_MISSING_P2
695
697
696 baserev = self.rev(deltabase)
698 baserev = self.rev(deltabase)
697
699
698 # If base is censored, delta must be full replacement in a single
700 # If base is censored, delta must be full replacement in a single
699 # patch operation.
701 # patch operation.
700 if baserev != nullrev and self.iscensored(baserev):
702 if baserev != nullrev and self.iscensored(baserev):
701 hlen = struct.calcsize(b'>lll')
703 hlen = struct.calcsize(b'>lll')
702 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
704 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
703 newlen = len(delta) - hlen
705 newlen = len(delta) - hlen
704
706
705 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
706 raise error.CensoredBaseError(self._path, deltabase)
708 raise error.CensoredBaseError(self._path, deltabase)
707
709
708 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
710 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
709 delta, baserev, lambda x: len(self.rawdata(x))
711 delta, baserev, lambda x: len(self.rawdata(x))
710 ):
712 ):
711 storeflags |= FLAG_CENSORED
713 storeflags |= FLAG_CENSORED
712
714
713 linkrev = linkmapper(linknode)
715 linkrev = linkmapper(linknode)
714
716
715 nodes.append(node)
717 nodes.append(node)
716
718
717 if node in self._revisions:
719 if node in self._revisions:
718 # Possibly reset parents to make them proper.
720 # Possibly reset parents to make them proper.
719 entry = self._revisions[node]
721 entry = self._revisions[node]
720
722
721 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
723 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
722 entry.p1node = p1
724 entry.p1node = p1
723 entry.p1rev = self._nodetorev[p1]
725 entry.p1rev = self._nodetorev[p1]
724 entry.flags &= ~FLAG_MISSING_P1
726 entry.flags &= ~FLAG_MISSING_P1
725
727
726 self._db.execute(
728 self._db.execute(
727 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
729 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
728 (self._nodetorev[p1], entry.flags, entry.rid),
730 (self._nodetorev[p1], entry.flags, entry.rid),
729 )
731 )
730
732
731 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
733 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
732 entry.p2node = p2
734 entry.p2node = p2
733 entry.p2rev = self._nodetorev[p2]
735 entry.p2rev = self._nodetorev[p2]
734 entry.flags &= ~FLAG_MISSING_P2
736 entry.flags &= ~FLAG_MISSING_P2
735
737
736 self._db.execute(
738 self._db.execute(
737 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
739 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
738 (self._nodetorev[p2], entry.flags, entry.rid),
740 (self._nodetorev[p2], entry.flags, entry.rid),
739 )
741 )
740
742
741 continue
743 continue
742
744
743 if deltabase == nullid:
745 if deltabase == nullid:
744 text = mdiff.patch(b'', delta)
746 text = mdiff.patch(b'', delta)
745 storedelta = None
747 storedelta = None
746 else:
748 else:
747 text = None
749 text = None
748 storedelta = (deltabase, delta)
750 storedelta = (deltabase, delta)
749
751
750 self._addrawrevision(
752 self._addrawrevision(
751 node,
753 node,
752 text,
754 text,
753 transaction,
755 transaction,
754 linkrev,
756 linkrev,
755 p1,
757 p1,
756 p2,
758 p2,
757 storedelta=storedelta,
759 storedelta=storedelta,
758 flags=storeflags,
760 flags=storeflags,
759 )
761 )
760
762
761 if addrevisioncb:
763 if addrevisioncb:
762 addrevisioncb(self, node)
764 addrevisioncb(self, node)
763
765
764 return nodes
766 return nodes
765
767
766 def censorrevision(self, tr, censornode, tombstone=b''):
768 def censorrevision(self, tr, censornode, tombstone=b''):
767 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
769 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
768
770
769 # This restriction is cargo culted from revlogs and makes no sense for
771 # This restriction is cargo culted from revlogs and makes no sense for
770 # SQLite, since columns can be resized at will.
772 # SQLite, since columns can be resized at will.
771 if len(tombstone) > len(self.rawdata(censornode)):
773 if len(tombstone) > len(self.rawdata(censornode)):
772 raise error.Abort(
774 raise error.Abort(
773 _(b'censor tombstone must be no longer than censored data')
775 _(b'censor tombstone must be no longer than censored data')
774 )
776 )
775
777
776 # We need to replace the censored revision's data with the tombstone.
778 # We need to replace the censored revision's data with the tombstone.
777 # But replacing that data will have implications for delta chains that
779 # But replacing that data will have implications for delta chains that
778 # reference it.
780 # reference it.
779 #
781 #
780 # While "better," more complex strategies are possible, we do something
782 # While "better," more complex strategies are possible, we do something
781 # simple: we find delta chain children of the censored revision and we
783 # simple: we find delta chain children of the censored revision and we
782 # replace those incremental deltas with fulltexts of their corresponding
784 # replace those incremental deltas with fulltexts of their corresponding
783 # revision. Then we delete the now-unreferenced delta and original
785 # revision. Then we delete the now-unreferenced delta and original
784 # revision and insert a replacement.
786 # revision and insert a replacement.
785
787
786 # Find the delta to be censored.
788 # Find the delta to be censored.
787 censoreddeltaid = self._db.execute(
789 censoreddeltaid = self._db.execute(
788 'SELECT deltaid FROM fileindex WHERE id=?',
790 'SELECT deltaid FROM fileindex WHERE id=?',
789 (self._revisions[censornode].rid,),
791 (self._revisions[censornode].rid,),
790 ).fetchone()[0]
792 ).fetchone()[0]
791
793
792 # Find all its delta chain children.
794 # Find all its delta chain children.
793 # TODO once we support storing deltas for !files, we'll need to look
795 # TODO once we support storing deltas for !files, we'll need to look
794 # for those delta chains too.
796 # for those delta chains too.
795 rows = list(
797 rows = list(
796 self._db.execute(
798 self._db.execute(
797 'SELECT id, pathid, node FROM fileindex '
799 'SELECT id, pathid, node FROM fileindex '
798 'WHERE deltabaseid=? OR deltaid=?',
800 'WHERE deltabaseid=? OR deltaid=?',
799 (censoreddeltaid, censoreddeltaid),
801 (censoreddeltaid, censoreddeltaid),
800 )
802 )
801 )
803 )
802
804
803 for row in rows:
805 for row in rows:
804 rid, pathid, node = row
806 rid, pathid, node = row
805
807
806 fulltext = resolvedeltachain(
808 fulltext = resolvedeltachain(
807 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
809 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
808 )
810 )
809
811
810 deltahash = hashlib.sha1(fulltext).digest()
812 deltahash = hashutil.sha1(fulltext).digest()
811
813
812 if self._compengine == b'zstd':
814 if self._compengine == b'zstd':
813 deltablob = self._cctx.compress(fulltext)
815 deltablob = self._cctx.compress(fulltext)
814 compression = COMPRESSION_ZSTD
816 compression = COMPRESSION_ZSTD
815 elif self._compengine == b'zlib':
817 elif self._compengine == b'zlib':
816 deltablob = zlib.compress(fulltext)
818 deltablob = zlib.compress(fulltext)
817 compression = COMPRESSION_ZLIB
819 compression = COMPRESSION_ZLIB
818 elif self._compengine == b'none':
820 elif self._compengine == b'none':
819 deltablob = fulltext
821 deltablob = fulltext
820 compression = COMPRESSION_NONE
822 compression = COMPRESSION_NONE
821 else:
823 else:
822 raise error.ProgrammingError(
824 raise error.ProgrammingError(
823 b'unhandled compression engine: %s' % self._compengine
825 b'unhandled compression engine: %s' % self._compengine
824 )
826 )
825
827
826 if len(deltablob) >= len(fulltext):
828 if len(deltablob) >= len(fulltext):
827 deltablob = fulltext
829 deltablob = fulltext
828 compression = COMPRESSION_NONE
830 compression = COMPRESSION_NONE
829
831
830 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
832 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
831
833
832 self._db.execute(
834 self._db.execute(
833 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
835 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
834 'WHERE id=?',
836 'WHERE id=?',
835 (deltaid, rid),
837 (deltaid, rid),
836 )
838 )
837
839
838 # Now create the tombstone delta and replace the delta on the censored
840 # Now create the tombstone delta and replace the delta on the censored
839 # node.
841 # node.
840 deltahash = hashlib.sha1(tombstone).digest()
842 deltahash = hashutil.sha1(tombstone).digest()
841 tombstonedeltaid = insertdelta(
843 tombstonedeltaid = insertdelta(
842 self._db, COMPRESSION_NONE, deltahash, tombstone
844 self._db, COMPRESSION_NONE, deltahash, tombstone
843 )
845 )
844
846
845 flags = self._revisions[censornode].flags
847 flags = self._revisions[censornode].flags
846 flags |= FLAG_CENSORED
848 flags |= FLAG_CENSORED
847
849
848 self._db.execute(
850 self._db.execute(
849 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
851 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
850 'WHERE pathid=? AND node=?',
852 'WHERE pathid=? AND node=?',
851 (flags, tombstonedeltaid, self._pathid, censornode),
853 (flags, tombstonedeltaid, self._pathid, censornode),
852 )
854 )
853
855
854 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
856 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
855
857
856 self._refreshindex()
858 self._refreshindex()
857 self._revisioncache.clear()
859 self._revisioncache.clear()
858
860
859 def getstrippoint(self, minlink):
861 def getstrippoint(self, minlink):
860 return storageutil.resolvestripinfo(
862 return storageutil.resolvestripinfo(
861 minlink,
863 minlink,
862 len(self) - 1,
864 len(self) - 1,
863 [self.rev(n) for n in self.heads()],
865 [self.rev(n) for n in self.heads()],
864 self.linkrev,
866 self.linkrev,
865 self.parentrevs,
867 self.parentrevs,
866 )
868 )
867
869
868 def strip(self, minlink, transaction):
870 def strip(self, minlink, transaction):
869 if not len(self):
871 if not len(self):
870 return
872 return
871
873
872 rev, _ignored = self.getstrippoint(minlink)
874 rev, _ignored = self.getstrippoint(minlink)
873
875
874 if rev == len(self):
876 if rev == len(self):
875 return
877 return
876
878
877 for rev in self.revs(rev):
879 for rev in self.revs(rev):
878 self._db.execute(
880 self._db.execute(
879 'DELETE FROM fileindex WHERE pathid=? AND node=?',
881 'DELETE FROM fileindex WHERE pathid=? AND node=?',
880 (self._pathid, self.node(rev)),
882 (self._pathid, self.node(rev)),
881 )
883 )
882
884
883 # TODO how should we garbage collect data in delta table?
885 # TODO how should we garbage collect data in delta table?
884
886
885 self._refreshindex()
887 self._refreshindex()
886
888
887 # End of ifilemutation interface.
889 # End of ifilemutation interface.
888
890
889 # Start of ifilestorage interface.
891 # Start of ifilestorage interface.
890
892
891 def files(self):
893 def files(self):
892 return []
894 return []
893
895
894 def storageinfo(
896 def storageinfo(
895 self,
897 self,
896 exclusivefiles=False,
898 exclusivefiles=False,
897 sharedfiles=False,
899 sharedfiles=False,
898 revisionscount=False,
900 revisionscount=False,
899 trackedsize=False,
901 trackedsize=False,
900 storedsize=False,
902 storedsize=False,
901 ):
903 ):
902 d = {}
904 d = {}
903
905
904 if exclusivefiles:
906 if exclusivefiles:
905 d[b'exclusivefiles'] = []
907 d[b'exclusivefiles'] = []
906
908
907 if sharedfiles:
909 if sharedfiles:
908 # TODO list sqlite file(s) here.
910 # TODO list sqlite file(s) here.
909 d[b'sharedfiles'] = []
911 d[b'sharedfiles'] = []
910
912
911 if revisionscount:
913 if revisionscount:
912 d[b'revisionscount'] = len(self)
914 d[b'revisionscount'] = len(self)
913
915
914 if trackedsize:
916 if trackedsize:
915 d[b'trackedsize'] = sum(
917 d[b'trackedsize'] = sum(
916 len(self.revision(node)) for node in self._nodetorev
918 len(self.revision(node)) for node in self._nodetorev
917 )
919 )
918
920
919 if storedsize:
921 if storedsize:
920 # TODO implement this?
922 # TODO implement this?
921 d[b'storedsize'] = None
923 d[b'storedsize'] = None
922
924
923 return d
925 return d
924
926
925 def verifyintegrity(self, state):
927 def verifyintegrity(self, state):
926 state[b'skipread'] = set()
928 state[b'skipread'] = set()
927
929
928 for rev in self:
930 for rev in self:
929 node = self.node(rev)
931 node = self.node(rev)
930
932
931 try:
933 try:
932 self.revision(node)
934 self.revision(node)
933 except Exception as e:
935 except Exception as e:
934 yield sqliteproblem(
936 yield sqliteproblem(
935 error=_(b'unpacking %s: %s') % (short(node), e), node=node
937 error=_(b'unpacking %s: %s') % (short(node), e), node=node
936 )
938 )
937
939
938 state[b'skipread'].add(node)
940 state[b'skipread'].add(node)
939
941
940 # End of ifilestorage interface.
942 # End of ifilestorage interface.
941
943
942 def _checkhash(self, fulltext, node, p1=None, p2=None):
944 def _checkhash(self, fulltext, node, p1=None, p2=None):
943 if p1 is None and p2 is None:
945 if p1 is None and p2 is None:
944 p1, p2 = self.parents(node)
946 p1, p2 = self.parents(node)
945
947
946 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
948 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
947 return
949 return
948
950
949 try:
951 try:
950 del self._revisioncache[node]
952 del self._revisioncache[node]
951 except KeyError:
953 except KeyError:
952 pass
954 pass
953
955
954 if storageutil.iscensoredtext(fulltext):
956 if storageutil.iscensoredtext(fulltext):
955 raise error.CensoredNodeError(self._path, node, fulltext)
957 raise error.CensoredNodeError(self._path, node, fulltext)
956
958
957 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
959 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
958
960
959 def _addrawrevision(
961 def _addrawrevision(
960 self,
962 self,
961 node,
963 node,
962 revisiondata,
964 revisiondata,
963 transaction,
965 transaction,
964 linkrev,
966 linkrev,
965 p1,
967 p1,
966 p2,
968 p2,
967 storedelta=None,
969 storedelta=None,
968 flags=0,
970 flags=0,
969 ):
971 ):
970 if self._pathid is None:
972 if self._pathid is None:
971 res = self._db.execute(
973 res = self._db.execute(
972 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
974 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
973 )
975 )
974 self._pathid = res.lastrowid
976 self._pathid = res.lastrowid
975
977
976 # For simplicity, always store a delta against p1.
978 # For simplicity, always store a delta against p1.
977 # TODO we need a lot more logic here to make behavior reasonable.
979 # TODO we need a lot more logic here to make behavior reasonable.
978
980
979 if storedelta:
981 if storedelta:
980 deltabase, delta = storedelta
982 deltabase, delta = storedelta
981
983
982 if isinstance(deltabase, int):
984 if isinstance(deltabase, int):
983 deltabase = self.node(deltabase)
985 deltabase = self.node(deltabase)
984
986
985 else:
987 else:
986 assert revisiondata is not None
988 assert revisiondata is not None
987 deltabase = p1
989 deltabase = p1
988
990
989 if deltabase == nullid:
991 if deltabase == nullid:
990 delta = revisiondata
992 delta = revisiondata
991 else:
993 else:
992 delta = mdiff.textdiff(
994 delta = mdiff.textdiff(
993 self.revision(self.rev(deltabase)), revisiondata
995 self.revision(self.rev(deltabase)), revisiondata
994 )
996 )
995
997
996 # File index stores a pointer to its delta and the parent delta.
998 # File index stores a pointer to its delta and the parent delta.
997 # The parent delta is stored via a pointer to the fileindex PK.
999 # The parent delta is stored via a pointer to the fileindex PK.
998 if deltabase == nullid:
1000 if deltabase == nullid:
999 baseid = None
1001 baseid = None
1000 else:
1002 else:
1001 baseid = self._revisions[deltabase].rid
1003 baseid = self._revisions[deltabase].rid
1002
1004
1003 # Deltas are stored with a hash of their content. This allows
1005 # Deltas are stored with a hash of their content. This allows
1004 # us to de-duplicate. The table is configured to ignore conflicts
1006 # us to de-duplicate. The table is configured to ignore conflicts
1005 # and it is faster to just insert and silently noop than to look
1007 # and it is faster to just insert and silently noop than to look
1006 # first.
1008 # first.
1007 deltahash = hashlib.sha1(delta).digest()
1009 deltahash = hashutil.sha1(delta).digest()
1008
1010
1009 if self._compengine == b'zstd':
1011 if self._compengine == b'zstd':
1010 deltablob = self._cctx.compress(delta)
1012 deltablob = self._cctx.compress(delta)
1011 compression = COMPRESSION_ZSTD
1013 compression = COMPRESSION_ZSTD
1012 elif self._compengine == b'zlib':
1014 elif self._compengine == b'zlib':
1013 deltablob = zlib.compress(delta)
1015 deltablob = zlib.compress(delta)
1014 compression = COMPRESSION_ZLIB
1016 compression = COMPRESSION_ZLIB
1015 elif self._compengine == b'none':
1017 elif self._compengine == b'none':
1016 deltablob = delta
1018 deltablob = delta
1017 compression = COMPRESSION_NONE
1019 compression = COMPRESSION_NONE
1018 else:
1020 else:
1019 raise error.ProgrammingError(
1021 raise error.ProgrammingError(
1020 b'unhandled compression engine: %s' % self._compengine
1022 b'unhandled compression engine: %s' % self._compengine
1021 )
1023 )
1022
1024
1023 # Don't store compressed data if it isn't practical.
1025 # Don't store compressed data if it isn't practical.
1024 if len(deltablob) >= len(delta):
1026 if len(deltablob) >= len(delta):
1025 deltablob = delta
1027 deltablob = delta
1026 compression = COMPRESSION_NONE
1028 compression = COMPRESSION_NONE
1027
1029
1028 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1030 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1029
1031
1030 rev = len(self)
1032 rev = len(self)
1031
1033
1032 if p1 == nullid:
1034 if p1 == nullid:
1033 p1rev = nullrev
1035 p1rev = nullrev
1034 else:
1036 else:
1035 p1rev = self._nodetorev[p1]
1037 p1rev = self._nodetorev[p1]
1036
1038
1037 if p2 == nullid:
1039 if p2 == nullid:
1038 p2rev = nullrev
1040 p2rev = nullrev
1039 else:
1041 else:
1040 p2rev = self._nodetorev[p2]
1042 p2rev = self._nodetorev[p2]
1041
1043
1042 rid = self._db.execute(
1044 rid = self._db.execute(
1043 'INSERT INTO fileindex ('
1045 'INSERT INTO fileindex ('
1044 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1046 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1045 ' deltaid, deltabaseid) '
1047 ' deltaid, deltabaseid) '
1046 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1048 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1047 (
1049 (
1048 self._pathid,
1050 self._pathid,
1049 rev,
1051 rev,
1050 node,
1052 node,
1051 p1rev,
1053 p1rev,
1052 p2rev,
1054 p2rev,
1053 linkrev,
1055 linkrev,
1054 flags,
1056 flags,
1055 deltaid,
1057 deltaid,
1056 baseid,
1058 baseid,
1057 ),
1059 ),
1058 ).lastrowid
1060 ).lastrowid
1059
1061
1060 entry = revisionentry(
1062 entry = revisionentry(
1061 rid=rid,
1063 rid=rid,
1062 rev=rev,
1064 rev=rev,
1063 node=node,
1065 node=node,
1064 p1rev=p1rev,
1066 p1rev=p1rev,
1065 p2rev=p2rev,
1067 p2rev=p2rev,
1066 p1node=p1,
1068 p1node=p1,
1067 p2node=p2,
1069 p2node=p2,
1068 linkrev=linkrev,
1070 linkrev=linkrev,
1069 flags=flags,
1071 flags=flags,
1070 )
1072 )
1071
1073
1072 self._nodetorev[node] = rev
1074 self._nodetorev[node] = rev
1073 self._revtonode[rev] = node
1075 self._revtonode[rev] = node
1074 self._revisions[node] = entry
1076 self._revisions[node] = entry
1075
1077
1076 return node
1078 return node
1077
1079
1078
1080
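# The compression policy shared by censorrevision() and _addrawrevision()
# above, pulled out as a standalone sketch: compress with the configured
# engine, but fall back to storing the raw payload whenever compression does
# not actually shrink it.  `cctx` is assumed to be an initialized zstd
# compressor when b'zstd' is selected; the function name is hypothetical.
def _compressforstorage(compengine, cctx, data):
    if compengine == b'zstd':
        blob, compression = cctx.compress(data), COMPRESSION_ZSTD
    elif compengine == b'zlib':
        blob, compression = zlib.compress(data), COMPRESSION_ZLIB
    elif compengine == b'none':
        blob, compression = data, COMPRESSION_NONE
    else:
        raise error.ProgrammingError(
            b'unhandled compression engine: %s' % compengine
        )
    # Don't store compressed data if it isn't practical.
    if len(blob) >= len(data):
        blob, compression = data, COMPRESSION_NONE
    return compression, blob
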
1079 class sqliterepository(localrepo.localrepository):
1081 class sqliterepository(localrepo.localrepository):
1080 def cancopy(self):
1082 def cancopy(self):
1081 return False
1083 return False
1082
1084
1083 def transaction(self, *args, **kwargs):
1085 def transaction(self, *args, **kwargs):
1084 current = self.currenttransaction()
1086 current = self.currenttransaction()
1085
1087
1086 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1088 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1087
1089
1088 if current:
1090 if current:
1089 return tr
1091 return tr
1090
1092
1091 self._dbconn.execute('BEGIN TRANSACTION')
1093 self._dbconn.execute('BEGIN TRANSACTION')
1092
1094
1093 def committransaction(_):
1095 def committransaction(_):
1094 self._dbconn.commit()
1096 self._dbconn.commit()
1095
1097
1096 tr.addfinalize(b'sqlitestore', committransaction)
1098 tr.addfinalize(b'sqlitestore', committransaction)
1097
1099
1098 return tr
1100 return tr
1099
1101
1100 @property
1102 @property
1101 def _dbconn(self):
1103 def _dbconn(self):
1102 # SQLite connections can only be used on the thread that created
1104 # SQLite connections can only be used on the thread that created
1103 # them. In most cases, this "just works." However, hgweb uses
1105 # them. In most cases, this "just works." However, hgweb uses
1104 # multiple threads.
1106 # multiple threads.
1105 tid = threading.current_thread().ident
1107 tid = threading.current_thread().ident
1106
1108
1107 if self._db:
1109 if self._db:
1108 if self._db[0] == tid:
1110 if self._db[0] == tid:
1109 return self._db[1]
1111 return self._db[1]
1110
1112
1111 db = makedb(self.svfs.join(b'db.sqlite'))
1113 db = makedb(self.svfs.join(b'db.sqlite'))
1112 self._db = (tid, db)
1114 self._db = (tid, db)
1113
1115
1114 return db
1116 return db
1115
1117
1116
1118
1117 def makedb(path):
1119 def makedb(path):
1118 """Construct a database handle for a database at path."""
1120 """Construct a database handle for a database at path."""
1119
1121
1120 db = sqlite3.connect(encoding.strfromlocal(path))
1122 db = sqlite3.connect(encoding.strfromlocal(path))
1121 db.text_factory = bytes
1123 db.text_factory = bytes
1122
1124
1123 res = db.execute('PRAGMA user_version').fetchone()[0]
1125 res = db.execute('PRAGMA user_version').fetchone()[0]
1124
1126
1125 # New database.
1127 # New database.
1126 if res == 0:
1128 if res == 0:
1127 for statement in CREATE_SCHEMA:
1129 for statement in CREATE_SCHEMA:
1128 db.execute(statement)
1130 db.execute(statement)
1129
1131
1130 db.commit()
1132 db.commit()
1131
1133
1132 elif res == CURRENT_SCHEMA_VERSION:
1134 elif res == CURRENT_SCHEMA_VERSION:
1133 pass
1135 pass
1134
1136
1135 else:
1137 else:
1136 raise error.Abort(_(b'sqlite database has unrecognized version'))
1138 raise error.Abort(_(b'sqlite database has unrecognized version'))
1137
1139
1138 db.execute('PRAGMA journal_mode=WAL')
1140 db.execute('PRAGMA journal_mode=WAL')
1139
1141
1140 return db
1142 return db
1141
1143
1142
1144
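# Standalone sketch (class name is hypothetical) of the thread-affinity
# handling in sqliterepository._dbconn above: SQLite connections must stay on
# the thread that created them, so a fresh connection is opened via makedb()
# whenever a different thread (for example an hgweb worker) asks for the
# handle.
class _threadlocaldb(object):
    def __init__(self, path):
        self._path = path  # bytes path, e.g. svfs.join(b'db.sqlite')
        self._db = None  # (thread ident, sqlite3 connection) or None

    def get(self):
        tid = threading.current_thread().ident
        if self._db and self._db[0] == tid:
            return self._db[1]
        db = makedb(self._path)
        self._db = (tid, db)
        return db
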
1143 def featuresetup(ui, supported):
1145 def featuresetup(ui, supported):
1144 supported.add(REQUIREMENT)
1146 supported.add(REQUIREMENT)
1145
1147
1146 if zstd:
1148 if zstd:
1147 supported.add(REQUIREMENT_ZSTD)
1149 supported.add(REQUIREMENT_ZSTD)
1148
1150
1149 supported.add(REQUIREMENT_ZLIB)
1151 supported.add(REQUIREMENT_ZLIB)
1150 supported.add(REQUIREMENT_NONE)
1152 supported.add(REQUIREMENT_NONE)
1151 supported.add(REQUIREMENT_SHALLOW_FILES)
1153 supported.add(REQUIREMENT_SHALLOW_FILES)
1152 supported.add(repository.NARROW_REQUIREMENT)
1154 supported.add(repository.NARROW_REQUIREMENT)
1153
1155
1154
1156
def newreporequirements(orig, ui, createopts):
    if createopts[b'backend'] != b'sqlite':
        return orig(ui, createopts)

    # This restriction can be lifted once we have more confidence.
    if b'sharedrepo' in createopts:
        raise error.Abort(
            _(b'shared repositories not supported with SQLite store')
        )

    # This filtering is out of an abundance of caution: we want to ensure
    # we honor creation options and we do that by annotating exactly the
    # creation options we recognize.
    known = {
        b'narrowfiles',
        b'backend',
        b'shallowfilestore',
    }

    unsupported = set(createopts) - known
    if unsupported:
        raise error.Abort(
            _(b'SQLite store does not support repo creation option: %s')
            % b', '.join(sorted(unsupported))
        )

    # Since we're a hybrid store that still relies on revlogs, we fall back
    # to using the revlogv1 backend's storage requirements then adding our
    # own requirement.
    createopts[b'backend'] = b'revlogv1'
    requirements = orig(ui, createopts)
    requirements.add(REQUIREMENT)

    compression = ui.config(b'storage', b'sqlite.compression')

    if compression == b'zstd' and not zstd:
        raise error.Abort(
            _(
                b'storage.sqlite.compression set to "zstd" but '
                b'zstandard compression not available to this '
                b'Mercurial install'
            )
        )

    if compression == b'zstd':
        requirements.add(REQUIREMENT_ZSTD)
    elif compression == b'zlib':
        requirements.add(REQUIREMENT_ZLIB)
    elif compression == b'none':
        requirements.add(REQUIREMENT_NONE)
    else:
        raise error.Abort(
            _(
                b'unknown compression engine defined in '
                b'storage.sqlite.compression: %s'
            )
            % compression
        )

    if createopts.get(b'shallowfilestore'):
        requirements.add(REQUIREMENT_SHALLOW_FILES)

    return requirements

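The compression engine comes from the storage.sqlite.compression option read above, so it must be configured before the repository is created for the branch above to pick it up. A minimal hgrc snippet, assuming the zstd bindings are available (otherwise the preceding abort fires); zlib and none are the other values accepted by that branch:

[storage]
sqlite.compression = zstd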
@interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
class sqlitefilestorage(object):
    """Repository file storage backed by SQLite."""

    def file(self, path):
        if path[0] == b'/':
            path = path[1:]

        if REQUIREMENT_ZSTD in self.requirements:
            compression = b'zstd'
        elif REQUIREMENT_ZLIB in self.requirements:
            compression = b'zlib'
        elif REQUIREMENT_NONE in self.requirements:
            compression = b'none'
        else:
            raise error.Abort(
                _(
                    b'unable to determine what compression engine '
                    b'to use for SQLite storage'
                )
            )

        return sqlitefilestore(self._dbconn, path, compression)

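For orientation, file() above is the standard file-storage entry point that callers reach through repo.file(), so consumers are not expected to care which backend is active. A hypothetical call, with the path chosen purely for illustration:

# With REQUIREMENT in repo.requirements, repo.file() dispatches to the
# method above and returns a sqlitefilestore bound to the shared
# database connection.
fl = repo.file(b'path/to/tracked-file')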
def makefilestorage(orig, requirements, features, **kwargs):
    """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
    if REQUIREMENT in requirements:
        if REQUIREMENT_SHALLOW_FILES in requirements:
            features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)

        return sqlitefilestorage
    else:
        return orig(requirements=requirements, features=features, **kwargs)

def makemain(orig, ui, requirements, **kwargs):
    if REQUIREMENT in requirements:
        if REQUIREMENT_ZSTD in requirements and not zstd:
            raise error.Abort(
                _(
                    b'repository uses zstandard compression, which '
                    b'is not available to this Mercurial install'
                )
            )

        return sqliterepository

    return orig(requirements=requirements, **kwargs)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)
    extensions.wrapfunction(
        localrepo, b'newreporequirements', newreporequirements
    )
    extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
    extensions.wrapfunction(localrepo, b'makemain', makemain)
    extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)

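extsetup() leans on extensions.wrapfunction(), whose contract is visible in the wrappers above: the wrapped original is passed in as the first argument and the wrapper chooses whether to defer to it. A minimal sketch of that pattern, using a hypothetical wrapper name for illustration:

from mercurial import extensions, localrepo


def samplewrapper(orig, *args, **kwargs):
    # Extension-specific behavior would go here; falling through to
    # orig() preserves stock behavior, much as makefilestorage() and
    # makemain() above do when REQUIREMENT is absent.
    return orig(*args, **kwargs)


# Typically registered from an extension's extsetup(), as above.
extensions.wrapfunction(localrepo, b'makemain', samplewrapper)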
def reposetup(ui, repo):
    if isinstance(repo, sqliterepository):
        repo._db = None

    # TODO check for bundlerepository?