@@ -1,856 +1,858 b'' | |||
|
1 | 1 | # Copyright 2016-present Facebook. All Rights Reserved. |
|
2 | 2 | # |
|
3 | 3 | # context: context needed to annotate a file |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import collections |
|
11 | 11 | import contextlib |
|
12 | import hashlib | |
|
13 | 12 | import os |
|
14 | 13 | |
|
15 | 14 | from mercurial.i18n import _ |
|
16 | 15 | from mercurial.pycompat import ( |
|
17 | 16 | getattr, |
|
18 | 17 | open, |
|
19 | 18 | setattr, |
|
20 | 19 | ) |
|
21 | 20 | from mercurial import ( |
|
22 | 21 | error, |
|
23 | 22 | linelog as linelogmod, |
|
24 | 23 | lock as lockmod, |
|
25 | 24 | mdiff, |
|
26 | 25 | node, |
|
27 | 26 | pycompat, |
|
28 | 27 | scmutil, |
|
29 | 28 | util, |
|
30 | 29 | ) |
|
31 | from mercurial.utils import stringutil | |
|
30 | from mercurial.utils import ( | |
|
31 | hashutil, | |
|
32 | stringutil, | |
|
33 | ) | |
|
32 | 34 | |
|
33 | 35 | from . import ( |
|
34 | 36 | error as faerror, |
|
35 | 37 | revmap as revmapmod, |
|
36 | 38 | ) |
|
37 | 39 | |
|
38 | 40 | # given path, get filelog, cached |
|
39 | 41 | @util.lrucachefunc |
|
40 | 42 | def _getflog(repo, path): |
|
41 | 43 | return repo.file(path) |
|
42 | 44 | |
|
43 | 45 | |
|
44 | 46 | # extracted from mercurial.context.basefilectx.annotate |
|
45 | 47 | def _parents(f, follow=True): |
|
46 | 48 | # Cut _descendantrev here to mitigate the penalty of lazy linkrev |
|
47 | 49 | # adjustment. Otherwise, p._adjustlinkrev() would walk changelog |
|
48 | 50 | # from the topmost introrev (= srcrev) down to p.linkrev() if it |
|
49 | 51 | # isn't an ancestor of the srcrev. |
|
50 | 52 | f._changeid |
|
51 | 53 | pl = f.parents() |
|
52 | 54 | |
|
53 | 55 | # Don't return renamed parents if we aren't following. |
|
54 | 56 | if not follow: |
|
55 | 57 | pl = [p for p in pl if p.path() == f.path()] |
|
56 | 58 | |
|
57 | 59 | # renamed filectx won't have a filelog yet, so set it |
|
58 | 60 | # from the cache to save time |
|
59 | 61 | for p in pl: |
|
60 | 62 | if not '_filelog' in p.__dict__: |
|
61 | 63 | p._filelog = _getflog(f._repo, p.path()) |
|
62 | 64 | |
|
63 | 65 | return pl |
|
64 | 66 | |
|
65 | 67 | |
|
66 | 68 | # extracted from mercurial.context.basefilectx.annotate. slightly modified |
|
67 | 69 | # so it takes a fctx instead of a pair of text and fctx. |
|
68 | 70 | def _decorate(fctx): |
|
69 | 71 | text = fctx.data() |
|
70 | 72 | linecount = text.count(b'\n') |
|
71 | 73 | if text and not text.endswith(b'\n'): |
|
72 | 74 | linecount += 1 |
|
73 | 75 | return ([(fctx, i) for i in pycompat.xrange(linecount)], text) |
|
74 | 76 | |
|
75 | 77 | |
|
76 | 78 | # extracted from mercurial.context.basefilectx.annotate. slightly modified |
|
77 | 79 | # so it takes an extra "blocks" parameter calculated elsewhere, instead of |
|
78 | 80 | # calculating diff here. |
|
79 | 81 | def _pair(parent, child, blocks): |
|
80 | 82 | for (a1, a2, b1, b2), t in blocks: |
|
81 | 83 | # Changed blocks ('!') or blocks made only of blank lines ('~') |
|
82 | 84 | # belong to the child. |
|
83 | 85 | if t == b'=': |
|
84 | 86 | child[0][b1:b2] = parent[0][a1:a2] |
|
85 | 87 | return child |
|
86 | 88 | |
|
87 | 89 | |
|
88 | 90 | # like scmutil.revsingle, but with lru cache, so their states (like manifests) |
|
89 | 91 | # could be reused |
|
90 | 92 | _revsingle = util.lrucachefunc(scmutil.revsingle) |
|
91 | 93 | |
|
92 | 94 | |
|
93 | 95 | def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None): |
|
94 | 96 | """(repo, str, str) -> fctx |
|
95 | 97 | |
|
96 | 98 | get the filectx object from repo, rev, path, in an efficient way. |
|
97 | 99 | |
|
98 | 100 | if resolverev is True, "rev" is a revision specified by the revset |
|
99 | 101 | language, otherwise "rev" is a nodeid, or a revision number that can |
|
100 | 102 | be consumed by repo.__getitem__. |
|
101 | 103 | |
|
102 | 104 | if adjustctx is not None, the returned fctx will point to a changeset |
|
103 | 105 | that introduces the change (last modified the file). if adjustctx |
|
104 | 106 | is 'linkrev', trust the linkrev and do not adjust it. this is noticeably |
|
105 | 107 | faster for big repos but is incorrect for some cases. |
|
106 | 108 | """ |
|
107 | 109 | if resolverev and not isinstance(rev, int) and rev is not None: |
|
108 | 110 | ctx = _revsingle(repo, rev) |
|
109 | 111 | else: |
|
110 | 112 | ctx = repo[rev] |
|
111 | 113 | |
|
112 | 114 | # If we don't need to adjust the linkrev, create the filectx using the |
|
113 | 115 | # changectx instead of using ctx[path]. This means it already has the |
|
114 | 116 | # changectx information, so blame -u will be able to look directly at the |
|
115 | 117 | # commitctx object instead of having to resolve it by going through the |
|
116 | 118 | # manifest. In a lazy-manifest world this can prevent us from downloading a |
|
117 | 119 | # lot of data. |
|
118 | 120 | if adjustctx is None: |
|
119 | 121 | # ctx.rev() is None means it's the working copy, which is a special |
|
120 | 122 | # case. |
|
121 | 123 | if ctx.rev() is None: |
|
122 | 124 | fctx = ctx[path] |
|
123 | 125 | else: |
|
124 | 126 | fctx = repo.filectx(path, changeid=ctx.rev()) |
|
125 | 127 | else: |
|
126 | 128 | fctx = ctx[path] |
|
127 | 129 | if adjustctx == b'linkrev': |
|
128 | 130 | introrev = fctx.linkrev() |
|
129 | 131 | else: |
|
130 | 132 | introrev = fctx.introrev() |
|
131 | 133 | if introrev != ctx.rev(): |
|
132 | 134 | fctx._changeid = introrev |
|
133 | 135 | fctx._changectx = repo[introrev] |
|
134 | 136 | return fctx |
|
135 | 137 | |
|
136 | 138 | |
|
137 | 139 | # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock |
|
138 | 140 | def encodedir(path): |
|
139 | 141 | return ( |
|
140 | 142 | path.replace(b'.hg/', b'.hg.hg/') |
|
141 | 143 | .replace(b'.l/', b'.l.hg/') |
|
142 | 144 | .replace(b'.m/', b'.m.hg/') |
|
143 | 145 | .replace(b'.lock/', b'.lock.hg/') |
|
144 | 146 | ) |
|
145 | 147 | |
|
146 | 148 | |
|
147 | 149 | def hashdiffopts(diffopts): |
|
148 | 150 | diffoptstr = stringutil.pprint( |
|
149 | 151 | sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults) |
|
150 | 152 | ) |
|
151 | return node.hex(hashlib.sha1(diffoptstr).digest())[:6] | |
|
153 | return node.hex(hashutil.sha1(diffoptstr).digest())[:6] | |
|
152 | 154 | |
|
153 | 155 | |
|
154 | 156 | _defaultdiffopthash = hashdiffopts(mdiff.defaultopts) |
|
155 | 157 | |
|
156 | 158 | |
|
157 | 159 | class annotateopts(object): |
|
158 | 160 | """like mercurial.mdiff.diffopts, but is for annotate |
|
159 | 161 | |
|
160 | 162 | followrename: follow renames, like "hg annotate -f" |
|
161 | 163 | followmerge: follow p2 of a merge changeset, otherwise p2 is ignored |
|
162 | 164 | """ |
|
163 | 165 | |
|
164 | 166 | defaults = { |
|
165 | 167 | b'diffopts': None, |
|
166 | 168 | b'followrename': True, |
|
167 | 169 | b'followmerge': True, |
|
168 | 170 | } |
|
169 | 171 | |
|
170 | 172 | def __init__(self, **opts): |
|
171 | 173 | opts = pycompat.byteskwargs(opts) |
|
172 | 174 | for k, v in pycompat.iteritems(self.defaults): |
|
173 | 175 | setattr(self, k, opts.get(k, v)) |
|
174 | 176 | |
|
175 | 177 | @util.propertycache |
|
176 | 178 | def shortstr(self): |
|
177 | 179 | """represent opts in a short string, suitable for a directory name""" |
|
178 | 180 | result = b'' |
|
179 | 181 | if not self.followrename: |
|
180 | 182 | result += b'r0' |
|
181 | 183 | if not self.followmerge: |
|
182 | 184 | result += b'm0' |
|
183 | 185 | if self.diffopts is not None: |
|
184 | 186 | assert isinstance(self.diffopts, mdiff.diffopts) |
|
185 | 187 | diffopthash = hashdiffopts(self.diffopts) |
|
186 | 188 | if diffopthash != _defaultdiffopthash: |
|
187 | 189 | result += b'i' + diffopthash |
|
188 | 190 | return result or b'default' |
|
189 | 191 | |
|
190 | 192 | |
|
191 | 193 | defaultopts = annotateopts() |
|
192 | 194 | |
|
193 | 195 | |
|
194 | 196 | class _annotatecontext(object): |
|
195 | 197 | """do not use this class directly as it does not use lock to protect |
|
196 | 198 | writes. use "with annotatecontext(...)" instead. |
|
197 | 199 | """ |
|
198 | 200 | |
|
199 | 201 | def __init__(self, repo, path, linelogpath, revmappath, opts): |
|
200 | 202 | self.repo = repo |
|
201 | 203 | self.ui = repo.ui |
|
202 | 204 | self.path = path |
|
203 | 205 | self.opts = opts |
|
204 | 206 | self.linelogpath = linelogpath |
|
205 | 207 | self.revmappath = revmappath |
|
206 | 208 | self._linelog = None |
|
207 | 209 | self._revmap = None |
|
208 | 210 | self._node2path = {} # {str: str} |
|
209 | 211 | |
|
210 | 212 | @property |
|
211 | 213 | def linelog(self): |
|
212 | 214 | if self._linelog is None: |
|
213 | 215 | if os.path.exists(self.linelogpath): |
|
214 | 216 | with open(self.linelogpath, b'rb') as f: |
|
215 | 217 | try: |
|
216 | 218 | self._linelog = linelogmod.linelog.fromdata(f.read()) |
|
217 | 219 | except linelogmod.LineLogError: |
|
218 | 220 | self._linelog = linelogmod.linelog() |
|
219 | 221 | else: |
|
220 | 222 | self._linelog = linelogmod.linelog() |
|
221 | 223 | return self._linelog |
|
222 | 224 | |
|
223 | 225 | @property |
|
224 | 226 | def revmap(self): |
|
225 | 227 | if self._revmap is None: |
|
226 | 228 | self._revmap = revmapmod.revmap(self.revmappath) |
|
227 | 229 | return self._revmap |
|
228 | 230 | |
|
229 | 231 | def close(self): |
|
230 | 232 | if self._revmap is not None: |
|
231 | 233 | self._revmap.flush() |
|
232 | 234 | self._revmap = None |
|
233 | 235 | if self._linelog is not None: |
|
234 | 236 | with open(self.linelogpath, b'wb') as f: |
|
235 | 237 | f.write(self._linelog.encode()) |
|
236 | 238 | self._linelog = None |
|
237 | 239 | |
|
238 | 240 | __del__ = close |
|
239 | 241 | |
|
240 | 242 | def rebuild(self): |
|
241 | 243 | """delete linelog and revmap, useful for rebuilding""" |
|
242 | 244 | self.close() |
|
243 | 245 | self._node2path.clear() |
|
244 | 246 | _unlinkpaths([self.revmappath, self.linelogpath]) |
|
245 | 247 | |
|
246 | 248 | @property |
|
247 | 249 | def lastnode(self): |
|
248 | 250 | """return last node in revmap, or None if revmap is empty""" |
|
249 | 251 | if self._revmap is None: |
|
250 | 252 | # fast path, read revmap without loading its full content |
|
251 | 253 | return revmapmod.getlastnode(self.revmappath) |
|
252 | 254 | else: |
|
253 | 255 | return self._revmap.rev2hsh(self._revmap.maxrev) |
|
254 | 256 | |
|
255 | 257 | def isuptodate(self, master, strict=True): |
|
256 | 258 | """return True if the revmap / linelog is up-to-date, or the file |
|
257 | 259 | does not exist in the master revision. False otherwise. |
|
258 | 260 | |
|
259 | 261 | it tries to be fast and could return false negatives, because of the |
|
260 | 262 | use of linkrev instead of introrev. |
|
261 | 263 | |
|
262 | 264 | useful for both server and client to decide whether to update |
|
263 | 265 | fastannotate cache or not. |
|
264 | 266 | |
|
265 | 267 | if strict is True, even if fctx exists in the revmap, but is not the |
|
266 | 268 | last node, isuptodate will return False. it's good for performance - no |
|
267 | 269 | expensive check was done. |
|
268 | 270 | |
|
269 | 271 | if strict is False, if fctx exists in the revmap, this function may |
|
270 | 272 | return True. this is useful for the client to skip downloading the |
|
271 | 273 | cache if the client's master is behind the server's. |
|
272 | 274 | """ |
|
273 | 275 | lastnode = self.lastnode |
|
274 | 276 | try: |
|
275 | 277 | f = self._resolvefctx(master, resolverev=True) |
|
276 | 278 | # choose linkrev instead of introrev as the check is meant to be |
|
277 | 279 | # *fast*. |
|
278 | 280 | linknode = self.repo.changelog.node(f.linkrev()) |
|
279 | 281 | if not strict and lastnode and linknode != lastnode: |
|
280 | 282 | # check if f.node() is in the revmap. note: this loads the |
|
281 | 283 | # revmap and can be slow. |
|
282 | 284 | return self.revmap.hsh2rev(linknode) is not None |
|
283 | 285 | # avoid resolving old manifest, or slow adjustlinkrev to be fast, |
|
284 | 286 | # false negatives are acceptable in this case. |
|
285 | 287 | return linknode == lastnode |
|
286 | 288 | except LookupError: |
|
287 | 289 | # master does not have the file, or the revmap is ahead |
|
288 | 290 | return True |
|
289 | 291 | |
|
290 | 292 | def annotate(self, rev, master=None, showpath=False, showlines=False): |
|
291 | 293 | """incrementally update the cache so it includes revisions in the main |
|
292 | 294 | branch till 'master'. and run annotate on 'rev', which may or may not be |
|
293 | 295 | included in the main branch. |
|
294 | 296 | |
|
295 | 297 | if master is None, do not update linelog. |
|
296 | 298 | |
|
297 | 299 | the first value returned is the annotate result, it is [(node, linenum)] |
|
298 | 300 | by default. [(node, linenum, path)] if showpath is True. |
|
299 | 301 | |
|
300 | 302 | if showlines is True, a second value will be returned, it is a list of |
|
301 | 303 | corresponding line contents. |
|
302 | 304 | """ |
|
303 | 305 | |
|
304 | 306 | # the fast path test requires commit hash, convert rev number to hash, |
|
305 | 307 | # so it may hit the fast path. note: in the "fctx" mode, the "annotate" |
|
306 | 308 | # command could give us a revision number even if the user passes a |
|
307 | 309 | # commit hash. |
|
308 | 310 | if isinstance(rev, int): |
|
309 | 311 | rev = node.hex(self.repo.changelog.node(rev)) |
|
310 | 312 | |
|
311 | 313 | # fast path: if rev is in the main branch already |
|
312 | 314 | directly, revfctx = self.canannotatedirectly(rev) |
|
313 | 315 | if directly: |
|
314 | 316 | if self.ui.debugflag: |
|
315 | 317 | self.ui.debug( |
|
316 | 318 | b'fastannotate: %s: using fast path ' |
|
317 | 319 | b'(resolved fctx: %s)\n' |
|
318 | 320 | % ( |
|
319 | 321 | self.path, |
|
320 | 322 | stringutil.pprint(util.safehasattr(revfctx, b'node')), |
|
321 | 323 | ) |
|
322 | 324 | ) |
|
323 | 325 | return self.annotatedirectly(revfctx, showpath, showlines) |
|
324 | 326 | |
|
325 | 327 | # resolve master |
|
326 | 328 | masterfctx = None |
|
327 | 329 | if master: |
|
328 | 330 | try: |
|
329 | 331 | masterfctx = self._resolvefctx( |
|
330 | 332 | master, resolverev=True, adjustctx=True |
|
331 | 333 | ) |
|
332 | 334 | except LookupError: # master does not have the file |
|
333 | 335 | pass |
|
334 | 336 | else: |
|
335 | 337 | if masterfctx in self.revmap: # no need to update linelog |
|
336 | 338 | masterfctx = None |
|
337 | 339 | |
|
338 | 340 | # ... - @ <- rev (can be an arbitrary changeset, |
|
339 | 341 | # / not necessarily a descendant |
|
340 | 342 | # master -> o of master) |
|
341 | 343 | # | |
|
342 | 344 | # a merge -> o 'o': new changesets in the main branch |
|
343 | 345 | # |\ '#': revisions in the main branch that |
|
344 | 346 | # o * exist in linelog / revmap |
|
345 | 347 | # | . '*': changesets in side branches, or |
|
346 | 348 | # last master -> # . descendants of master |
|
347 | 349 | # | . |
|
348 | 350 | # # * joint: '#', and is a parent of a '*' |
|
349 | 351 | # |/ |
|
350 | 352 | # a joint -> # ^^^^ --- side branches |
|
351 | 353 | # | |
|
352 | 354 | # ^ --- main branch (in linelog) |
|
353 | 355 | |
|
354 | 356 | # these DFSes are similar to the traditional annotate algorithm. |
|
355 | 357 | # we cannot really reuse the code for perf reason. |
|
356 | 358 | |
|
357 | 359 | # 1st DFS calculates merges, joint points, and needed. |
|
358 | 360 | # "needed" is a simple reference counting dict to free items in |
|
359 | 361 | # "hist", reducing its memory usage otherwise could be huge. |
|
360 | 362 | initvisit = [revfctx] |
|
361 | 363 | if masterfctx: |
|
362 | 364 | if masterfctx.rev() is None: |
|
363 | 365 | raise error.Abort( |
|
364 | 366 | _(b'cannot update linelog to wdir()'), |
|
365 | 367 | hint=_(b'set fastannotate.mainbranch'), |
|
366 | 368 | ) |
|
367 | 369 | initvisit.append(masterfctx) |
|
368 | 370 | visit = initvisit[:] |
|
369 | 371 | pcache = {} |
|
370 | 372 | needed = {revfctx: 1} |
|
371 | 373 | hist = {} # {fctx: ([(llrev or fctx, linenum)], text)} |
|
372 | 374 | while visit: |
|
373 | 375 | f = visit.pop() |
|
374 | 376 | if f in pcache or f in hist: |
|
375 | 377 | continue |
|
376 | 378 | if f in self.revmap: # in the old main branch, it's a joint |
|
377 | 379 | llrev = self.revmap.hsh2rev(f.node()) |
|
378 | 380 | self.linelog.annotate(llrev) |
|
379 | 381 | result = self.linelog.annotateresult |
|
380 | 382 | hist[f] = (result, f.data()) |
|
381 | 383 | continue |
|
382 | 384 | pl = self._parentfunc(f) |
|
383 | 385 | pcache[f] = pl |
|
384 | 386 | for p in pl: |
|
385 | 387 | needed[p] = needed.get(p, 0) + 1 |
|
386 | 388 | if p not in pcache: |
|
387 | 389 | visit.append(p) |
|
388 | 390 | |
|
389 | 391 | # 2nd (simple) DFS calculates new changesets in the main branch |
|
390 | 392 | # ('o' nodes in # the above graph), so we know when to update linelog. |
|
391 | 393 | newmainbranch = set() |
|
392 | 394 | f = masterfctx |
|
393 | 395 | while f and f not in self.revmap: |
|
394 | 396 | newmainbranch.add(f) |
|
395 | 397 | pl = pcache[f] |
|
396 | 398 | if pl: |
|
397 | 399 | f = pl[0] |
|
398 | 400 | else: |
|
399 | 401 | f = None |
|
400 | 402 | break |
|
401 | 403 | |
|
402 | 404 | # f, if present, is the position where the last build stopped at, and |
|
403 | 405 | # should be the "master" last time. check to see if we can continue |
|
404 | 406 | # building the linelog incrementally. (we cannot if diverged) |
|
405 | 407 | if masterfctx is not None: |
|
406 | 408 | self._checklastmasterhead(f) |
|
407 | 409 | |
|
408 | 410 | if self.ui.debugflag: |
|
409 | 411 | if newmainbranch: |
|
410 | 412 | self.ui.debug( |
|
411 | 413 | b'fastannotate: %s: %d new changesets in the main' |
|
412 | 414 | b' branch\n' % (self.path, len(newmainbranch)) |
|
413 | 415 | ) |
|
414 | 416 | elif not hist: # no joints, no updates |
|
415 | 417 | self.ui.debug( |
|
416 | 418 | b'fastannotate: %s: linelog cannot help in ' |
|
417 | 419 | b'annotating this revision\n' % self.path |
|
418 | 420 | ) |
|
419 | 421 | |
|
420 | 422 | # prepare annotateresult so we can update linelog incrementally |
|
421 | 423 | self.linelog.annotate(self.linelog.maxrev) |
|
422 | 424 | |
|
423 | 425 | # 3rd DFS does the actual annotate |
|
424 | 426 | visit = initvisit[:] |
|
425 | 427 | progress = self.ui.makeprogress( |
|
426 | 428 | b'building cache', total=len(newmainbranch) |
|
427 | 429 | ) |
|
428 | 430 | while visit: |
|
429 | 431 | f = visit[-1] |
|
430 | 432 | if f in hist: |
|
431 | 433 | visit.pop() |
|
432 | 434 | continue |
|
433 | 435 | |
|
434 | 436 | ready = True |
|
435 | 437 | pl = pcache[f] |
|
436 | 438 | for p in pl: |
|
437 | 439 | if p not in hist: |
|
438 | 440 | ready = False |
|
439 | 441 | visit.append(p) |
|
440 | 442 | if not ready: |
|
441 | 443 | continue |
|
442 | 444 | |
|
443 | 445 | visit.pop() |
|
444 | 446 | blocks = None # mdiff blocks, used for appending linelog |
|
445 | 447 | ismainbranch = f in newmainbranch |
|
446 | 448 | # curr is the same as the traditional annotate algorithm, |
|
447 | 449 | # if we only care about linear history (do not follow merge), |
|
448 | 450 | # then curr is not actually used. |
|
449 | 451 | assert f not in hist |
|
450 | 452 | curr = _decorate(f) |
|
451 | 453 | for i, p in enumerate(pl): |
|
452 | 454 | bs = list(self._diffblocks(hist[p][1], curr[1])) |
|
453 | 455 | if i == 0 and ismainbranch: |
|
454 | 456 | blocks = bs |
|
455 | 457 | curr = _pair(hist[p], curr, bs) |
|
456 | 458 | if needed[p] == 1: |
|
457 | 459 | del hist[p] |
|
458 | 460 | del needed[p] |
|
459 | 461 | else: |
|
460 | 462 | needed[p] -= 1 |
|
461 | 463 | |
|
462 | 464 | hist[f] = curr |
|
463 | 465 | del pcache[f] |
|
464 | 466 | |
|
465 | 467 | if ismainbranch: # need to write to linelog |
|
466 | 468 | progress.increment() |
|
467 | 469 | bannotated = None |
|
468 | 470 | if len(pl) == 2 and self.opts.followmerge: # merge |
|
469 | 471 | bannotated = curr[0] |
|
470 | 472 | if blocks is None: # no parents, add an empty one |
|
471 | 473 | blocks = list(self._diffblocks(b'', curr[1])) |
|
472 | 474 | self._appendrev(f, blocks, bannotated) |
|
473 | 475 | elif showpath: # not append linelog, but we need to record path |
|
474 | 476 | self._node2path[f.node()] = f.path() |
|
475 | 477 | |
|
476 | 478 | progress.complete() |
|
477 | 479 | |
|
478 | 480 | result = [ |
|
479 | 481 | ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l) |
|
480 | 482 | for fr, l in hist[revfctx][0] |
|
481 | 483 | ] # [(node, linenumber)] |
|
482 | 484 | return self._refineannotateresult(result, revfctx, showpath, showlines) |
|
483 | 485 | |
|
484 | 486 | def canannotatedirectly(self, rev): |
|
485 | 487 | """(str) -> bool, fctx or node. |
|
486 | 488 | return (True, f) if we can annotate without updating the linelog, pass |
|
487 | 489 | f to annotatedirectly. |
|
488 | 490 | return (False, f) if we need extra calculation. f is the fctx resolved |
|
489 | 491 | from rev. |
|
490 | 492 | """ |
|
491 | 493 | result = True |
|
492 | 494 | f = None |
|
493 | 495 | if not isinstance(rev, int) and rev is not None: |
|
494 | 496 | hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev) |
|
495 | 497 | if hsh is not None and (hsh, self.path) in self.revmap: |
|
496 | 498 | f = hsh |
|
497 | 499 | if f is None: |
|
498 | 500 | adjustctx = b'linkrev' if self._perfhack else True |
|
499 | 501 | f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True) |
|
500 | 502 | result = f in self.revmap |
|
501 | 503 | if not result and self._perfhack: |
|
502 | 504 | # redo the resolution without perfhack - as we are going to |
|
503 | 505 | # do write operations, we need a correct fctx. |
|
504 | 506 | f = self._resolvefctx(rev, adjustctx=True, resolverev=True) |
|
505 | 507 | return result, f |
|
506 | 508 | |
|
507 | 509 | def annotatealllines(self, rev, showpath=False, showlines=False): |
|
508 | 510 | """(rev : str) -> [(node : str, linenum : int, path : str)] |
|
509 | 511 | |
|
510 | 512 | the result has the same format with annotate, but include all (including |
|
511 | 513 | deleted) lines up to rev. call this after calling annotate(rev, ...) for |
|
512 | 514 | better performance and accuracy. |
|
513 | 515 | """ |
|
514 | 516 | revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True) |
|
515 | 517 | |
|
516 | 518 | # find a chain from rev to anything in the mainbranch |
|
517 | 519 | if revfctx not in self.revmap: |
|
518 | 520 | chain = [revfctx] |
|
519 | 521 | a = b'' |
|
520 | 522 | while True: |
|
521 | 523 | f = chain[-1] |
|
522 | 524 | pl = self._parentfunc(f) |
|
523 | 525 | if not pl: |
|
524 | 526 | break |
|
525 | 527 | if pl[0] in self.revmap: |
|
526 | 528 | a = pl[0].data() |
|
527 | 529 | break |
|
528 | 530 | chain.append(pl[0]) |
|
529 | 531 | |
|
530 | 532 | # both self.linelog and self.revmap is backed by filesystem. now |
|
531 | 533 | # we want to modify them but do not want to write changes back to |
|
532 | 534 | # files. so we create in-memory objects and copy them. it's like |
|
533 | 535 | # a "fork". |
|
534 | 536 | linelog = linelogmod.linelog() |
|
535 | 537 | linelog.copyfrom(self.linelog) |
|
536 | 538 | linelog.annotate(linelog.maxrev) |
|
537 | 539 | revmap = revmapmod.revmap() |
|
538 | 540 | revmap.copyfrom(self.revmap) |
|
539 | 541 | |
|
540 | 542 | for f in reversed(chain): |
|
541 | 543 | b = f.data() |
|
542 | 544 | blocks = list(self._diffblocks(a, b)) |
|
543 | 545 | self._doappendrev(linelog, revmap, f, blocks) |
|
544 | 546 | a = b |
|
545 | 547 | else: |
|
546 | 548 | # fastpath: use existing linelog, revmap as we don't write to them |
|
547 | 549 | linelog = self.linelog |
|
548 | 550 | revmap = self.revmap |
|
549 | 551 | |
|
550 | 552 | lines = linelog.getalllines() |
|
551 | 553 | hsh = revfctx.node() |
|
552 | 554 | llrev = revmap.hsh2rev(hsh) |
|
553 | 555 | result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev] |
|
554 | 556 | # cannot use _refineannotateresult since we need custom logic for |
|
555 | 557 | # resolving line contents |
|
556 | 558 | if showpath: |
|
557 | 559 | result = self._addpathtoresult(result, revmap) |
|
558 | 560 | if showlines: |
|
559 | 561 | linecontents = self._resolvelines(result, revmap, linelog) |
|
560 | 562 | result = (result, linecontents) |
|
561 | 563 | return result |
|
562 | 564 | |
|
563 | 565 | def _resolvelines(self, annotateresult, revmap, linelog): |
|
564 | 566 | """(annotateresult) -> [line]. designed for annotatealllines. |
|
565 | 567 | this is probably the most inefficient code in the whole fastannotate |
|
566 | 568 | directory. but we have made a decision that the linelog does not |
|
567 | 569 | store line contents. so getting them requires random accesses to |
|
568 | 570 | the revlog data, since they can be many, it can be very slow. |
|
569 | 571 | """ |
|
570 | 572 | # [llrev] |
|
571 | 573 | revs = [revmap.hsh2rev(l[0]) for l in annotateresult] |
|
572 | 574 | result = [None] * len(annotateresult) |
|
573 | 575 | # {(rev, linenum): [lineindex]} |
|
574 | 576 | key2idxs = collections.defaultdict(list) |
|
575 | 577 | for i in pycompat.xrange(len(result)): |
|
576 | 578 | key2idxs[(revs[i], annotateresult[i][1])].append(i) |
|
577 | 579 | while key2idxs: |
|
578 | 580 | # find an unresolved line and its linelog rev to annotate |
|
579 | 581 | hsh = None |
|
580 | 582 | try: |
|
581 | 583 | for (rev, _linenum), idxs in pycompat.iteritems(key2idxs): |
|
582 | 584 | if revmap.rev2flag(rev) & revmapmod.sidebranchflag: |
|
583 | 585 | continue |
|
584 | 586 | hsh = annotateresult[idxs[0]][0] |
|
585 | 587 | break |
|
586 | 588 | except StopIteration: # no more unresolved lines |
|
587 | 589 | return result |
|
588 | 590 | if hsh is None: |
|
589 | 591 | # the remaining key2idxs are not in main branch, resolving them |
|
590 | 592 | # using the hard way... |
|
591 | 593 | revlines = {} |
|
592 | 594 | for (rev, linenum), idxs in pycompat.iteritems(key2idxs): |
|
593 | 595 | if rev not in revlines: |
|
594 | 596 | hsh = annotateresult[idxs[0]][0] |
|
595 | 597 | if self.ui.debugflag: |
|
596 | 598 | self.ui.debug( |
|
597 | 599 | b'fastannotate: reading %s line #%d ' |
|
598 | 600 | b'to resolve lines %r\n' |
|
599 | 601 | % (node.short(hsh), linenum, idxs) |
|
600 | 602 | ) |
|
601 | 603 | fctx = self._resolvefctx(hsh, revmap.rev2path(rev)) |
|
602 | 604 | lines = mdiff.splitnewlines(fctx.data()) |
|
603 | 605 | revlines[rev] = lines |
|
604 | 606 | for idx in idxs: |
|
605 | 607 | result[idx] = revlines[rev][linenum] |
|
606 | 608 | assert all(x is not None for x in result) |
|
607 | 609 | return result |
|
608 | 610 | |
|
609 | 611 | # run the annotate and the lines should match to the file content |
|
610 | 612 | self.ui.debug( |
|
611 | 613 | b'fastannotate: annotate %s to resolve lines\n' |
|
612 | 614 | % node.short(hsh) |
|
613 | 615 | ) |
|
614 | 616 | linelog.annotate(rev) |
|
615 | 617 | fctx = self._resolvefctx(hsh, revmap.rev2path(rev)) |
|
616 | 618 | annotated = linelog.annotateresult |
|
617 | 619 | lines = mdiff.splitnewlines(fctx.data()) |
|
618 | 620 | if len(lines) != len(annotated): |
|
619 | 621 | raise faerror.CorruptedFileError(b'unexpected annotated lines') |
|
620 | 622 | # resolve lines from the annotate result |
|
621 | 623 | for i, line in enumerate(lines): |
|
622 | 624 | k = annotated[i] |
|
623 | 625 | if k in key2idxs: |
|
624 | 626 | for idx in key2idxs[k]: |
|
625 | 627 | result[idx] = line |
|
626 | 628 | del key2idxs[k] |
|
627 | 629 | return result |
|
628 | 630 | |
|
629 | 631 | def annotatedirectly(self, f, showpath, showlines): |
|
630 | 632 | """like annotate, but when we know that f is in linelog. |
|
631 | 633 | f can be either a 20-char str (node) or a fctx. this is for perf - in |
|
632 | 634 | the best case, the user provides a node and we don't need to read the |
|
633 | 635 | filelog or construct any filecontext. |
|
634 | 636 | """ |
|
635 | 637 | if isinstance(f, bytes): |
|
636 | 638 | hsh = f |
|
637 | 639 | else: |
|
638 | 640 | hsh = f.node() |
|
639 | 641 | llrev = self.revmap.hsh2rev(hsh) |
|
640 | 642 | if not llrev: |
|
641 | 643 | raise faerror.CorruptedFileError( |
|
642 | 644 | b'%s is not in revmap' % node.hex(hsh) |
|
643 | 645 | ) |
|
644 | 646 | if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0: |
|
645 | 647 | raise faerror.CorruptedFileError( |
|
646 | 648 | b'%s is not in revmap mainbranch' % node.hex(hsh) |
|
647 | 649 | ) |
|
648 | 650 | self.linelog.annotate(llrev) |
|
649 | 651 | result = [ |
|
650 | 652 | (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult |
|
651 | 653 | ] |
|
652 | 654 | return self._refineannotateresult(result, f, showpath, showlines) |
|
653 | 655 | |
|
654 | 656 | def _refineannotateresult(self, result, f, showpath, showlines): |
|
655 | 657 | """add the missing path or line contents, they can be expensive. |
|
656 | 658 | f could be either node or fctx. |
|
657 | 659 | """ |
|
658 | 660 | if showpath: |
|
659 | 661 | result = self._addpathtoresult(result) |
|
660 | 662 | if showlines: |
|
661 | 663 | if isinstance(f, bytes): # f: node or fctx |
|
662 | 664 | llrev = self.revmap.hsh2rev(f) |
|
663 | 665 | fctx = self._resolvefctx(f, self.revmap.rev2path(llrev)) |
|
664 | 666 | else: |
|
665 | 667 | fctx = f |
|
666 | 668 | lines = mdiff.splitnewlines(fctx.data()) |
|
667 | 669 | if len(lines) != len(result): # linelog is probably corrupted |
|
668 | 670 | raise faerror.CorruptedFileError() |
|
669 | 671 | result = (result, lines) |
|
670 | 672 | return result |
|
671 | 673 | |
|
672 | 674 | def _appendrev(self, fctx, blocks, bannotated=None): |
|
673 | 675 | self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated) |
|
674 | 676 | |
|
675 | 677 | def _diffblocks(self, a, b): |
|
676 | 678 | return mdiff.allblocks(a, b, self.opts.diffopts) |
|
677 | 679 | |
|
678 | 680 | @staticmethod |
|
679 | 681 | def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None): |
|
680 | 682 | """append a revision to linelog and revmap""" |
|
681 | 683 | |
|
682 | 684 | def getllrev(f): |
|
683 | 685 | """(fctx) -> int""" |
|
684 | 686 | # f should not be a linelog revision |
|
685 | 687 | if isinstance(f, int): |
|
686 | 688 | raise error.ProgrammingError(b'f should not be an int') |
|
687 | 689 | # f is a fctx, allocate linelog rev on demand |
|
688 | 690 | hsh = f.node() |
|
689 | 691 | rev = revmap.hsh2rev(hsh) |
|
690 | 692 | if rev is None: |
|
691 | 693 | rev = revmap.append(hsh, sidebranch=True, path=f.path()) |
|
692 | 694 | return rev |
|
693 | 695 | |
|
694 | 696 | # append sidebranch revisions to revmap |
|
695 | 697 | siderevs = [] |
|
696 | 698 | siderevmap = {} # node: int |
|
697 | 699 | if bannotated is not None: |
|
698 | 700 | for (a1, a2, b1, b2), op in blocks: |
|
699 | 701 | if op != b'=': |
|
700 | 702 | # f could be either linelong rev, or fctx. |
|
701 | 703 | siderevs += [ |
|
702 | 704 | f |
|
703 | 705 | for f, l in bannotated[b1:b2] |
|
704 | 706 | if not isinstance(f, int) |
|
705 | 707 | ] |
|
706 | 708 | siderevs = set(siderevs) |
|
707 | 709 | if fctx in siderevs: # mainnode must be appended seperately |
|
708 | 710 | siderevs.remove(fctx) |
|
709 | 711 | for f in siderevs: |
|
710 | 712 | siderevmap[f] = getllrev(f) |
|
711 | 713 | |
|
712 | 714 | # the changeset in the main branch, could be a merge |
|
713 | 715 | llrev = revmap.append(fctx.node(), path=fctx.path()) |
|
714 | 716 | siderevmap[fctx] = llrev |
|
715 | 717 | |
|
716 | 718 | for (a1, a2, b1, b2), op in reversed(blocks): |
|
717 | 719 | if op == b'=': |
|
718 | 720 | continue |
|
719 | 721 | if bannotated is None: |
|
720 | 722 | linelog.replacelines(llrev, a1, a2, b1, b2) |
|
721 | 723 | else: |
|
722 | 724 | blines = [ |
|
723 | 725 | ((r if isinstance(r, int) else siderevmap[r]), l) |
|
724 | 726 | for r, l in bannotated[b1:b2] |
|
725 | 727 | ] |
|
726 | 728 | linelog.replacelines_vec(llrev, a1, a2, blines) |
|
727 | 729 | |
|
728 | 730 | def _addpathtoresult(self, annotateresult, revmap=None): |
|
729 | 731 | """(revmap, [(node, linenum)]) -> [(node, linenum, path)]""" |
|
730 | 732 | if revmap is None: |
|
731 | 733 | revmap = self.revmap |
|
732 | 734 | |
|
733 | 735 | def _getpath(nodeid): |
|
734 | 736 | path = self._node2path.get(nodeid) |
|
735 | 737 | if path is None: |
|
736 | 738 | path = revmap.rev2path(revmap.hsh2rev(nodeid)) |
|
737 | 739 | self._node2path[nodeid] = path |
|
738 | 740 | return path |
|
739 | 741 | |
|
740 | 742 | return [(n, l, _getpath(n)) for n, l in annotateresult] |
|
741 | 743 | |
|
742 | 744 | def _checklastmasterhead(self, fctx): |
|
743 | 745 | """check if fctx is the master's head last time, raise if not""" |
|
744 | 746 | if fctx is None: |
|
745 | 747 | llrev = 0 |
|
746 | 748 | else: |
|
747 | 749 | llrev = self.revmap.hsh2rev(fctx.node()) |
|
748 | 750 | if not llrev: |
|
749 | 751 | raise faerror.CannotReuseError() |
|
750 | 752 | if self.linelog.maxrev != llrev: |
|
751 | 753 | raise faerror.CannotReuseError() |
|
752 | 754 | |
|
753 | 755 | @util.propertycache |
|
754 | 756 | def _parentfunc(self): |
|
755 | 757 | """-> (fctx) -> [fctx]""" |
|
756 | 758 | followrename = self.opts.followrename |
|
757 | 759 | followmerge = self.opts.followmerge |
|
758 | 760 | |
|
759 | 761 | def parents(f): |
|
760 | 762 | pl = _parents(f, follow=followrename) |
|
761 | 763 | if not followmerge: |
|
762 | 764 | pl = pl[:1] |
|
763 | 765 | return pl |
|
764 | 766 | |
|
765 | 767 | return parents |
|
766 | 768 | |
|
767 | 769 | @util.propertycache |
|
768 | 770 | def _perfhack(self): |
|
769 | 771 | return self.ui.configbool(b'fastannotate', b'perfhack') |
|
770 | 772 | |
|
771 | 773 | def _resolvefctx(self, rev, path=None, **kwds): |
|
772 | 774 | return resolvefctx(self.repo, rev, (path or self.path), **kwds) |
|
773 | 775 | |
|
774 | 776 | |
|
775 | 777 | def _unlinkpaths(paths): |
|
776 | 778 | """silent, best-effort unlink""" |
|
777 | 779 | for path in paths: |
|
778 | 780 | try: |
|
779 | 781 | util.unlink(path) |
|
780 | 782 | except OSError: |
|
781 | 783 | pass |
|
782 | 784 | |
|
783 | 785 | |
|
784 | 786 | class pathhelper(object): |
|
785 | 787 | """helper for getting paths for lockfile, linelog and revmap""" |
|
786 | 788 | |
|
787 | 789 | def __init__(self, repo, path, opts=defaultopts): |
|
788 | 790 | # different options use different directories |
|
789 | 791 | self._vfspath = os.path.join( |
|
790 | 792 | b'fastannotate', opts.shortstr, encodedir(path) |
|
791 | 793 | ) |
|
792 | 794 | self._repo = repo |
|
793 | 795 | |
|
794 | 796 | @property |
|
795 | 797 | def dirname(self): |
|
796 | 798 | return os.path.dirname(self._repo.vfs.join(self._vfspath)) |
|
797 | 799 | |
|
798 | 800 | @property |
|
799 | 801 | def linelogpath(self): |
|
800 | 802 | return self._repo.vfs.join(self._vfspath + b'.l') |
|
801 | 803 | |
|
802 | 804 | def lock(self): |
|
803 | 805 | return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock') |
|
804 | 806 | |
|
805 | 807 | @property |
|
806 | 808 | def revmappath(self): |
|
807 | 809 | return self._repo.vfs.join(self._vfspath + b'.m') |
|
808 | 810 | |
|
809 | 811 | |
|
810 | 812 | @contextlib.contextmanager |
|
811 | 813 | def annotatecontext(repo, path, opts=defaultopts, rebuild=False): |
|
812 | 814 | """context needed to perform (fast) annotate on a file |
|
813 | 815 | |
|
814 | 816 | an annotatecontext of a single file consists of two structures: the |
|
815 | 817 | linelog and the revmap. this function takes care of locking. only 1 |
|
816 | 818 | process is allowed to write that file's linelog and revmap at a time. |
|
817 | 819 | |
|
818 | 820 | when something goes wrong, this function will assume the linelog and the |
|
819 | 821 | revmap are in a bad state, and remove them from disk. |
|
820 | 822 | |
|
821 | 823 | use this function in the following way: |
|
822 | 824 | |
|
823 | 825 | with annotatecontext(...) as actx: |
|
824 | 826 | actx. .... |
|
825 | 827 | """ |
|
826 | 828 | helper = pathhelper(repo, path, opts) |
|
827 | 829 | util.makedirs(helper.dirname) |
|
828 | 830 | revmappath = helper.revmappath |
|
829 | 831 | linelogpath = helper.linelogpath |
|
830 | 832 | actx = None |
|
831 | 833 | try: |
|
832 | 834 | with helper.lock(): |
|
833 | 835 | actx = _annotatecontext(repo, path, linelogpath, revmappath, opts) |
|
834 | 836 | if rebuild: |
|
835 | 837 | actx.rebuild() |
|
836 | 838 | yield actx |
|
837 | 839 | except Exception: |
|
838 | 840 | if actx is not None: |
|
839 | 841 | actx.rebuild() |
|
840 | 842 | repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path) |
|
841 | 843 | raise |
|
842 | 844 | finally: |
|
843 | 845 | if actx is not None: |
|
844 | 846 | actx.close() |
|
845 | 847 | |
|
846 | 848 | |
|
847 | 849 | def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False): |
|
848 | 850 | """like annotatecontext but get the context from a fctx. convenient when |
|
849 | 851 | used in fctx.annotate |
|
850 | 852 | """ |
|
851 | 853 | repo = fctx._repo |
|
852 | 854 | path = fctx._path |
|
853 | 855 | if repo.ui.configbool(b'fastannotate', b'forcefollow', True): |
|
854 | 856 | follow = True |
|
855 | 857 | aopts = annotateopts(diffopts=diffopts, followrename=follow) |
|
856 | 858 | return annotatecontext(repo, path, aopts, rebuild) |
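Both files in this change (fastannotate's context.py above and the fsmonitor extension below) apply the same mechanical substitution: the stdlib "import hashlib" is dropped and each call site constructs its hasher via hashutil.sha1() from mercurial.utils. As a minimal sketch of the pattern (an illustration, not the upstream code; it assumes hashutil.sha1 is call-compatible with hashlib.sha1, reportedly preferring a collision-detecting SHA-1 backend when one is available):

    # Sketch only: mirrors the call-site change made in hashdiffopts() above.
    try:
        from mercurial.utils import hashutil  # assumed drop-in replacement for hashlib.sha1
        _sha1 = hashutil.sha1
    except ImportError:
        import hashlib  # fallback so the sketch runs outside a Mercurial checkout
        _sha1 = hashlib.sha1

    def shortdigest(data):
        # hash some bytes and keep a short hex prefix, as hashdiffopts() does
        return _sha1(data).hexdigest()[:6]
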
@@ -1,988 +1,990 b'' | |||
|
1 | 1 | # __init__.py - fsmonitor initialization and overrides |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013-2016 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | '''Faster status operations with the Watchman file monitor (EXPERIMENTAL) |
|
9 | 9 | |
|
10 | 10 | Integrates the file-watching program Watchman with Mercurial to produce faster |
|
11 | 11 | status results. |
|
12 | 12 | |
|
13 | 13 | On a particular Linux system, for a real-world repository with over 400,000 |
|
14 | 14 | files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same |
|
15 | 15 | system, with fsmonitor it takes about 0.3 seconds. |
|
16 | 16 | |
|
17 | 17 | fsmonitor requires no configuration -- it will tell Watchman about your |
|
18 | 18 | repository as necessary. You'll need to install Watchman from |
|
19 | 19 | https://facebook.github.io/watchman/ and make sure it is in your PATH. |
|
20 | 20 | |
|
21 | 21 | fsmonitor is incompatible with the largefiles and eol extensions, and |
|
22 | 22 | will disable itself if any of those are active. |
|
23 | 23 | |
|
24 | 24 | The following configuration options exist: |
|
25 | 25 | |
|
26 | 26 | :: |
|
27 | 27 | |
|
28 | 28 | [fsmonitor] |
|
29 | 29 | mode = {off, on, paranoid} |
|
30 | 30 | |
|
31 | 31 | When `mode = off`, fsmonitor will disable itself (similar to not loading the |
|
32 | 32 | extension at all). When `mode = on`, fsmonitor will be enabled (the default). |
|
33 | 33 | When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem, |
|
34 | 34 | and ensure that the results are consistent. |
|
35 | 35 | |
|
36 | 36 | :: |
|
37 | 37 | |
|
38 | 38 | [fsmonitor] |
|
39 | 39 | timeout = (float) |
|
40 | 40 | |
|
41 | 41 | A value, in seconds, that determines how long fsmonitor will wait for Watchman |
|
42 | 42 | to return results. Defaults to `2.0`. |
|
43 | 43 | |
|
44 | 44 | :: |
|
45 | 45 | |
|
46 | 46 | [fsmonitor] |
|
47 | 47 | blacklistusers = (list of userids) |
|
48 | 48 | |
|
49 | 49 | A list of usernames for which fsmonitor will disable itself altogether. |
|
50 | 50 | |
|
51 | 51 | :: |
|
52 | 52 | |
|
53 | 53 | [fsmonitor] |
|
54 | 54 | walk_on_invalidate = (boolean) |
|
55 | 55 | |
|
56 | 56 | Whether or not to walk the whole repo ourselves when our cached state has been |
|
57 | 57 | invalidated, for example when Watchman has been restarted or .hgignore rules |
|
58 | 58 | have been changed. Walking the repo in that case can result in competing for |
|
59 | 59 | I/O with Watchman. For large repos it is recommended to set this value to |
|
60 | 60 | false. You may wish to set this to true if you have a very fast filesystem |
|
61 | 61 | that can outpace the IPC overhead of getting the result data for the full repo |
|
62 | 62 | from Watchman. Defaults to false. |
|
63 | 63 | |
|
64 | 64 | :: |
|
65 | 65 | |
|
66 | 66 | [fsmonitor] |
|
67 | 67 | warn_when_unused = (boolean) |
|
68 | 68 | |
|
69 | 69 | Whether to print a warning during certain operations when fsmonitor would be |
|
70 | 70 | beneficial to performance but isn't enabled. |
|
71 | 71 | |
|
72 | 72 | :: |
|
73 | 73 | |
|
74 | 74 | [fsmonitor] |
|
75 | 75 | warn_update_file_count = (integer) |
|
76 | 76 | |
|
77 | 77 | If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will |
|
78 | 78 | be printed during working directory updates if this many files will be |
|
79 | 79 | created. |
|
80 | 80 | ''' |
|
81 | 81 | |
|
82 | 82 | # Platforms Supported |
|
83 | 83 | # =================== |
|
84 | 84 | # |
|
85 | 85 | # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably, |
|
86 | 86 | # even under severe loads. |
|
87 | 87 | # |
|
88 | 88 | # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor |
|
89 | 89 | # turned on, on case-insensitive HFS+. There has been a reasonable amount of |
|
90 | 90 | # user testing under normal loads. |
|
91 | 91 | # |
|
92 | 92 | # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but |
|
93 | 93 | # very little testing has been done. |
|
94 | 94 | # |
|
95 | 95 | # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet. |
|
96 | 96 | # |
|
97 | 97 | # Known Issues |
|
98 | 98 | # ============ |
|
99 | 99 | # |
|
100 | 100 | # * fsmonitor will disable itself if any of the following extensions are |
|
101 | 101 | # enabled: largefiles, inotify, eol; or if the repository has subrepos. |
|
102 | 102 | # * fsmonitor will produce incorrect results if nested repos that are not |
|
103 | 103 | # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`. |
|
104 | 104 | # |
|
105 | 105 | # The issues related to nested repos and subrepos are probably not fundamental |
|
106 | 106 | # ones. Patches to fix them are welcome. |
|
107 | 107 | |
|
108 | 108 | from __future__ import absolute_import |
|
109 | 109 | |
|
110 | 110 | import codecs |
|
111 | import hashlib | |
|
112 | 111 | import os |
|
113 | 112 | import stat |
|
114 | 113 | import sys |
|
115 | 114 | import tempfile |
|
116 | 115 | import weakref |
|
117 | 116 | |
|
118 | 117 | from mercurial.i18n import _ |
|
119 | 118 | from mercurial.node import hex |
|
120 | 119 | from mercurial.pycompat import open |
|
121 | 120 | from mercurial import ( |
|
122 | 121 | context, |
|
123 | 122 | encoding, |
|
124 | 123 | error, |
|
125 | 124 | extensions, |
|
126 | 125 | localrepo, |
|
127 | 126 | merge, |
|
128 | 127 | pathutil, |
|
129 | 128 | pycompat, |
|
130 | 129 | registrar, |
|
131 | 130 | scmutil, |
|
132 | 131 | util, |
|
133 | 132 | ) |
|
134 | 133 | from mercurial import match as matchmod |
|
135 | from mercurial.utils import stringutil | |
|
134 | from mercurial.utils import ( | |
|
135 | hashutil, | |
|
136 | stringutil, | |
|
137 | ) | |
|
136 | 138 | |
|
137 | 139 | from . import ( |
|
138 | 140 | pywatchman, |
|
139 | 141 | state, |
|
140 | 142 | watchmanclient, |
|
141 | 143 | ) |
|
142 | 144 | |
|
143 | 145 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
144 | 146 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
145 | 147 | # be specifying the version(s) of Mercurial they are tested with, or |
|
146 | 148 | # leave the attribute unspecified. |
|
147 | 149 | testedwith = b'ships-with-hg-core' |
|
148 | 150 | |
|
149 | 151 | configtable = {} |
|
150 | 152 | configitem = registrar.configitem(configtable) |
|
151 | 153 | |
|
152 | 154 | configitem( |
|
153 | 155 | b'fsmonitor', b'mode', default=b'on', |
|
154 | 156 | ) |
|
155 | 157 | configitem( |
|
156 | 158 | b'fsmonitor', b'walk_on_invalidate', default=False, |
|
157 | 159 | ) |
|
158 | 160 | configitem( |
|
159 | 161 | b'fsmonitor', b'timeout', default=b'2', |
|
160 | 162 | ) |
|
161 | 163 | configitem( |
|
162 | 164 | b'fsmonitor', b'blacklistusers', default=list, |
|
163 | 165 | ) |
|
164 | 166 | configitem( |
|
165 | 167 | b'fsmonitor', b'watchman_exe', default=b'watchman', |
|
166 | 168 | ) |
|
167 | 169 | configitem( |
|
168 | 170 | b'fsmonitor', b'verbose', default=True, experimental=True, |
|
169 | 171 | ) |
|
170 | 172 | configitem( |
|
171 | 173 | b'experimental', b'fsmonitor.transaction_notify', default=False, |
|
172 | 174 | ) |
|
173 | 175 | |
|
174 | 176 | # This extension is incompatible with the following blacklisted extensions |
|
175 | 177 | # and will disable itself when encountering one of these: |
|
176 | 178 | _blacklist = [b'largefiles', b'eol'] |
|
177 | 179 | |
|
178 | 180 | |
|
179 | 181 | def debuginstall(ui, fm): |
|
180 | 182 | fm.write( |
|
181 | 183 | b"fsmonitor-watchman", |
|
182 | 184 | _(b"fsmonitor checking for watchman binary... (%s)\n"), |
|
183 | 185 | ui.configpath(b"fsmonitor", b"watchman_exe"), |
|
184 | 186 | ) |
|
185 | 187 | root = tempfile.mkdtemp() |
|
186 | 188 | c = watchmanclient.client(ui, root) |
|
187 | 189 | err = None |
|
188 | 190 | try: |
|
189 | 191 | v = c.command(b"version") |
|
190 | 192 | fm.write( |
|
191 | 193 | b"fsmonitor-watchman-version", |
|
192 | 194 | _(b" watchman binary version %s\n"), |
|
193 | 195 | pycompat.bytestr(v["version"]), |
|
194 | 196 | ) |
|
195 | 197 | except watchmanclient.Unavailable as e: |
|
196 | 198 | err = stringutil.forcebytestr(e) |
|
197 | 199 | fm.condwrite( |
|
198 | 200 | err, |
|
199 | 201 | b"fsmonitor-watchman-error", |
|
200 | 202 | _(b" watchman binary missing or broken: %s\n"), |
|
201 | 203 | err, |
|
202 | 204 | ) |
|
203 | 205 | return 1 if err else 0 |
|
204 | 206 | |
|
205 | 207 | |
|
206 | 208 | def _handleunavailable(ui, state, ex): |
|
207 | 209 | """Exception handler for Watchman interaction exceptions""" |
|
208 | 210 | if isinstance(ex, watchmanclient.Unavailable): |
|
209 | 211 | # experimental config: fsmonitor.verbose |
|
210 | 212 | if ex.warn and ui.configbool(b'fsmonitor', b'verbose'): |
|
211 | 213 | if b'illegal_fstypes' not in stringutil.forcebytestr(ex): |
|
212 | 214 | ui.warn(stringutil.forcebytestr(ex) + b'\n') |
|
213 | 215 | if ex.invalidate: |
|
214 | 216 | state.invalidate() |
|
215 | 217 | # experimental config: fsmonitor.verbose |
|
216 | 218 | if ui.configbool(b'fsmonitor', b'verbose'): |
|
217 | 219 | ui.log( |
|
218 | 220 | b'fsmonitor', |
|
219 | 221 | b'Watchman unavailable: %s\n', |
|
220 | 222 | stringutil.forcebytestr(ex.msg), |
|
221 | 223 | ) |
|
222 | 224 | else: |
|
223 | 225 | ui.log( |
|
224 | 226 | b'fsmonitor', |
|
225 | 227 | b'Watchman exception: %s\n', |
|
226 | 228 | stringutil.forcebytestr(ex), |
|
227 | 229 | ) |
|
228 | 230 | |
|
229 | 231 | |
|
230 | 232 | def _hashignore(ignore): |
|
231 | 233 | """Calculate hash for ignore patterns and filenames |
|
232 | 234 | |
|
233 | 235 | If this information changes between Mercurial invocations, we can't |
|
234 | 236 | rely on Watchman information anymore and have to re-scan the working |
|
235 | 237 | copy. |
|
236 | 238 | |
|
237 | 239 | """ |
|
238 | sha1 = hashlib.sha1() | |
|
240 | sha1 = hashutil.sha1() | |
|
239 | 241 | sha1.update(pycompat.byterepr(ignore)) |
|
240 | 242 | return pycompat.sysbytes(sha1.hexdigest()) |
|
241 | 243 | |
|
242 | 244 | |
|
243 | 245 | _watchmanencoding = pywatchman.encoding.get_local_encoding() |
|
244 | 246 | _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() |
|
245 | 247 | _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding) |
|
246 | 248 | |
|
247 | 249 | |
|
248 | 250 | def _watchmantofsencoding(path): |
|
249 | 251 | """Fix path to match watchman and local filesystem encoding |
|
250 | 252 | |
|
251 | 253 | watchman's paths encoding can differ from filesystem encoding. For example, |
|
252 | 254 | on Windows, it's always utf-8. |
|
253 | 255 | """ |
|
254 | 256 | try: |
|
255 | 257 | decoded = path.decode(_watchmanencoding) |
|
256 | 258 | except UnicodeDecodeError as e: |
|
257 | 259 | raise error.Abort( |
|
258 | 260 | stringutil.forcebytestr(e), hint=b'watchman encoding error' |
|
259 | 261 | ) |
|
260 | 262 | |
|
261 | 263 | try: |
|
262 | 264 | encoded = decoded.encode(_fsencoding, 'strict') |
|
263 | 265 | except UnicodeEncodeError as e: |
|
264 | 266 | raise error.Abort(stringutil.forcebytestr(e)) |
|
265 | 267 | |
|
266 | 268 | return encoded |
|
267 | 269 | |
|
268 | 270 | |
|
269 | 271 | def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): |
|
270 | 272 | '''Replacement for dirstate.walk, hooking into Watchman. |
|
271 | 273 | |
|
272 | 274 | Whenever full is False, ignored is False, and the Watchman client is |
|
273 | 275 | available, use Watchman combined with saved state to possibly return only a |
|
274 | 276 | subset of files.''' |
|
275 | 277 | |
|
276 | 278 | def bail(reason): |
|
277 | 279 | self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason) |
|
278 | 280 | return orig(match, subrepos, unknown, ignored, full=True) |
|
279 | 281 | |
|
280 | 282 | if full: |
|
281 | 283 | return bail(b'full rewalk requested') |
|
282 | 284 | if ignored: |
|
283 | 285 | return bail(b'listing ignored files') |
|
284 | 286 | if not self._watchmanclient.available(): |
|
285 | 287 | return bail(b'client unavailable') |
|
286 | 288 | state = self._fsmonitorstate |
|
287 | 289 | clock, ignorehash, notefiles = state.get() |
|
288 | 290 | if not clock: |
|
289 | 291 | if state.walk_on_invalidate: |
|
290 | 292 | return bail(b'no clock') |
|
291 | 293 | # Initial NULL clock value, see |
|
292 | 294 | # https://facebook.github.io/watchman/docs/clockspec.html |
|
293 | 295 | clock = b'c:0:0' |
|
294 | 296 | notefiles = [] |
|
295 | 297 | |
|
296 | 298 | ignore = self._ignore |
|
297 | 299 | dirignore = self._dirignore |
|
298 | 300 | if unknown: |
|
299 | 301 | if _hashignore(ignore) != ignorehash and clock != b'c:0:0': |
|
300 | 302 | # ignore list changed -- can't rely on Watchman state any more |
|
301 | 303 | if state.walk_on_invalidate: |
|
302 | 304 | return bail(b'ignore rules changed') |
|
303 | 305 | notefiles = [] |
|
304 | 306 | clock = b'c:0:0' |
|
305 | 307 | else: |
|
306 | 308 | # always ignore |
|
307 | 309 | ignore = util.always |
|
308 | 310 | dirignore = util.always |
|
309 | 311 | |
|
310 | 312 | matchfn = match.matchfn |
|
311 | 313 | matchalways = match.always() |
|
312 | 314 | dmap = self._map |
|
313 | 315 | if util.safehasattr(dmap, b'_map'): |
|
314 | 316 | # for better performance, directly access the inner dirstate map if the |
|
315 | 317 | # standard dirstate implementation is in use. |
|
316 | 318 | dmap = dmap._map |
|
317 | 319 | nonnormalset = self._map.nonnormalset |
|
318 | 320 | |
|
319 | 321 | copymap = self._map.copymap |
|
320 | 322 | getkind = stat.S_IFMT |
|
321 | 323 | dirkind = stat.S_IFDIR |
|
322 | 324 | regkind = stat.S_IFREG |
|
323 | 325 | lnkkind = stat.S_IFLNK |
|
324 | 326 | join = self._join |
|
325 | 327 | normcase = util.normcase |
|
326 | 328 | fresh_instance = False |
|
327 | 329 | |
|
328 | 330 | exact = skipstep3 = False |
|
329 | 331 | if match.isexact(): # match.exact |
|
330 | 332 | exact = True |
|
331 | 333 | dirignore = util.always # skip step 2 |
|
332 | 334 | elif match.prefix(): # match.match, no patterns |
|
333 | 335 | skipstep3 = True |
|
334 | 336 | |
|
335 | 337 | if not exact and self._checkcase: |
|
336 | 338 | # note that even though we could receive directory entries, we're only |
|
337 | 339 | # interested in checking if a file with the same name exists. So only |
|
338 | 340 | # normalize files if possible. |
|
339 | 341 | normalize = self._normalizefile |
|
340 | 342 | skipstep3 = False |
|
341 | 343 | else: |
|
342 | 344 | normalize = None |
|
343 | 345 | |
|
344 | 346 | # step 1: find all explicit files |
|
345 | 347 | results, work, dirsnotfound = self._walkexplicit(match, subrepos) |
|
346 | 348 | |
|
347 | 349 | skipstep3 = skipstep3 and not (work or dirsnotfound) |
|
348 | 350 | work = [d for d in work if not dirignore(d[0])] |
|
349 | 351 | |
|
350 | 352 | if not work and (exact or skipstep3): |
|
351 | 353 | for s in subrepos: |
|
352 | 354 | del results[s] |
|
353 | 355 | del results[b'.hg'] |
|
354 | 356 | return results |
|
355 | 357 | |
|
356 | 358 | # step 2: query Watchman |
|
357 | 359 | try: |
|
358 | 360 | # Use the user-configured timeout for the query. |
|
359 | 361 | # Add a little slack over the top of the user query to allow for |
|
360 | 362 | # overheads while transferring the data |
|
361 | 363 | self._watchmanclient.settimeout(state.timeout + 0.1) |
|
362 | 364 | result = self._watchmanclient.command( |
|
363 | 365 | b'query', |
|
364 | 366 | { |
|
365 | 367 | b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'], |
|
366 | 368 | b'since': clock, |
|
367 | 369 | b'expression': [ |
|
368 | 370 | b'not', |
|
369 | 371 | [ |
|
370 | 372 | b'anyof', |
|
371 | 373 | [b'dirname', b'.hg'], |
|
372 | 374 | [b'name', b'.hg', b'wholename'], |
|
373 | 375 | ], |
|
374 | 376 | ], |
|
375 | 377 | b'sync_timeout': int(state.timeout * 1000), |
|
376 | 378 | b'empty_on_fresh_instance': state.walk_on_invalidate, |
|
377 | 379 | }, |
|
378 | 380 | ) |
|
379 | 381 | except Exception as ex: |
|
380 | 382 | _handleunavailable(self._ui, state, ex) |
|
381 | 383 | self._watchmanclient.clearconnection() |
|
382 | 384 | return bail(b'exception during run') |
|
383 | 385 | else: |
|
384 | 386 | # We need to propagate the last observed clock up so that we |
|
385 | 387 | # can use it for our next query |
|
386 | 388 | state.setlastclock(pycompat.sysbytes(result[b'clock'])) |
|
387 | 389 | if result[b'is_fresh_instance']: |
|
388 | 390 | if state.walk_on_invalidate: |
|
389 | 391 | state.invalidate() |
|
390 | 392 | return bail(b'fresh instance') |
|
391 | 393 | fresh_instance = True |
|
392 | 394 | # Ignore any prior noteable files from the state info |
|
393 | 395 | notefiles = [] |
|
394 | 396 | |
|
395 | 397 | # for file paths which require normalization and we encounter a case |
|
396 | 398 | # collision, we store our own foldmap |
|
397 | 399 | if normalize: |
|
398 | 400 | foldmap = dict((normcase(k), k) for k in results) |
|
399 | 401 | |
|
400 | 402 | switch_slashes = pycompat.ossep == b'\\' |
|
401 | 403 | # The order of the results is, strictly speaking, undefined. |
|
402 | 404 | # For case changes on a case insensitive filesystem we may receive |
|
403 | 405 | # two entries, one with exists=True and another with exists=False. |
|
404 | 406 | # The exists=True entries in the same response should be interpreted |
|
405 | 407 | # as being happens-after the exists=False entries due to the way that |
|
406 | 408 | # Watchman tracks files. We use this property to reconcile deletes |
|
407 | 409 | # for name case changes. |
|
408 | 410 | for entry in result[b'files']: |
|
409 | 411 | fname = entry[b'name'] |
|
410 | 412 | |
|
411 | 413 | # Watchman always give us a str. Normalize to bytes on Python 3 |
|
412 | 414 | # using Watchman's encoding, if needed. |
|
413 | 415 | if not isinstance(fname, bytes): |
|
414 | 416 | fname = fname.encode(_watchmanencoding) |
|
415 | 417 | |
|
416 | 418 | if _fixencoding: |
|
417 | 419 | fname = _watchmantofsencoding(fname) |
|
418 | 420 | |
|
419 | 421 | if switch_slashes: |
|
420 | 422 | fname = fname.replace(b'\\', b'/') |
|
421 | 423 | if normalize: |
|
422 | 424 | normed = normcase(fname) |
|
423 | 425 | fname = normalize(fname, True, True) |
|
424 | 426 | foldmap[normed] = fname |
|
425 | 427 | fmode = entry[b'mode'] |
|
426 | 428 | fexists = entry[b'exists'] |
|
427 | 429 | kind = getkind(fmode) |
|
428 | 430 | |
|
429 | 431 | if b'/.hg/' in fname or fname.endswith(b'/.hg'): |
|
430 | 432 | return bail(b'nested-repo-detected') |
|
431 | 433 | |
|
432 | 434 | if not fexists: |
|
433 | 435 | # if marked as deleted and we don't already have a change |
|
434 | 436 | # record, mark it as deleted. If we already have an entry |
|
435 | 437 | # for fname then it was either part of walkexplicit or was |
|
436 | 438 | # an earlier result that was a case change |
|
437 | 439 | if ( |
|
438 | 440 | fname not in results |
|
439 | 441 | and fname in dmap |
|
440 | 442 | and (matchalways or matchfn(fname)) |
|
441 | 443 | ): |
|
442 | 444 | results[fname] = None |
|
443 | 445 | elif kind == dirkind: |
|
444 | 446 | if fname in dmap and (matchalways or matchfn(fname)): |
|
445 | 447 | results[fname] = None |
|
446 | 448 | elif kind == regkind or kind == lnkkind: |
|
447 | 449 | if fname in dmap: |
|
448 | 450 | if matchalways or matchfn(fname): |
|
449 | 451 | results[fname] = entry |
|
450 | 452 | elif (matchalways or matchfn(fname)) and not ignore(fname): |
|
451 | 453 | results[fname] = entry |
|
452 | 454 | elif fname in dmap and (matchalways or matchfn(fname)): |
|
453 | 455 | results[fname] = None |
|
454 | 456 | |
|
455 | 457 | # step 3: query notable files we don't already know about |
|
456 | 458 | # XXX try not to iterate over the entire dmap |
|
457 | 459 | if normalize: |
|
458 | 460 | # any notable files that have changed case will already be handled |
|
459 | 461 | # above, so just check membership in the foldmap |
|
460 | 462 | notefiles = set( |
|
461 | 463 | ( |
|
462 | 464 | normalize(f, True, True) |
|
463 | 465 | for f in notefiles |
|
464 | 466 | if normcase(f) not in foldmap |
|
465 | 467 | ) |
|
466 | 468 | ) |
|
467 | 469 | visit = set( |
|
468 | 470 | ( |
|
469 | 471 | f |
|
470 | 472 | for f in notefiles |
|
471 | 473 | if ( |
|
472 | 474 | f not in results and matchfn(f) and (f in dmap or not ignore(f)) |
|
473 | 475 | ) |
|
474 | 476 | ) |
|
475 | 477 | ) |
|
476 | 478 | |
|
477 | 479 | if not fresh_instance: |
|
478 | 480 | if matchalways: |
|
479 | 481 | visit.update(f for f in nonnormalset if f not in results) |
|
480 | 482 | visit.update(f for f in copymap if f not in results) |
|
481 | 483 | else: |
|
482 | 484 | visit.update( |
|
483 | 485 | f for f in nonnormalset if f not in results and matchfn(f) |
|
484 | 486 | ) |
|
485 | 487 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
486 | 488 | else: |
|
487 | 489 | if matchalways: |
|
488 | 490 | visit.update( |
|
489 | 491 | f for f, st in pycompat.iteritems(dmap) if f not in results |
|
490 | 492 | ) |
|
491 | 493 | visit.update(f for f in copymap if f not in results) |
|
492 | 494 | else: |
|
493 | 495 | visit.update( |
|
494 | 496 | f |
|
495 | 497 | for f, st in pycompat.iteritems(dmap) |
|
496 | 498 | if f not in results and matchfn(f) |
|
497 | 499 | ) |
|
498 | 500 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
499 | 501 | |
|
500 | 502 | audit = pathutil.pathauditor(self._root, cached=True).check |
|
501 | 503 | auditpass = [f for f in visit if audit(f)] |
|
502 | 504 | auditpass.sort() |
|
503 | 505 | auditfail = visit.difference(auditpass) |
|
504 | 506 | for f in auditfail: |
|
505 | 507 | results[f] = None |
|
506 | 508 | |
|
507 | 509 | nf = iter(auditpass) |
|
508 | 510 | for st in util.statfiles([join(f) for f in auditpass]): |
|
509 | 511 | f = next(nf) |
|
510 | 512 | if st or f in dmap: |
|
511 | 513 | results[f] = st |
|
512 | 514 | |
|
513 | 515 | for s in subrepos: |
|
514 | 516 | del results[s] |
|
515 | 517 | del results[b'.hg'] |
|
516 | 518 | return results |
|
517 | 519 | |
|
518 | 520 | |
|
519 | 521 | def overridestatus( |
|
520 | 522 | orig, |
|
521 | 523 | self, |
|
522 | 524 | node1=b'.', |
|
523 | 525 | node2=None, |
|
524 | 526 | match=None, |
|
525 | 527 | ignored=False, |
|
526 | 528 | clean=False, |
|
527 | 529 | unknown=False, |
|
528 | 530 | listsubrepos=False, |
|
529 | 531 | ): |
|
530 | 532 | listignored = ignored |
|
531 | 533 | listclean = clean |
|
532 | 534 | listunknown = unknown |
|
533 | 535 | |
|
534 | 536 | def _cmpsets(l1, l2): |
|
535 | 537 | try: |
|
536 | 538 | if b'FSMONITOR_LOG_FILE' in encoding.environ: |
|
537 | 539 | fn = encoding.environ[b'FSMONITOR_LOG_FILE'] |
|
538 | 540 | f = open(fn, b'wb') |
|
539 | 541 | else: |
|
540 | 542 | fn = b'fsmonitorfail.log' |
|
541 | 543 | f = self.vfs.open(fn, b'wb') |
|
542 | 544 | except (IOError, OSError): |
|
543 | 545 | self.ui.warn(_(b'warning: unable to write to %s\n') % fn) |
|
544 | 546 | return |
|
545 | 547 | |
|
546 | 548 | try: |
|
547 | 549 | for i, (s1, s2) in enumerate(zip(l1, l2)): |
|
548 | 550 | if set(s1) != set(s2): |
|
549 | 551 | f.write(b'sets at position %d are unequal\n' % i) |
|
550 | 552 | f.write(b'watchman returned: %s\n' % s1) |
|
551 | 553 | f.write(b'stat returned: %s\n' % s2) |
|
552 | 554 | finally: |
|
553 | 555 | f.close() |
|
554 | 556 | |
|
555 | 557 | if isinstance(node1, context.changectx): |
|
556 | 558 | ctx1 = node1 |
|
557 | 559 | else: |
|
558 | 560 | ctx1 = self[node1] |
|
559 | 561 | if isinstance(node2, context.changectx): |
|
560 | 562 | ctx2 = node2 |
|
561 | 563 | else: |
|
562 | 564 | ctx2 = self[node2] |
|
563 | 565 | |
|
564 | 566 | working = ctx2.rev() is None |
|
565 | 567 | parentworking = working and ctx1 == self[b'.'] |
|
566 | 568 | match = match or matchmod.always() |
|
567 | 569 | |
|
568 | 570 | # Maybe we can use this opportunity to update Watchman's state. |
|
569 | 571 | # Mercurial uses workingcommitctx and/or memctx to represent the part of |
|
570 | 572 | # the workingctx that is to be committed. So don't update the state in |
|
571 | 573 | # that case. |
|
572 | 574 | # HG_PENDING is set in the environment when the dirstate is being updated |
|
573 | 575 | # in the middle of a transaction; we must not update our state in that |
|
574 | 576 | # case, or we risk forgetting about changes in the working copy. |
|
575 | 577 | updatestate = ( |
|
576 | 578 | parentworking |
|
577 | 579 | and match.always() |
|
578 | 580 | and not isinstance(ctx2, (context.workingcommitctx, context.memctx)) |
|
579 | 581 | and b'HG_PENDING' not in encoding.environ |
|
580 | 582 | ) |
|
581 | 583 | |
|
582 | 584 | try: |
|
583 | 585 | if self._fsmonitorstate.walk_on_invalidate: |
|
584 | 586 | # Use a short timeout to query the current clock. If that |
|
585 | 587 | # takes too long then we assume that the service will be slow |
|
586 | 588 | # to answer our query. |
|
587 | 589 | # walk_on_invalidate indicates that we prefer to walk the |
|
588 | 590 | # tree ourselves because we can ignore portions that Watchman |
|
589 | 591 | # cannot and we tend to be faster in the warmer buffer cache |
|
590 | 592 | # cases. |
|
591 | 593 | self._watchmanclient.settimeout(0.1) |
|
592 | 594 | else: |
|
593 | 595 | # Give Watchman more time to potentially complete its walk |
|
594 | 596 | # and return the initial clock. In this mode we assume that |
|
595 | 597 | # the filesystem will be slower than parsing a potentially |
|
596 | 598 | # very large Watchman result set. |
|
597 | 599 | self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1) |
|
598 | 600 | startclock = self._watchmanclient.getcurrentclock() |
|
599 | 601 | except Exception as ex: |
|
600 | 602 | self._watchmanclient.clearconnection() |
|
601 | 603 | _handleunavailable(self.ui, self._fsmonitorstate, ex) |
|
602 | 604 | # boo, Watchman failed. bail |
|
603 | 605 | return orig( |
|
604 | 606 | node1, |
|
605 | 607 | node2, |
|
606 | 608 | match, |
|
607 | 609 | listignored, |
|
608 | 610 | listclean, |
|
609 | 611 | listunknown, |
|
610 | 612 | listsubrepos, |
|
611 | 613 | ) |
|
612 | 614 | |
|
613 | 615 | if updatestate: |
|
614 | 616 | # We need info about unknown files. This may make things slower the |
|
615 | 617 | # first time, but whatever. |
|
616 | 618 | stateunknown = True |
|
617 | 619 | else: |
|
618 | 620 | stateunknown = listunknown |
|
619 | 621 | |
|
620 | 622 | if updatestate: |
|
621 | 623 | ps = poststatus(startclock) |
|
622 | 624 | self.addpostdsstatus(ps) |
|
623 | 625 | |
|
624 | 626 | r = orig( |
|
625 | 627 | node1, node2, match, listignored, listclean, stateunknown, listsubrepos |
|
626 | 628 | ) |
|
627 | 629 | modified, added, removed, deleted, unknown, ignored, clean = r |
|
628 | 630 | |
|
629 | 631 | if not listunknown: |
|
630 | 632 | unknown = [] |
|
631 | 633 | |
|
632 | 634 | # don't do paranoid checks if we're not going to query Watchman anyway |
|
633 | 635 | full = listclean or match.traversedir is not None |
|
634 | 636 | if self._fsmonitorstate.mode == b'paranoid' and not full: |
|
635 | 637 | # run status again and fall back to the old walk this time |
|
636 | 638 | self.dirstate._fsmonitordisable = True |
|
637 | 639 | |
|
638 | 640 | # shut the UI up |
|
639 | 641 | quiet = self.ui.quiet |
|
640 | 642 | self.ui.quiet = True |
|
641 | 643 | fout, ferr = self.ui.fout, self.ui.ferr |
|
642 | 644 | self.ui.fout = self.ui.ferr = open(os.devnull, b'wb') |
|
643 | 645 | |
|
644 | 646 | try: |
|
645 | 647 | rv2 = orig( |
|
646 | 648 | node1, |
|
647 | 649 | node2, |
|
648 | 650 | match, |
|
649 | 651 | listignored, |
|
650 | 652 | listclean, |
|
651 | 653 | listunknown, |
|
652 | 654 | listsubrepos, |
|
653 | 655 | ) |
|
654 | 656 | finally: |
|
655 | 657 | self.dirstate._fsmonitordisable = False |
|
656 | 658 | self.ui.quiet = quiet |
|
657 | 659 | self.ui.fout, self.ui.ferr = fout, ferr |
|
658 | 660 | |
|
659 | 661 | # clean isn't tested since it's set to True above |
|
660 | 662 | with self.wlock(): |
|
661 | 663 | _cmpsets( |
|
662 | 664 | [modified, added, removed, deleted, unknown, ignored, clean], |
|
663 | 665 | rv2, |
|
664 | 666 | ) |
|
665 | 667 | modified, added, removed, deleted, unknown, ignored, clean = rv2 |
|
666 | 668 | |
|
667 | 669 | return scmutil.status( |
|
668 | 670 | modified, added, removed, deleted, unknown, ignored, clean |
|
669 | 671 | ) |
|
670 | 672 | |
|
671 | 673 | |
|
672 | 674 | class poststatus(object): |
|
673 | 675 | def __init__(self, startclock): |
|
674 | 676 | self._startclock = startclock |
|
675 | 677 | |
|
676 | 678 | def __call__(self, wctx, status): |
|
677 | 679 | clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock |
|
678 | 680 | hashignore = _hashignore(wctx.repo().dirstate._ignore) |
|
679 | 681 | notefiles = ( |
|
680 | 682 | status.modified |
|
681 | 683 | + status.added |
|
682 | 684 | + status.removed |
|
683 | 685 | + status.deleted |
|
684 | 686 | + status.unknown |
|
685 | 687 | ) |
|
686 | 688 | wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles) |
|
687 | 689 | |
|
688 | 690 | |
|
689 | 691 | def makedirstate(repo, dirstate): |
|
690 | 692 | class fsmonitordirstate(dirstate.__class__): |
|
691 | 693 | def _fsmonitorinit(self, repo): |
|
692 | 694 | # _fsmonitordisable is used in paranoid mode |
|
693 | 695 | self._fsmonitordisable = False |
|
694 | 696 | self._fsmonitorstate = repo._fsmonitorstate |
|
695 | 697 | self._watchmanclient = repo._watchmanclient |
|
696 | 698 | self._repo = weakref.proxy(repo) |
|
697 | 699 | |
|
698 | 700 | def walk(self, *args, **kwargs): |
|
699 | 701 | orig = super(fsmonitordirstate, self).walk |
|
700 | 702 | if self._fsmonitordisable: |
|
701 | 703 | return orig(*args, **kwargs) |
|
702 | 704 | return overridewalk(orig, self, *args, **kwargs) |
|
703 | 705 | |
|
704 | 706 | def rebuild(self, *args, **kwargs): |
|
705 | 707 | self._fsmonitorstate.invalidate() |
|
706 | 708 | return super(fsmonitordirstate, self).rebuild(*args, **kwargs) |
|
707 | 709 | |
|
708 | 710 | def invalidate(self, *args, **kwargs): |
|
709 | 711 | self._fsmonitorstate.invalidate() |
|
710 | 712 | return super(fsmonitordirstate, self).invalidate(*args, **kwargs) |
|
711 | 713 | |
|
712 | 714 | dirstate.__class__ = fsmonitordirstate |
|
713 | 715 | dirstate._fsmonitorinit(repo) |
|
714 | 716 | |
|
715 | 717 | |
|
716 | 718 | def wrapdirstate(orig, self): |
|
717 | 719 | ds = orig(self) |
|
718 | 720 | # only override the dirstate when Watchman is available for the repo |
|
719 | 721 | if util.safehasattr(self, b'_fsmonitorstate'): |
|
720 | 722 | makedirstate(self, ds) |
|
721 | 723 | return ds |
|
722 | 724 | |
|
723 | 725 | |
|
724 | 726 | def extsetup(ui): |
|
725 | 727 | extensions.wrapfilecache( |
|
726 | 728 | localrepo.localrepository, b'dirstate', wrapdirstate |
|
727 | 729 | ) |
|
728 | 730 | if pycompat.isdarwin: |
|
729 | 731 | # An assist for avoiding the dangling-symlink fsevents bug |
|
730 | 732 | extensions.wrapfunction(os, b'symlink', wrapsymlink) |
|
731 | 733 | |
|
732 | 734 | extensions.wrapfunction(merge, b'update', wrapupdate) |
|
733 | 735 | |
|
734 | 736 | |
|
735 | 737 | def wrapsymlink(orig, source, link_name): |
|
736 | 738 | ''' if we create a dangling symlink, also touch the parent dir |
|
737 | 739 | to encourage fsevents notifications to work more correctly ''' |
|
738 | 740 | try: |
|
739 | 741 | return orig(source, link_name) |
|
740 | 742 | finally: |
|
741 | 743 | try: |
|
742 | 744 | os.utime(os.path.dirname(link_name), None) |
|
743 | 745 | except OSError: |
|
744 | 746 | pass |
|
745 | 747 | |
|
746 | 748 | |
|
747 | 749 | class state_update(object): |
|
748 | 750 | ''' This context manager is responsible for dispatching the state-enter |
|
749 | 751 | and state-leave signals to the watchman service. The enter and leave |
|
750 | 752 | methods can be invoked manually (for scenarios where context manager |
|
751 | 753 | semantics are not possible). If parameters oldnode and newnode are None, |
|
752 | 754 | they will be populated based on the current working copy in enter and |

753 | 755 | leave, respectively. Similarly, if the distance is None, it will be |
|
754 | 756 | calculated based on the oldnode and newnode in the leave method.''' |
|
755 | 757 | |
|
756 | 758 | def __init__( |
|
757 | 759 | self, |
|
758 | 760 | repo, |
|
759 | 761 | name, |
|
760 | 762 | oldnode=None, |
|
761 | 763 | newnode=None, |
|
762 | 764 | distance=None, |
|
763 | 765 | partial=False, |
|
764 | 766 | ): |
|
765 | 767 | self.repo = repo.unfiltered() |
|
766 | 768 | self.name = name |
|
767 | 769 | self.oldnode = oldnode |
|
768 | 770 | self.newnode = newnode |
|
769 | 771 | self.distance = distance |
|
770 | 772 | self.partial = partial |
|
771 | 773 | self._lock = None |
|
772 | 774 | self.need_leave = False |
|
773 | 775 | |
|
774 | 776 | def __enter__(self): |
|
775 | 777 | self.enter() |
|
776 | 778 | |
|
777 | 779 | def enter(self): |
|
778 | 780 | # Make sure we have a wlock prior to sending notifications to watchman. |
|
779 | 781 | # We don't want to race with other actors. In the update case, |
|
780 | 782 | # merge.update is going to take the wlock almost immediately. We are |
|
781 | 783 | # effectively extending the lock around several short sanity checks. |
|
782 | 784 | if self.oldnode is None: |
|
783 | 785 | self.oldnode = self.repo[b'.'].node() |
|
784 | 786 | |
|
785 | 787 | if self.repo.currentwlock() is None: |
|
786 | 788 | if util.safehasattr(self.repo, b'wlocknostateupdate'): |
|
787 | 789 | self._lock = self.repo.wlocknostateupdate() |
|
788 | 790 | else: |
|
789 | 791 | self._lock = self.repo.wlock() |
|
790 | 792 | self.need_leave = self._state(b'state-enter', hex(self.oldnode)) |
|
791 | 793 | return self |
|
792 | 794 | |
|
793 | 795 | def __exit__(self, type_, value, tb): |
|
794 | 796 | abort = True if type_ else False |
|
795 | 797 | self.exit(abort=abort) |
|
796 | 798 | |
|
797 | 799 | def exit(self, abort=False): |
|
798 | 800 | try: |
|
799 | 801 | if self.need_leave: |
|
800 | 802 | status = b'failed' if abort else b'ok' |
|
801 | 803 | if self.newnode is None: |
|
802 | 804 | self.newnode = self.repo[b'.'].node() |
|
803 | 805 | if self.distance is None: |
|
804 | 806 | self.distance = calcdistance( |
|
805 | 807 | self.repo, self.oldnode, self.newnode |
|
806 | 808 | ) |
|
807 | 809 | self._state(b'state-leave', hex(self.newnode), status=status) |
|
808 | 810 | finally: |
|
809 | 811 | self.need_leave = False |
|
810 | 812 | if self._lock: |
|
811 | 813 | self._lock.release() |
|
812 | 814 | |
|
813 | 815 | def _state(self, cmd, commithash, status=b'ok'): |
|
814 | 816 | if not util.safehasattr(self.repo, b'_watchmanclient'): |
|
815 | 817 | return False |
|
816 | 818 | try: |
|
817 | 819 | self.repo._watchmanclient.command( |
|
818 | 820 | cmd, |
|
819 | 821 | { |
|
820 | 822 | b'name': self.name, |
|
821 | 823 | b'metadata': { |
|
822 | 824 | # the target revision |
|
823 | 825 | b'rev': commithash, |
|
824 | 826 | # approximate number of commits between current and target |
|
825 | 827 | b'distance': self.distance if self.distance else 0, |
|
826 | 828 | # success/failure (only really meaningful for state-leave) |
|
827 | 829 | b'status': status, |
|
828 | 830 | # whether the working copy parent is changing |
|
829 | 831 | b'partial': self.partial, |
|
830 | 832 | }, |
|
831 | 833 | }, |
|
832 | 834 | ) |
|
833 | 835 | return True |
|
834 | 836 | except Exception as e: |
|
835 | 837 | # Swallow any errors; fire and forget |
|
836 | 838 | self.repo.ui.log( |
|
837 | 839 | b'watchman', b'Exception %s while running %s\n', e, cmd |
|
838 | 840 | ) |
|
839 | 841 | return False |
|
840 | 842 | |
|
841 | 843 | |
|
842 | 844 | # Estimate the distance between two nodes |
|
843 | 845 | def calcdistance(repo, oldnode, newnode): |
|
844 | 846 | anc = repo.changelog.ancestor(oldnode, newnode) |
|
845 | 847 | ancrev = repo[anc].rev() |
|
846 | 848 | distance = abs(repo[oldnode].rev() - ancrev) + abs( |
|
847 | 849 | repo[newnode].rev() - ancrev |
|
848 | 850 | ) |
|
849 | 851 | return distance |
|
850 | 852 | |
|
851 | 853 | |
|
852 | 854 | # Bracket working copy updates with calls to the watchman state-enter |
|
853 | 855 | # and state-leave commands. This allows clients to perform more intelligent |
|
854 | 856 | # settling during bulk file change scenarios |
|
855 | 857 | # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling |
|
856 | 858 | def wrapupdate( |
|
857 | 859 | orig, |
|
858 | 860 | repo, |
|
859 | 861 | node, |
|
860 | 862 | branchmerge, |
|
861 | 863 | force, |
|
862 | 864 | ancestor=None, |
|
863 | 865 | mergeancestor=False, |
|
864 | 866 | labels=None, |
|
865 | 867 | matcher=None, |
|
866 | 868 | **kwargs |
|
867 | 869 | ): |
|
868 | 870 | |
|
869 | 871 | distance = 0 |
|
870 | 872 | partial = True |
|
871 | 873 | oldnode = repo[b'.'].node() |
|
872 | 874 | newnode = repo[node].node() |
|
873 | 875 | if matcher is None or matcher.always(): |
|
874 | 876 | partial = False |
|
875 | 877 | distance = calcdistance(repo.unfiltered(), oldnode, newnode) |
|
876 | 878 | |
|
877 | 879 | with state_update( |
|
878 | 880 | repo, |
|
879 | 881 | name=b"hg.update", |
|
880 | 882 | oldnode=oldnode, |
|
881 | 883 | newnode=newnode, |
|
882 | 884 | distance=distance, |
|
883 | 885 | partial=partial, |
|
884 | 886 | ): |
|
885 | 887 | return orig( |
|
886 | 888 | repo, |
|
887 | 889 | node, |
|
888 | 890 | branchmerge, |
|
889 | 891 | force, |
|
890 | 892 | ancestor, |
|
891 | 893 | mergeancestor, |
|
892 | 894 | labels, |
|
893 | 895 | matcher, |
|
894 | 896 | **kwargs |
|
895 | 897 | ) |
|
896 | 898 | |
|
897 | 899 | |
|
898 | 900 | def repo_has_depth_one_nested_repo(repo): |
|
899 | 901 | for f in repo.wvfs.listdir(): |
|
900 | 902 | if os.path.isdir(os.path.join(repo.root, f, b'.hg')): |
|
901 | 903 | msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n' |
|
902 | 904 | repo.ui.debug(msg % f) |
|
903 | 905 | return True |
|
904 | 906 | return False |
|
905 | 907 | |
|
906 | 908 | |
|
907 | 909 | def reposetup(ui, repo): |
|
908 | 910 | # We don't work with largefiles or inotify |
|
909 | 911 | exts = extensions.enabled() |
|
910 | 912 | for ext in _blacklist: |
|
911 | 913 | if ext in exts: |
|
912 | 914 | ui.warn( |
|
913 | 915 | _( |
|
914 | 916 | b'The fsmonitor extension is incompatible with the %s ' |
|
915 | 917 | b'extension and has been disabled.\n' |
|
916 | 918 | ) |
|
917 | 919 | % ext |
|
918 | 920 | ) |
|
919 | 921 | return |
|
920 | 922 | |
|
921 | 923 | if repo.local(): |
|
922 | 924 | # We don't work with subrepos either. |
|
923 | 925 | # |
|
924 | 926 | # checking repo[None].substate can cause a dirstate parse, which is too |

925 | 927 | # slow. Instead, look for a file called .hgsubstate or .hgsub. |
|
926 | 928 | if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'): |
|
927 | 929 | return |
|
928 | 930 | |
|
929 | 931 | if repo_has_depth_one_nested_repo(repo): |
|
930 | 932 | return |
|
931 | 933 | |
|
932 | 934 | fsmonitorstate = state.state(repo) |
|
933 | 935 | if fsmonitorstate.mode == b'off': |
|
934 | 936 | return |
|
935 | 937 | |
|
936 | 938 | try: |
|
937 | 939 | client = watchmanclient.client(repo.ui, repo.root) |
|
938 | 940 | except Exception as ex: |
|
939 | 941 | _handleunavailable(ui, fsmonitorstate, ex) |
|
940 | 942 | return |
|
941 | 943 | |
|
942 | 944 | repo._fsmonitorstate = fsmonitorstate |
|
943 | 945 | repo._watchmanclient = client |
|
944 | 946 | |
|
945 | 947 | dirstate, cached = localrepo.isfilecached(repo, b'dirstate') |
|
946 | 948 | if cached: |
|
947 | 949 | # at this point since fsmonitorstate wasn't present, |
|
948 | 950 | # repo.dirstate is not a fsmonitordirstate |
|
949 | 951 | makedirstate(repo, dirstate) |
|
950 | 952 | |
|
951 | 953 | class fsmonitorrepo(repo.__class__): |
|
952 | 954 | def status(self, *args, **kwargs): |
|
953 | 955 | orig = super(fsmonitorrepo, self).status |
|
954 | 956 | return overridestatus(orig, self, *args, **kwargs) |
|
955 | 957 | |
|
956 | 958 | def wlocknostateupdate(self, *args, **kwargs): |
|
957 | 959 | return super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
958 | 960 | |
|
959 | 961 | def wlock(self, *args, **kwargs): |
|
960 | 962 | l = super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
961 | 963 | if not ui.configbool( |
|
962 | 964 | b"experimental", b"fsmonitor.transaction_notify" |
|
963 | 965 | ): |
|
964 | 966 | return l |
|
965 | 967 | if l.held != 1: |
|
966 | 968 | return l |
|
967 | 969 | origrelease = l.releasefn |
|
968 | 970 | |
|
969 | 971 | def staterelease(): |
|
970 | 972 | if origrelease: |
|
971 | 973 | origrelease() |
|
972 | 974 | if l.stateupdate: |
|
973 | 975 | l.stateupdate.exit() |
|
974 | 976 | l.stateupdate = None |
|
975 | 977 | |
|
976 | 978 | try: |
|
977 | 979 | l.stateupdate = None |
|
978 | 980 | l.stateupdate = state_update(self, name=b"hg.transaction") |
|
979 | 981 | l.stateupdate.enter() |
|
980 | 982 | l.releasefn = staterelease |
|
981 | 983 | except Exception as e: |
|
982 | 984 | # Swallow any errors; fire and forget |
|
983 | 985 | self.ui.log( |
|
984 | 986 | b'watchman', b'Exception in state update %s\n', e |
|
985 | 987 | ) |
|
986 | 988 | return l |
|
987 | 989 | |
|
988 | 990 | repo.__class__ = fsmonitorrepo |
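
The distance metadata that state_update sends to Watchman is the estimate computed by calcdistance() above: the sum of each endpoint's distance to their common ancestor revision. A minimal standalone sketch of that arithmetic, with hypothetical revision numbers standing in for the repo[node].rev() lookups:

def estimate_distance(oldrev, newrev, ancestorrev):
    # Mirrors calcdistance(): abs(old - ancestor) + abs(new - ancestor).
    return abs(oldrev - ancestorrev) + abs(newrev - ancestorrev)

# e.g. updating from rev 105 to rev 120 with common ancestor rev 100
# gives 5 + 20 = 25 commits of estimated distance.
print(estimate_distance(105, 120, 100))  # 25
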
@@ -1,184 +1,186 b'' | |||
|
1 | 1 | # This software may be used and distributed according to the terms of the |
|
2 | 2 | # GNU General Public License version 2 or any later version. |
|
3 | 3 | |
|
4 | 4 | # based on bundleheads extension by Gregory Szorc <gps@mozilla.com> |
|
5 | 5 | |
|
6 | 6 | from __future__ import absolute_import |
|
7 | 7 | |
|
8 | 8 | import abc |
|
9 | import hashlib | |
|
10 | 9 | import os |
|
11 | 10 | import subprocess |
|
12 | 11 | import tempfile |
|
13 | 12 | |
|
14 | 13 | from mercurial.pycompat import open |
|
15 | 14 | from mercurial import ( |
|
16 | 15 | node, |
|
17 | 16 | pycompat, |
|
18 | 17 | ) |
|
19 | from mercurial.utils import procutil |
|
|
18 | from mercurial.utils import ( | |
|
19 | hashutil, | |
|
20 | procutil, | |
|
21 | ) | |
|
20 | 22 | |
|
21 | 23 | NamedTemporaryFile = tempfile.NamedTemporaryFile |
|
22 | 24 | |
|
23 | 25 | |
|
24 | 26 | class BundleWriteException(Exception): |
|
25 | 27 | pass |
|
26 | 28 | |
|
27 | 29 | |
|
28 | 30 | class BundleReadException(Exception): |
|
29 | 31 | pass |
|
30 | 32 | |
|
31 | 33 | |
|
32 | 34 | class abstractbundlestore(object): # pytype: disable=ignored-metaclass |
|
33 | 35 | """Defines the interface for bundle stores. |
|
34 | 36 | |
|
35 | 37 | A bundle store is an entity that stores raw bundle data. It is a simple |
|
36 | 38 | key-value store. However, the keys are chosen by the store. The keys can |
|
37 | 39 | be any Python object understood by the corresponding bundle index (see |
|
38 | 40 | ``abstractbundleindex`` below). |
|
39 | 41 | """ |
|
40 | 42 | |
|
41 | 43 | __metaclass__ = abc.ABCMeta |
|
42 | 44 | |
|
43 | 45 | @abc.abstractmethod |
|
44 | 46 | def write(self, data): |
|
45 | 47 | """Write bundle data to the store. |
|
46 | 48 | |
|
47 | 49 | This function receives the raw data to be written as a str. |
|
48 | 50 | Throws BundleWriteException |
|
49 | 51 | The key of the written data MUST be returned. |
|
50 | 52 | """ |
|
51 | 53 | |
|
52 | 54 | @abc.abstractmethod |
|
53 | 55 | def read(self, key): |
|
54 | 56 | """Obtain bundle data for a key. |
|
55 | 57 | |
|
56 | 58 | Returns None if the bundle isn't known. |
|
57 | 59 | Throws BundleReadException |
|
58 | 60 | The returned object should be a file object supporting read() |
|
59 | 61 | and close(). |
|
60 | 62 | """ |
|
61 | 63 | |
|
62 | 64 | |
|
63 | 65 | class filebundlestore(object): |
|
64 | 66 | """bundle store in filesystem |
|
65 | 67 | |
|
66 | 68 | meant for storing bundles somewhere on disk and on network filesystems |
|
67 | 69 | """ |
|
68 | 70 | |
|
69 | 71 | def __init__(self, ui, repo): |
|
70 | 72 | self.ui = ui |
|
71 | 73 | self.repo = repo |
|
72 | 74 | self.storepath = ui.configpath(b'scratchbranch', b'storepath') |
|
73 | 75 | if not self.storepath: |
|
74 | 76 | self.storepath = self.repo.vfs.join( |
|
75 | 77 | b"scratchbranches", b"filebundlestore" |
|
76 | 78 | ) |
|
77 | 79 | if not os.path.exists(self.storepath): |
|
78 | 80 | os.makedirs(self.storepath) |
|
79 | 81 | |
|
80 | 82 | def _dirpath(self, hashvalue): |
|
81 | 83 | """First two bytes of the hash are the name of the upper |
|
82 | 84 | level directory, next two bytes are the name of the |
|
83 | 85 | next level directory""" |
|
84 | 86 | return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4]) |
|
85 | 87 | |
|
86 | 88 | def _filepath(self, filename): |
|
87 | 89 | return os.path.join(self._dirpath(filename), filename) |
|
88 | 90 | |
|
89 | 91 | def write(self, data): |
|
90 | filename = node.hex(hashlib.sha1(data).digest()) |
|
|
92 | filename = node.hex(hashutil.sha1(data).digest()) | |
|
91 | 93 | dirpath = self._dirpath(filename) |
|
92 | 94 | |
|
93 | 95 | if not os.path.exists(dirpath): |
|
94 | 96 | os.makedirs(dirpath) |
|
95 | 97 | |
|
96 | 98 | with open(self._filepath(filename), b'wb') as f: |
|
97 | 99 | f.write(data) |
|
98 | 100 | |
|
99 | 101 | return filename |
|
100 | 102 | |
|
101 | 103 | def read(self, key): |
|
102 | 104 | try: |
|
103 | 105 | with open(self._filepath(key), b'rb') as f: |
|
104 | 106 | return f.read() |
|
105 | 107 | except IOError: |
|
106 | 108 | return None |
|
107 | 109 | |
|
108 | 110 | |
|
109 | 111 | class externalbundlestore(abstractbundlestore): |
|
110 | 112 | def __init__(self, put_binary, put_args, get_binary, get_args): |
|
111 | 113 | """ |
|
112 | 114 | `put_binary` - path to binary file which uploads bundle to external |
|
113 | 115 | storage and prints key to stdout |
|
114 | 116 | `put_args` - format string with additional args to `put_binary` |
|
115 | 117 | {filename} replacement field can be used. |
|
116 | 118 | `get_binary` - path to binary file which accepts filename and key |
|
117 | 119 | (in that order), downloads bundle from store and saves it to file |
|
118 | 120 | `get_args` - format string with additional args to `get_binary`. |
|
119 | 121 | {filename} and {handle} replacement field can be used. |
|
120 | 122 | """ |
|
121 | 123 | |
|
122 | 124 | self.put_args = put_args |
|
123 | 125 | self.get_args = get_args |
|
124 | 126 | self.put_binary = put_binary |
|
125 | 127 | self.get_binary = get_binary |
|
126 | 128 | |
|
127 | 129 | def _call_binary(self, args): |
|
128 | 130 | p = subprocess.Popen( |
|
129 | 131 | pycompat.rapply(procutil.tonativestr, args), |
|
130 | 132 | stdout=subprocess.PIPE, |
|
131 | 133 | stderr=subprocess.PIPE, |
|
132 | 134 | close_fds=True, |
|
133 | 135 | ) |
|
134 | 136 | stdout, stderr = p.communicate() |
|
135 | 137 | returncode = p.returncode |
|
136 | 138 | return returncode, stdout, stderr |
|
137 | 139 | |
|
138 | 140 | def write(self, data): |
|
139 | 141 | # Won't work on Windows because you can't open a file a second time without |
|
140 | 142 | # closing it |
|
141 | 143 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() |
|
142 | 144 | # with pycompat.namedtempfile() |
|
143 | 145 | with NamedTemporaryFile() as temp: |
|
144 | 146 | temp.write(data) |
|
145 | 147 | temp.flush() |
|
146 | 148 | temp.seek(0) |
|
147 | 149 | formatted_args = [ |
|
148 | 150 | arg.format(filename=temp.name) for arg in self.put_args |
|
149 | 151 | ] |
|
150 | 152 | returncode, stdout, stderr = self._call_binary( |
|
151 | 153 | [self.put_binary] + formatted_args |
|
152 | 154 | ) |
|
153 | 155 | |
|
154 | 156 | if returncode != 0: |
|
155 | 157 | raise BundleWriteException( |
|
156 | 158 | b'Failed to upload to external store: %s' % stderr |
|
157 | 159 | ) |
|
158 | 160 | stdout_lines = stdout.splitlines() |
|
159 | 161 | if len(stdout_lines) == 1: |
|
160 | 162 | return stdout_lines[0] |
|
161 | 163 | else: |
|
162 | 164 | raise BundleWriteException( |
|
163 | 165 | b'Bad output from %s: %s' % (self.put_binary, stdout) |
|
164 | 166 | ) |
|
165 | 167 | |
|
166 | 168 | def read(self, handle): |
|
167 | 169 | # Won't work on Windows because you can't open a file a second time without |
|
168 | 170 | # closing it |
|
169 | 171 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() |
|
170 | 172 | # with pycompat.namedtempfile() |
|
171 | 173 | with NamedTemporaryFile() as temp: |
|
172 | 174 | formatted_args = [ |
|
173 | 175 | arg.format(filename=temp.name, handle=handle) |
|
174 | 176 | for arg in self.get_args |
|
175 | 177 | ] |
|
176 | 178 | returncode, stdout, stderr = self._call_binary( |
|
177 | 179 | [self.get_binary] + formatted_args |
|
178 | 180 | ) |
|
179 | 181 | |
|
180 | 182 | if returncode != 0: |
|
181 | 183 | raise BundleReadException( |
|
182 | 184 | b'Failed to download from external store: %s' % stderr |
|
183 | 185 | ) |
|
184 | 186 | return temp.read() |
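
filebundlestore.write() above keys each bundle by the hex SHA-1 of its contents and fans the files out over two directory levels taken from the first four hex characters of that key. A rough standalone sketch of the layout, using hashlib.sha1 directly in place of Mercurial's internal hashutil wrapper and an invented store path:

import hashlib
import os

def bundle_path(storepath, data):
    # Key is the hex SHA-1 of the bundle data, as in
    # node.hex(hashutil.sha1(data).digest()) above.
    filename = hashlib.sha1(data).hexdigest()
    # Two-level fan-out: first two hex chars, then the next two.
    dirpath = os.path.join(storepath, filename[0:2], filename[2:4])
    return os.path.join(dirpath, filename)

# e.g. bundle_path('/tmp/filebundlestore', b'bundle bytes') yields
# '/tmp/filebundlestore/<h0h1>/<h2h3>/<full 40-char hex digest>'.
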
@@ -1,669 +1,669 b'' | |||
|
1 | 1 | # Copyright 2009-2010 Gregory P. Ward |
|
2 | 2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated |
|
3 | 3 | # Copyright 2010-2011 Fog Creek Software |
|
4 | 4 | # Copyright 2010-2011 Unity Technologies |
|
5 | 5 | # |
|
6 | 6 | # This software may be used and distributed according to the terms of the |
|
7 | 7 | # GNU General Public License version 2 or any later version. |
|
8 | 8 | |
|
9 | 9 | '''High-level command function for lfconvert, plus the cmdtable.''' |
|
10 | 10 | from __future__ import absolute_import |
|
11 | 11 | |
|
12 | 12 | import errno |
|
13 | import hashlib | |
|
14 | 13 | import os |
|
15 | 14 | import shutil |
|
16 | 15 | |
|
17 | 16 | from mercurial.i18n import _ |
|
18 | 17 | |
|
19 | 18 | from mercurial import ( |
|
20 | 19 | cmdutil, |
|
21 | 20 | context, |
|
22 | 21 | error, |
|
23 | 22 | exthelper, |
|
24 | 23 | hg, |
|
25 | 24 | lock, |
|
26 | 25 | match as matchmod, |
|
27 | 26 | node, |
|
28 | 27 | pycompat, |
|
29 | 28 | scmutil, |
|
30 | 29 | util, |
|
31 | 30 | ) |
|
31 | from mercurial.utils import hashutil | |
|
32 | 32 | |
|
33 | 33 | from ..convert import ( |
|
34 | 34 | convcmd, |
|
35 | 35 | filemap, |
|
36 | 36 | ) |
|
37 | 37 | |
|
38 | 38 | from . import lfutil, storefactory |
|
39 | 39 | |
|
40 | 40 | release = lock.release |
|
41 | 41 | |
|
42 | 42 | # -- Commands ---------------------------------------------------------- |
|
43 | 43 | |
|
44 | 44 | eh = exthelper.exthelper() |
|
45 | 45 | |
|
46 | 46 | |
|
47 | 47 | @eh.command( |
|
48 | 48 | b'lfconvert', |
|
49 | 49 | [ |
|
50 | 50 | ( |
|
51 | 51 | b's', |
|
52 | 52 | b'size', |
|
53 | 53 | b'', |
|
54 | 54 | _(b'minimum size (MB) for files to be converted as largefiles'), |
|
55 | 55 | b'SIZE', |
|
56 | 56 | ), |
|
57 | 57 | ( |
|
58 | 58 | b'', |
|
59 | 59 | b'to-normal', |
|
60 | 60 | False, |
|
61 | 61 | _(b'convert from a largefiles repo to a normal repo'), |
|
62 | 62 | ), |
|
63 | 63 | ], |
|
64 | 64 | _(b'hg lfconvert SOURCE DEST [FILE ...]'), |
|
65 | 65 | norepo=True, |
|
66 | 66 | inferrepo=True, |
|
67 | 67 | ) |
|
68 | 68 | def lfconvert(ui, src, dest, *pats, **opts): |
|
69 | 69 | '''convert a normal repository to a largefiles repository |
|
70 | 70 | |
|
71 | 71 | Convert repository SOURCE to a new repository DEST, identical to |
|
72 | 72 | SOURCE except that certain files will be converted as largefiles: |
|
73 | 73 | specifically, any file that matches any PATTERN *or* whose size is |
|
74 | 74 | above the minimum size threshold is converted as a largefile. The |
|
75 | 75 | size used to determine whether or not to track a file as a |
|
76 | 76 | largefile is the size of the first version of the file. The |
|
77 | 77 | minimum size can be specified either with --size or in |
|
78 | 78 | configuration as ``largefiles.size``. |
|
79 | 79 | |
|
80 | 80 | After running this command you will need to make sure that |
|
81 | 81 | largefiles is enabled anywhere you intend to push the new |
|
82 | 82 | repository. |
|
83 | 83 | |
|
84 | 84 | Use --to-normal to convert largefiles back to normal files; after |
|
85 | 85 | this, the DEST repository can be used without largefiles at all.''' |
|
86 | 86 | |
|
87 | 87 | opts = pycompat.byteskwargs(opts) |
|
88 | 88 | if opts[b'to_normal']: |
|
89 | 89 | tolfile = False |
|
90 | 90 | else: |
|
91 | 91 | tolfile = True |
|
92 | 92 | size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None) |
|
93 | 93 | |
|
94 | 94 | if not hg.islocal(src): |
|
95 | 95 | raise error.Abort(_(b'%s is not a local Mercurial repo') % src) |
|
96 | 96 | if not hg.islocal(dest): |
|
97 | 97 | raise error.Abort(_(b'%s is not a local Mercurial repo') % dest) |
|
98 | 98 | |
|
99 | 99 | rsrc = hg.repository(ui, src) |
|
100 | 100 | ui.status(_(b'initializing destination %s\n') % dest) |
|
101 | 101 | rdst = hg.repository(ui, dest, create=True) |
|
102 | 102 | |
|
103 | 103 | success = False |
|
104 | 104 | dstwlock = dstlock = None |
|
105 | 105 | try: |
|
106 | 106 | # Get a list of all changesets in the source. The easy way to do this |
|
107 | 107 | # is to simply walk the changelog, using changelog.nodesbetween(). |
|
108 | 108 | # Take a look at mercurial/revlog.py:639 for more details. |
|
109 | 109 | # Use a generator instead of a list to decrease memory usage |
|
110 | 110 | ctxs = ( |
|
111 | 111 | rsrc[ctx] |
|
112 | 112 | for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0] |
|
113 | 113 | ) |
|
114 | 114 | revmap = {node.nullid: node.nullid} |
|
115 | 115 | if tolfile: |
|
116 | 116 | # Lock destination to prevent modification while it is converted to. |
|
117 | 117 | # Don't need to lock src because we are just reading from its |
|
118 | 118 | # history which can't change. |
|
119 | 119 | dstwlock = rdst.wlock() |
|
120 | 120 | dstlock = rdst.lock() |
|
121 | 121 | |
|
122 | 122 | lfiles = set() |
|
123 | 123 | normalfiles = set() |
|
124 | 124 | if not pats: |
|
125 | 125 | pats = ui.configlist(lfutil.longname, b'patterns') |
|
126 | 126 | if pats: |
|
127 | 127 | matcher = matchmod.match(rsrc.root, b'', list(pats)) |
|
128 | 128 | else: |
|
129 | 129 | matcher = None |
|
130 | 130 | |
|
131 | 131 | lfiletohash = {} |
|
132 | 132 | with ui.makeprogress( |
|
133 | 133 | _(b'converting revisions'), |
|
134 | 134 | unit=_(b'revisions'), |
|
135 | 135 | total=rsrc[b'tip'].rev(), |
|
136 | 136 | ) as progress: |
|
137 | 137 | for ctx in ctxs: |
|
138 | 138 | progress.update(ctx.rev()) |
|
139 | 139 | _lfconvert_addchangeset( |
|
140 | 140 | rsrc, |
|
141 | 141 | rdst, |
|
142 | 142 | ctx, |
|
143 | 143 | revmap, |
|
144 | 144 | lfiles, |
|
145 | 145 | normalfiles, |
|
146 | 146 | matcher, |
|
147 | 147 | size, |
|
148 | 148 | lfiletohash, |
|
149 | 149 | ) |
|
150 | 150 | |
|
151 | 151 | if rdst.wvfs.exists(lfutil.shortname): |
|
152 | 152 | rdst.wvfs.rmtree(lfutil.shortname) |
|
153 | 153 | |
|
154 | 154 | for f in lfiletohash.keys(): |
|
155 | 155 | if rdst.wvfs.isfile(f): |
|
156 | 156 | rdst.wvfs.unlink(f) |
|
157 | 157 | try: |
|
158 | 158 | rdst.wvfs.removedirs(rdst.wvfs.dirname(f)) |
|
159 | 159 | except OSError: |
|
160 | 160 | pass |
|
161 | 161 | |
|
162 | 162 | # If there were any files converted to largefiles, add largefiles |
|
163 | 163 | # to the destination repository's requirements. |
|
164 | 164 | if lfiles: |
|
165 | 165 | rdst.requirements.add(b'largefiles') |
|
166 | 166 | rdst._writerequirements() |
|
167 | 167 | else: |
|
168 | 168 | |
|
169 | 169 | class lfsource(filemap.filemap_source): |
|
170 | 170 | def __init__(self, ui, source): |
|
171 | 171 | super(lfsource, self).__init__(ui, source, None) |
|
172 | 172 | self.filemapper.rename[lfutil.shortname] = b'.' |
|
173 | 173 | |
|
174 | 174 | def getfile(self, name, rev): |
|
175 | 175 | realname, realrev = rev |
|
176 | 176 | f = super(lfsource, self).getfile(name, rev) |
|
177 | 177 | |
|
178 | 178 | if ( |
|
179 | 179 | not realname.startswith(lfutil.shortnameslash) |
|
180 | 180 | or f[0] is None |
|
181 | 181 | ): |
|
182 | 182 | return f |
|
183 | 183 | |
|
184 | 184 | # Substitute in the largefile data for the hash |
|
185 | 185 | hash = f[0].strip() |
|
186 | 186 | path = lfutil.findfile(rsrc, hash) |
|
187 | 187 | |
|
188 | 188 | if path is None: |
|
189 | 189 | raise error.Abort( |
|
190 | 190 | _(b"missing largefile for '%s' in %s") |
|
191 | 191 | % (realname, realrev) |
|
192 | 192 | ) |
|
193 | 193 | return util.readfile(path), f[1] |
|
194 | 194 | |
|
195 | 195 | class converter(convcmd.converter): |
|
196 | 196 | def __init__(self, ui, source, dest, revmapfile, opts): |
|
197 | 197 | src = lfsource(ui, source) |
|
198 | 198 | |
|
199 | 199 | super(converter, self).__init__( |
|
200 | 200 | ui, src, dest, revmapfile, opts |
|
201 | 201 | ) |
|
202 | 202 | |
|
203 | 203 | found, missing = downloadlfiles(ui, rsrc) |
|
204 | 204 | if missing != 0: |
|
205 | 205 | raise error.Abort(_(b"all largefiles must be present locally")) |
|
206 | 206 | |
|
207 | 207 | orig = convcmd.converter |
|
208 | 208 | convcmd.converter = converter |
|
209 | 209 | |
|
210 | 210 | try: |
|
211 | 211 | convcmd.convert( |
|
212 | 212 | ui, src, dest, source_type=b'hg', dest_type=b'hg' |
|
213 | 213 | ) |
|
214 | 214 | finally: |
|
215 | 215 | convcmd.converter = orig |
|
216 | 216 | success = True |
|
217 | 217 | finally: |
|
218 | 218 | if tolfile: |
|
219 | 219 | rdst.dirstate.clear() |
|
220 | 220 | release(dstlock, dstwlock) |
|
221 | 221 | if not success: |
|
222 | 222 | # we failed, remove the new directory |
|
223 | 223 | shutil.rmtree(rdst.root) |
|
224 | 224 | |
|
225 | 225 | |
|
226 | 226 | def _lfconvert_addchangeset( |
|
227 | 227 | rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash |
|
228 | 228 | ): |
|
229 | 229 | # Convert src parents to dst parents |
|
230 | 230 | parents = _convertparents(ctx, revmap) |
|
231 | 231 | |
|
232 | 232 | # Generate list of changed files |
|
233 | 233 | files = _getchangedfiles(ctx, parents) |
|
234 | 234 | |
|
235 | 235 | dstfiles = [] |
|
236 | 236 | for f in files: |
|
237 | 237 | if f not in lfiles and f not in normalfiles: |
|
238 | 238 | islfile = _islfile(f, ctx, matcher, size) |
|
239 | 239 | # If this file was renamed or copied then copy |
|
240 | 240 | # the largefile-ness of its predecessor |
|
241 | 241 | if f in ctx.manifest(): |
|
242 | 242 | fctx = ctx.filectx(f) |
|
243 | 243 | renamed = fctx.copysource() |
|
244 | 244 | if renamed is None: |
|
245 | 245 | # the code below assumes renamed to be a boolean or a list |
|
246 | 246 | # and won't quite work with the value None |
|
247 | 247 | renamed = False |
|
248 | 248 | renamedlfile = renamed and renamed in lfiles |
|
249 | 249 | islfile |= renamedlfile |
|
250 | 250 | if b'l' in fctx.flags(): |
|
251 | 251 | if renamedlfile: |
|
252 | 252 | raise error.Abort( |
|
253 | 253 | _(b'renamed/copied largefile %s becomes symlink') |
|
254 | 254 | % f |
|
255 | 255 | ) |
|
256 | 256 | islfile = False |
|
257 | 257 | if islfile: |
|
258 | 258 | lfiles.add(f) |
|
259 | 259 | else: |
|
260 | 260 | normalfiles.add(f) |
|
261 | 261 | |
|
262 | 262 | if f in lfiles: |
|
263 | 263 | fstandin = lfutil.standin(f) |
|
264 | 264 | dstfiles.append(fstandin) |
|
265 | 265 | # largefile in manifest if it has not been removed/renamed |
|
266 | 266 | if f in ctx.manifest(): |
|
267 | 267 | fctx = ctx.filectx(f) |
|
268 | 268 | if b'l' in fctx.flags(): |
|
269 | 269 | renamed = fctx.copysource() |
|
270 | 270 | if renamed and renamed in lfiles: |
|
271 | 271 | raise error.Abort( |
|
272 | 272 | _(b'largefile %s becomes symlink') % f |
|
273 | 273 | ) |
|
274 | 274 | |
|
275 | 275 | # largefile was modified, update standins |
|
276 | m = hashlib.sha1(b'') |
|
|
276 | m = hashutil.sha1(b'') | |
|
277 | 277 | m.update(ctx[f].data()) |
|
278 | 278 | hash = node.hex(m.digest()) |
|
279 | 279 | if f not in lfiletohash or lfiletohash[f] != hash: |
|
280 | 280 | rdst.wwrite(f, ctx[f].data(), ctx[f].flags()) |
|
281 | 281 | executable = b'x' in ctx[f].flags() |
|
282 | 282 | lfutil.writestandin(rdst, fstandin, hash, executable) |
|
283 | 283 | lfiletohash[f] = hash |
|
284 | 284 | else: |
|
285 | 285 | # normal file |
|
286 | 286 | dstfiles.append(f) |
|
287 | 287 | |
|
288 | 288 | def getfilectx(repo, memctx, f): |
|
289 | 289 | srcfname = lfutil.splitstandin(f) |
|
290 | 290 | if srcfname is not None: |
|
291 | 291 | # if the file isn't in the manifest then it was removed |
|
292 | 292 | # or renamed, return None to indicate this |
|
293 | 293 | try: |
|
294 | 294 | fctx = ctx.filectx(srcfname) |
|
295 | 295 | except error.LookupError: |
|
296 | 296 | return None |
|
297 | 297 | renamed = fctx.copysource() |
|
298 | 298 | if renamed: |
|
299 | 299 | # standin is always a largefile because largefile-ness |
|
300 | 300 | # doesn't change after rename or copy |
|
301 | 301 | renamed = lfutil.standin(renamed) |
|
302 | 302 | |
|
303 | 303 | return context.memfilectx( |
|
304 | 304 | repo, |
|
305 | 305 | memctx, |
|
306 | 306 | f, |
|
307 | 307 | lfiletohash[srcfname] + b'\n', |
|
308 | 308 | b'l' in fctx.flags(), |
|
309 | 309 | b'x' in fctx.flags(), |
|
310 | 310 | renamed, |
|
311 | 311 | ) |
|
312 | 312 | else: |
|
313 | 313 | return _getnormalcontext(repo, ctx, f, revmap) |
|
314 | 314 | |
|
315 | 315 | # Commit |
|
316 | 316 | _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap) |
|
317 | 317 | |
|
318 | 318 | |
|
319 | 319 | def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap): |
|
320 | 320 | mctx = context.memctx( |
|
321 | 321 | rdst, |
|
322 | 322 | parents, |
|
323 | 323 | ctx.description(), |
|
324 | 324 | dstfiles, |
|
325 | 325 | getfilectx, |
|
326 | 326 | ctx.user(), |
|
327 | 327 | ctx.date(), |
|
328 | 328 | ctx.extra(), |
|
329 | 329 | ) |
|
330 | 330 | ret = rdst.commitctx(mctx) |
|
331 | 331 | lfutil.copyalltostore(rdst, ret) |
|
332 | 332 | rdst.setparents(ret) |
|
333 | 333 | revmap[ctx.node()] = rdst.changelog.tip() |
|
334 | 334 | |
|
335 | 335 | |
|
336 | 336 | # Generate list of changed files |
|
337 | 337 | def _getchangedfiles(ctx, parents): |
|
338 | 338 | files = set(ctx.files()) |
|
339 | 339 | if node.nullid not in parents: |
|
340 | 340 | mc = ctx.manifest() |
|
341 | 341 | for pctx in ctx.parents(): |
|
342 | 342 | for fn in pctx.manifest().diff(mc): |
|
343 | 343 | files.add(fn) |
|
344 | 344 | return files |
|
345 | 345 | |
|
346 | 346 | |
|
347 | 347 | # Convert src parents to dst parents |
|
348 | 348 | def _convertparents(ctx, revmap): |
|
349 | 349 | parents = [] |
|
350 | 350 | for p in ctx.parents(): |
|
351 | 351 | parents.append(revmap[p.node()]) |
|
352 | 352 | while len(parents) < 2: |
|
353 | 353 | parents.append(node.nullid) |
|
354 | 354 | return parents |
|
355 | 355 | |
|
356 | 356 | |
|
357 | 357 | # Get memfilectx for a normal file |
|
358 | 358 | def _getnormalcontext(repo, ctx, f, revmap): |
|
359 | 359 | try: |
|
360 | 360 | fctx = ctx.filectx(f) |
|
361 | 361 | except error.LookupError: |
|
362 | 362 | return None |
|
363 | 363 | renamed = fctx.copysource() |
|
364 | 364 | |
|
365 | 365 | data = fctx.data() |
|
366 | 366 | if f == b'.hgtags': |
|
367 | 367 | data = _converttags(repo.ui, revmap, data) |
|
368 | 368 | return context.memfilectx( |
|
369 | 369 | repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed |
|
370 | 370 | ) |
|
371 | 371 | |
|
372 | 372 | |
|
373 | 373 | # Remap tag data using a revision map |
|
374 | 374 | def _converttags(ui, revmap, data): |
|
375 | 375 | newdata = [] |
|
376 | 376 | for line in data.splitlines(): |
|
377 | 377 | try: |
|
378 | 378 | id, name = line.split(b' ', 1) |
|
379 | 379 | except ValueError: |
|
380 | 380 | ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line) |
|
381 | 381 | continue |
|
382 | 382 | try: |
|
383 | 383 | newid = node.bin(id) |
|
384 | 384 | except TypeError: |
|
385 | 385 | ui.warn(_(b'skipping incorrectly formatted id %s\n') % id) |
|
386 | 386 | continue |
|
387 | 387 | try: |
|
388 | 388 | newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name)) |
|
389 | 389 | except KeyError: |
|
390 | 390 | ui.warn(_(b'no mapping for id %s\n') % id) |
|
391 | 391 | continue |
|
392 | 392 | return b''.join(newdata) |
|
393 | 393 | |
|
394 | 394 | |
|
395 | 395 | def _islfile(file, ctx, matcher, size): |
|
396 | 396 | '''Return true if file should be considered a largefile, i.e. |
|
397 | 397 | matcher matches it or it is larger than size.''' |
|
398 | 398 | # never store special .hg* files as largefiles |
|
399 | 399 | if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs': |
|
400 | 400 | return False |
|
401 | 401 | if matcher and matcher(file): |
|
402 | 402 | return True |
|
403 | 403 | try: |
|
404 | 404 | return ctx.filectx(file).size() >= size * 1024 * 1024 |
|
405 | 405 | except error.LookupError: |
|
406 | 406 | return False |
|
407 | 407 | |
|
408 | 408 | |
|
409 | 409 | def uploadlfiles(ui, rsrc, rdst, files): |
|
410 | 410 | '''upload largefiles to the central store''' |
|
411 | 411 | |
|
412 | 412 | if not files: |
|
413 | 413 | return |
|
414 | 414 | |
|
415 | 415 | store = storefactory.openstore(rsrc, rdst, put=True) |
|
416 | 416 | |
|
417 | 417 | at = 0 |
|
418 | 418 | ui.debug(b"sending statlfile command for %d largefiles\n" % len(files)) |
|
419 | 419 | retval = store.exists(files) |
|
420 | 420 | files = [h for h in files if not retval[h]] |
|
421 | 421 | ui.debug(b"%d largefiles need to be uploaded\n" % len(files)) |
|
422 | 422 | |
|
423 | 423 | with ui.makeprogress( |
|
424 | 424 | _(b'uploading largefiles'), unit=_(b'files'), total=len(files) |
|
425 | 425 | ) as progress: |
|
426 | 426 | for hash in files: |
|
427 | 427 | progress.update(at) |
|
428 | 428 | source = lfutil.findfile(rsrc, hash) |
|
429 | 429 | if not source: |
|
430 | 430 | raise error.Abort( |
|
431 | 431 | _( |
|
432 | 432 | b'largefile %s missing from store' |
|
433 | 433 | b' (needs to be uploaded)' |
|
434 | 434 | ) |
|
435 | 435 | % hash |
|
436 | 436 | ) |
|
437 | 437 | # XXX check for errors here |
|
438 | 438 | store.put(source, hash) |
|
439 | 439 | at += 1 |
|
440 | 440 | |
|
441 | 441 | |
|
442 | 442 | def verifylfiles(ui, repo, all=False, contents=False): |
|
443 | 443 | '''Verify that every largefile revision in the current changeset |
|
444 | 444 | exists in the central store. With --contents, also verify that |
|
445 | 445 | the contents of each local largefile file revision are correct (SHA-1 hash |
|
446 | 446 | matches the revision ID). With --all, check every changeset in |
|
447 | 447 | this repository.''' |
|
448 | 448 | if all: |
|
449 | 449 | revs = repo.revs(b'all()') |
|
450 | 450 | else: |
|
451 | 451 | revs = [b'.'] |
|
452 | 452 | |
|
453 | 453 | store = storefactory.openstore(repo) |
|
454 | 454 | return store.verify(revs, contents=contents) |
|
455 | 455 | |
|
456 | 456 | |
|
457 | 457 | def cachelfiles(ui, repo, node, filelist=None): |
|
458 | 458 | '''cachelfiles ensures that all largefiles needed by the specified revision |
|
459 | 459 | are present in the repository's largefile cache. |
|
460 | 460 | |
|
461 | 461 | returns a tuple (cached, missing). cached is the list of files downloaded |
|
462 | 462 | by this operation; missing is the list of files that were needed but could |
|
463 | 463 | not be found.''' |
|
464 | 464 | lfiles = lfutil.listlfiles(repo, node) |
|
465 | 465 | if filelist: |
|
466 | 466 | lfiles = set(lfiles) & set(filelist) |
|
467 | 467 | toget = [] |
|
468 | 468 | |
|
469 | 469 | ctx = repo[node] |
|
470 | 470 | for lfile in lfiles: |
|
471 | 471 | try: |
|
472 | 472 | expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)]) |
|
473 | 473 | except IOError as err: |
|
474 | 474 | if err.errno == errno.ENOENT: |
|
475 | 475 | continue # node must be None and standin wasn't found in wctx |
|
476 | 476 | raise |
|
477 | 477 | if not lfutil.findfile(repo, expectedhash): |
|
478 | 478 | toget.append((lfile, expectedhash)) |
|
479 | 479 | |
|
480 | 480 | if toget: |
|
481 | 481 | store = storefactory.openstore(repo) |
|
482 | 482 | ret = store.get(toget) |
|
483 | 483 | return ret |
|
484 | 484 | |
|
485 | 485 | return ([], []) |
|
486 | 486 | |
|
487 | 487 | |
|
488 | 488 | def downloadlfiles(ui, repo, rev=None): |
|
489 | 489 | match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {}) |
|
490 | 490 | |
|
491 | 491 | def prepare(ctx, fns): |
|
492 | 492 | pass |
|
493 | 493 | |
|
494 | 494 | totalsuccess = 0 |
|
495 | 495 | totalmissing = 0 |
|
496 | 496 | if rev != []: # walkchangerevs on empty list would return all revs |
|
497 | 497 | for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare): |
|
498 | 498 | success, missing = cachelfiles(ui, repo, ctx.node()) |
|
499 | 499 | totalsuccess += len(success) |
|
500 | 500 | totalmissing += len(missing) |
|
501 | 501 | ui.status(_(b"%d additional largefiles cached\n") % totalsuccess) |
|
502 | 502 | if totalmissing > 0: |
|
503 | 503 | ui.status(_(b"%d largefiles failed to download\n") % totalmissing) |
|
504 | 504 | return totalsuccess, totalmissing |
|
505 | 505 | |
|
506 | 506 | |
|
507 | 507 | def updatelfiles( |
|
508 | 508 | ui, repo, filelist=None, printmessage=None, normallookup=False |
|
509 | 509 | ): |
|
510 | 510 | '''Update largefiles according to standins in the working directory |
|
511 | 511 | |
|
512 | 512 | If ``printmessage`` is not ``None``, it forces the status message to be |

513 | 513 | printed (if true) or suppressed (if false). |
|
514 | 514 | ''' |
|
515 | 515 | statuswriter = lfutil.getstatuswriter(ui, repo, printmessage) |
|
516 | 516 | with repo.wlock(): |
|
517 | 517 | lfdirstate = lfutil.openlfdirstate(ui, repo) |
|
518 | 518 | lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate) |
|
519 | 519 | |
|
520 | 520 | if filelist is not None: |
|
521 | 521 | filelist = set(filelist) |
|
522 | 522 | lfiles = [f for f in lfiles if f in filelist] |
|
523 | 523 | |
|
524 | 524 | update = {} |
|
525 | 525 | dropped = set() |
|
526 | 526 | updated, removed = 0, 0 |
|
527 | 527 | wvfs = repo.wvfs |
|
528 | 528 | wctx = repo[None] |
|
529 | 529 | for lfile in lfiles: |
|
530 | 530 | lfileorig = os.path.relpath( |
|
531 | 531 | scmutil.backuppath(ui, repo, lfile), start=repo.root |
|
532 | 532 | ) |
|
533 | 533 | standin = lfutil.standin(lfile) |
|
534 | 534 | standinorig = os.path.relpath( |
|
535 | 535 | scmutil.backuppath(ui, repo, standin), start=repo.root |
|
536 | 536 | ) |
|
537 | 537 | if wvfs.exists(standin): |
|
538 | 538 | if wvfs.exists(standinorig) and wvfs.exists(lfile): |
|
539 | 539 | shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig)) |
|
540 | 540 | wvfs.unlinkpath(standinorig) |
|
541 | 541 | expecthash = lfutil.readasstandin(wctx[standin]) |
|
542 | 542 | if expecthash != b'': |
|
543 | 543 | if lfile not in wctx: # not switched to normal file |
|
544 | 544 | if repo.dirstate[standin] != b'?': |
|
545 | 545 | wvfs.unlinkpath(lfile, ignoremissing=True) |
|
546 | 546 | else: |
|
547 | 547 | dropped.add(lfile) |
|
548 | 548 | |
|
549 | 549 | # use normallookup() to allocate an entry in largefiles |
|
550 | 550 | # dirstate to prevent lfilesrepo.status() from reporting |
|
551 | 551 | # missing files as removed. |
|
552 | 552 | lfdirstate.normallookup(lfile) |
|
553 | 553 | update[lfile] = expecthash |
|
554 | 554 | else: |
|
555 | 555 | # Remove lfiles for which the standin is deleted, unless the |
|
556 | 556 | # lfile is added to the repository again. This happens when a |
|
557 | 557 | # largefile is converted back to a normal file: the standin |
|
558 | 558 | # disappears, but a new (normal) file appears as the lfile. |
|
559 | 559 | if ( |
|
560 | 560 | wvfs.exists(lfile) |
|
561 | 561 | and repo.dirstate.normalize(lfile) not in wctx |
|
562 | 562 | ): |
|
563 | 563 | wvfs.unlinkpath(lfile) |
|
564 | 564 | removed += 1 |
|
565 | 565 | |
|
566 | 566 | # largefile processing might be slow and be interrupted - be prepared |
|
567 | 567 | lfdirstate.write() |
|
568 | 568 | |
|
569 | 569 | if lfiles: |
|
570 | 570 | lfiles = [f for f in lfiles if f not in dropped] |
|
571 | 571 | |
|
572 | 572 | for f in dropped: |
|
573 | 573 | repo.wvfs.unlinkpath(lfutil.standin(f)) |
|
574 | 574 | |
|
575 | 575 | # This needs to happen for dropped files, otherwise they stay in |
|
576 | 576 | # the M state. |
|
577 | 577 | lfutil.synclfdirstate(repo, lfdirstate, f, normallookup) |
|
578 | 578 | |
|
579 | 579 | statuswriter(_(b'getting changed largefiles\n')) |
|
580 | 580 | cachelfiles(ui, repo, None, lfiles) |
|
581 | 581 | |
|
582 | 582 | for lfile in lfiles: |
|
583 | 583 | update1 = 0 |
|
584 | 584 | |
|
585 | 585 | expecthash = update.get(lfile) |
|
586 | 586 | if expecthash: |
|
587 | 587 | if not lfutil.copyfromcache(repo, expecthash, lfile): |
|
588 | 588 | # failed ... but already removed and set to normallookup |
|
589 | 589 | continue |
|
590 | 590 | # Synchronize largefile dirstate to the last modified |
|
591 | 591 | # time of the file |
|
592 | 592 | lfdirstate.normal(lfile) |
|
593 | 593 | update1 = 1 |
|
594 | 594 | |
|
595 | 595 | # copy the exec mode of largefile standin from the repository's |
|
596 | 596 | # dirstate to its state in the lfdirstate. |
|
597 | 597 | standin = lfutil.standin(lfile) |
|
598 | 598 | if wvfs.exists(standin): |
|
599 | 599 | # exec is decided by the user's permissions using mask 0o100 |
|
600 | 600 | standinexec = wvfs.stat(standin).st_mode & 0o100 |
|
601 | 601 | st = wvfs.stat(lfile) |
|
602 | 602 | mode = st.st_mode |
|
603 | 603 | if standinexec != mode & 0o100: |
|
604 | 604 | # first remove all X bits, then shift all R bits to X |
|
605 | 605 | mode &= ~0o111 |
|
606 | 606 | if standinexec: |
|
607 | 607 | mode |= (mode >> 2) & 0o111 & ~util.umask |
|
608 | 608 | wvfs.chmod(lfile, mode) |
|
609 | 609 | update1 = 1 |
|
610 | 610 | |
|
611 | 611 | updated += update1 |
|
612 | 612 | |
|
613 | 613 | lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup) |
|
614 | 614 | |
|
615 | 615 | lfdirstate.write() |
|
616 | 616 | if lfiles: |
|
617 | 617 | statuswriter( |
|
618 | 618 | _(b'%d largefiles updated, %d removed\n') % (updated, removed) |
|
619 | 619 | ) |
|
620 | 620 | |
|
621 | 621 | |
|
622 | 622 | @eh.command( |
|
623 | 623 | b'lfpull', |
|
624 | 624 | [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))] |
|
625 | 625 | + cmdutil.remoteopts, |
|
626 | 626 | _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'), |
|
627 | 627 | ) |
|
628 | 628 | def lfpull(ui, repo, source=b"default", **opts): |
|
629 | 629 | """pull largefiles for the specified revisions from the specified source |
|
630 | 630 | |
|
631 | 631 | Pull largefiles that are referenced from local changesets but missing |
|
632 | 632 | locally, pulling from a remote repository to the local cache. |
|
633 | 633 | |
|
634 | 634 | If SOURCE is omitted, the 'default' path will be used. |
|
635 | 635 | See :hg:`help urls` for more information. |
|
636 | 636 | |
|
637 | 637 | .. container:: verbose |
|
638 | 638 | |
|
639 | 639 | Some examples: |
|
640 | 640 | |
|
641 | 641 | - pull largefiles for all branch heads:: |
|
642 | 642 | |
|
643 | 643 | hg lfpull -r "head() and not closed()" |
|
644 | 644 | |
|
645 | 645 | - pull largefiles on the default branch:: |
|
646 | 646 | |
|
647 | 647 | hg lfpull -r "branch(default)" |
|
648 | 648 | """ |
|
649 | 649 | repo.lfpullsource = source |
|
650 | 650 | |
|
651 | 651 | revs = opts.get('rev', []) |
|
652 | 652 | if not revs: |
|
653 | 653 | raise error.Abort(_(b'no revisions specified')) |
|
654 | 654 | revs = scmutil.revrange(repo, revs) |
|
655 | 655 | |
|
656 | 656 | numcached = 0 |
|
657 | 657 | for rev in revs: |
|
658 | 658 | ui.note(_(b'pulling largefiles for revision %d\n') % rev) |
|
659 | 659 | (cached, missing) = cachelfiles(ui, repo, rev) |
|
660 | 660 | numcached += len(cached) |
|
661 | 661 | ui.status(_(b"%d largefiles cached\n") % numcached) |
|
662 | 662 | |
|
663 | 663 | |
|
664 | 664 | @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE')) |
|
665 | 665 | def debuglfput(ui, repo, filepath, **kwargs): |
|
666 | 666 | hash = lfutil.hashfile(filepath) |
|
667 | 667 | storefactory.openstore(repo).put(filepath, hash) |
|
668 | 668 | ui.write(b'%s\n' % hash) |
|
669 | 669 | return 0 |
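A note on the permission handling in the update loop above: the standin's exec bit is treated as authoritative for the largefile, so the file's mode first has every execute bit cleared and, when the standin is executable, gets execute bits back by shifting the read bits into the execute positions and masking with the process umask. A minimal standalone sketch of that bit arithmetic (the helper name and the umask value are illustrative, not part of the patch):

    def _sync_exec_bit(mode, standin_is_exec, umask=0o022):
        # Clear u/g/o execute bits, then, if the standin is executable, copy
        # each read bit down into the matching execute bit, honouring umask.
        mode &= ~0o111
        if standin_is_exec:
            mode |= (mode >> 2) & 0o111 & ~umask
        return mode

    # rw-r--r-- with an executable standin and umask 022 becomes rwxr-xr-x,
    # while an executable file whose standin lost its x bit drops back to 644.
    assert _sync_exec_bit(0o644, True) == 0o755
    assert _sync_exec_bit(0o755, False) == 0o644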
@@ -1,760 +1,760 b'' | |||
|
1 | 1 | # Copyright 2009-2010 Gregory P. Ward |
|
2 | 2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated |
|
3 | 3 | # Copyright 2010-2011 Fog Creek Software |
|
4 | 4 | # Copyright 2010-2011 Unity Technologies |
|
5 | 5 | # |
|
6 | 6 | # This software may be used and distributed according to the terms of the |
|
7 | 7 | # GNU General Public License version 2 or any later version. |
|
8 | 8 | |
|
9 | 9 | '''largefiles utility code: must not import other modules in this package.''' |
|
10 | 10 | from __future__ import absolute_import |
|
11 | 11 | |
|
12 | 12 | import contextlib |
|
13 | 13 | import copy |
|
14 | import hashlib | |
|
15 | 14 | import os |
|
16 | 15 | import stat |
|
17 | 16 | |
|
18 | 17 | from mercurial.i18n import _ |
|
19 | 18 | from mercurial.node import hex |
|
20 | 19 | from mercurial.pycompat import open |
|
21 | 20 | |
|
22 | 21 | from mercurial import ( |
|
23 | 22 | dirstate, |
|
24 | 23 | encoding, |
|
25 | 24 | error, |
|
26 | 25 | httpconnection, |
|
27 | 26 | match as matchmod, |
|
28 | 27 | node, |
|
29 | 28 | pycompat, |
|
30 | 29 | scmutil, |
|
31 | 30 | sparse, |
|
32 | 31 | util, |
|
33 | 32 | vfs as vfsmod, |
|
34 | 33 | ) |
|
34 | from mercurial.utils import hashutil | |
|
35 | 35 | |
|
36 | 36 | shortname = b'.hglf' |
|
37 | 37 | shortnameslash = shortname + b'/' |
|
38 | 38 | longname = b'largefiles' |
|
39 | 39 | |
|
40 | 40 | # -- Private worker functions ------------------------------------------ |
|
41 | 41 | |
|
42 | 42 | |
|
43 | 43 | @contextlib.contextmanager |
|
44 | 44 | def lfstatus(repo, value=True): |
|
45 | 45 | oldvalue = getattr(repo, 'lfstatus', False) |
|
46 | 46 | repo.lfstatus = value |
|
47 | 47 | try: |
|
48 | 48 | yield |
|
49 | 49 | finally: |
|
50 | 50 | repo.lfstatus = oldvalue |
|
51 | 51 | |
|
52 | 52 | |
|
53 | 53 | def getminsize(ui, assumelfiles, opt, default=10): |
|
54 | 54 | lfsize = opt |
|
55 | 55 | if not lfsize and assumelfiles: |
|
56 | 56 | lfsize = ui.config(longname, b'minsize', default=default) |
|
57 | 57 | if lfsize: |
|
58 | 58 | try: |
|
59 | 59 | lfsize = float(lfsize) |
|
60 | 60 | except ValueError: |
|
61 | 61 | raise error.Abort( |
|
62 | 62 | _(b'largefiles: size must be number (not %s)\n') % lfsize |
|
63 | 63 | ) |
|
64 | 64 | if lfsize is None: |
|
65 | 65 | raise error.Abort(_(b'minimum size for largefiles must be specified')) |
|
66 | 66 | return lfsize |
|
67 | 67 | |
|
68 | 68 | |
|
69 | 69 | def link(src, dest): |
|
70 | 70 | """Try to create hardlink - if that fails, efficiently make a copy.""" |
|
71 | 71 | util.makedirs(os.path.dirname(dest)) |
|
72 | 72 | try: |
|
73 | 73 | util.oslink(src, dest) |
|
74 | 74 | except OSError: |
|
75 | 75 | # if hardlinks fail, fallback on atomic copy |
|
76 | 76 | with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf: |
|
77 | 77 | for chunk in util.filechunkiter(srcf): |
|
78 | 78 | dstf.write(chunk) |
|
79 | 79 | os.chmod(dest, os.stat(src).st_mode) |
|
80 | 80 | |
|
81 | 81 | |
|
82 | 82 | def usercachepath(ui, hash): |
|
83 | 83 | '''Return the correct location in the "global" largefiles cache for a file |
|
84 | 84 | with the given hash. |
|
85 | 85 | This cache is used for sharing of largefiles across repositories - both |
|
86 | 86 | to preserve download bandwidth and storage space.''' |
|
87 | 87 | return os.path.join(_usercachedir(ui), hash) |
|
88 | 88 | |
|
89 | 89 | |
|
90 | 90 | def _usercachedir(ui, name=longname): |
|
91 | 91 | '''Return the location of the "global" largefiles cache.''' |
|
92 | 92 | path = ui.configpath(name, b'usercache') |
|
93 | 93 | if path: |
|
94 | 94 | return path |
|
95 | 95 | if pycompat.iswindows: |
|
96 | 96 | appdata = encoding.environ.get( |
|
97 | 97 | b'LOCALAPPDATA', encoding.environ.get(b'APPDATA') |
|
98 | 98 | ) |
|
99 | 99 | if appdata: |
|
100 | 100 | return os.path.join(appdata, name) |
|
101 | 101 | elif pycompat.isdarwin: |
|
102 | 102 | home = encoding.environ.get(b'HOME') |
|
103 | 103 | if home: |
|
104 | 104 | return os.path.join(home, b'Library', b'Caches', name) |
|
105 | 105 | elif pycompat.isposix: |
|
106 | 106 | path = encoding.environ.get(b'XDG_CACHE_HOME') |
|
107 | 107 | if path: |
|
108 | 108 | return os.path.join(path, name) |
|
109 | 109 | home = encoding.environ.get(b'HOME') |
|
110 | 110 | if home: |
|
111 | 111 | return os.path.join(home, b'.cache', name) |
|
112 | 112 | else: |
|
113 | 113 | raise error.Abort( |
|
114 | 114 | _(b'unknown operating system: %s\n') % pycompat.osname |
|
115 | 115 | ) |
|
116 | 116 | raise error.Abort(_(b'unknown %s usercache location') % name) |
|
117 | 117 | |
|
118 | 118 | |
|
119 | 119 | def inusercache(ui, hash): |
|
120 | 120 | path = usercachepath(ui, hash) |
|
121 | 121 | return os.path.exists(path) |
|
122 | 122 | |
|
123 | 123 | |
|
124 | 124 | def findfile(repo, hash): |
|
125 | 125 | '''Return store path of the largefile with the specified hash. |
|
126 | 126 | As a side effect, the file might be linked from user cache. |
|
127 | 127 | Return None if the file can't be found locally.''' |
|
128 | 128 | path, exists = findstorepath(repo, hash) |
|
129 | 129 | if exists: |
|
130 | 130 | repo.ui.note(_(b'found %s in store\n') % hash) |
|
131 | 131 | return path |
|
132 | 132 | elif inusercache(repo.ui, hash): |
|
133 | 133 | repo.ui.note(_(b'found %s in system cache\n') % hash) |
|
134 | 134 | path = storepath(repo, hash) |
|
135 | 135 | link(usercachepath(repo.ui, hash), path) |
|
136 | 136 | return path |
|
137 | 137 | return None |
|
138 | 138 | |
|
139 | 139 | |
|
140 | 140 | class largefilesdirstate(dirstate.dirstate): |
|
141 | 141 | def __getitem__(self, key): |
|
142 | 142 | return super(largefilesdirstate, self).__getitem__(unixpath(key)) |
|
143 | 143 | |
|
144 | 144 | def normal(self, f): |
|
145 | 145 | return super(largefilesdirstate, self).normal(unixpath(f)) |
|
146 | 146 | |
|
147 | 147 | def remove(self, f): |
|
148 | 148 | return super(largefilesdirstate, self).remove(unixpath(f)) |
|
149 | 149 | |
|
150 | 150 | def add(self, f): |
|
151 | 151 | return super(largefilesdirstate, self).add(unixpath(f)) |
|
152 | 152 | |
|
153 | 153 | def drop(self, f): |
|
154 | 154 | return super(largefilesdirstate, self).drop(unixpath(f)) |
|
155 | 155 | |
|
156 | 156 | def forget(self, f): |
|
157 | 157 | return super(largefilesdirstate, self).forget(unixpath(f)) |
|
158 | 158 | |
|
159 | 159 | def normallookup(self, f): |
|
160 | 160 | return super(largefilesdirstate, self).normallookup(unixpath(f)) |
|
161 | 161 | |
|
162 | 162 | def _ignore(self, f): |
|
163 | 163 | return False |
|
164 | 164 | |
|
165 | 165 | def write(self, tr=False): |
|
166 | 166 | # (1) disable PENDING mode always |
|
167 | 167 | # (lfdirstate isn't yet managed as a part of the transaction) |
|
168 | 168 | # (2) avoid develwarn 'use dirstate.write with ....' |
|
169 | 169 | super(largefilesdirstate, self).write(None) |
|
170 | 170 | |
|
171 | 171 | |
|
172 | 172 | def openlfdirstate(ui, repo, create=True): |
|
173 | 173 | ''' |
|
174 | 174 | Return a dirstate object that tracks largefiles: i.e. its root is |
|
175 | 175 | the repo root, but it is saved in .hg/largefiles/dirstate. |
|
176 | 176 | ''' |
|
177 | 177 | vfs = repo.vfs |
|
178 | 178 | lfstoredir = longname |
|
179 | 179 | opener = vfsmod.vfs(vfs.join(lfstoredir)) |
|
180 | 180 | lfdirstate = largefilesdirstate( |
|
181 | 181 | opener, |
|
182 | 182 | ui, |
|
183 | 183 | repo.root, |
|
184 | 184 | repo.dirstate._validate, |
|
185 | 185 | lambda: sparse.matcher(repo), |
|
186 | 186 | ) |
|
187 | 187 | |
|
188 | 188 | # If the largefiles dirstate does not exist, populate and create |
|
189 | 189 | # it. This ensures that we create it on the first meaningful |
|
190 | 190 | # largefiles operation in a new clone. |
|
191 | 191 | if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')): |
|
192 | 192 | matcher = getstandinmatcher(repo) |
|
193 | 193 | standins = repo.dirstate.walk( |
|
194 | 194 | matcher, subrepos=[], unknown=False, ignored=False |
|
195 | 195 | ) |
|
196 | 196 | |
|
197 | 197 | if len(standins) > 0: |
|
198 | 198 | vfs.makedirs(lfstoredir) |
|
199 | 199 | |
|
200 | 200 | for standin in standins: |
|
201 | 201 | lfile = splitstandin(standin) |
|
202 | 202 | lfdirstate.normallookup(lfile) |
|
203 | 203 | return lfdirstate |
|
204 | 204 | |
|
205 | 205 | |
|
206 | 206 | def lfdirstatestatus(lfdirstate, repo): |
|
207 | 207 | pctx = repo[b'.'] |
|
208 | 208 | match = matchmod.always() |
|
209 | 209 | unsure, s = lfdirstate.status( |
|
210 | 210 | match, subrepos=[], ignored=False, clean=False, unknown=False |
|
211 | 211 | ) |
|
212 | 212 | modified, clean = s.modified, s.clean |
|
213 | 213 | for lfile in unsure: |
|
214 | 214 | try: |
|
215 | 215 | fctx = pctx[standin(lfile)] |
|
216 | 216 | except LookupError: |
|
217 | 217 | fctx = None |
|
218 | 218 | if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)): |
|
219 | 219 | modified.append(lfile) |
|
220 | 220 | else: |
|
221 | 221 | clean.append(lfile) |
|
222 | 222 | lfdirstate.normal(lfile) |
|
223 | 223 | return s |
|
224 | 224 | |
|
225 | 225 | |
|
226 | 226 | def listlfiles(repo, rev=None, matcher=None): |
|
227 | 227 | '''return a list of largefiles in the working copy or the |
|
228 | 228 | specified changeset''' |
|
229 | 229 | |
|
230 | 230 | if matcher is None: |
|
231 | 231 | matcher = getstandinmatcher(repo) |
|
232 | 232 | |
|
233 | 233 | # ignore unknown files in working directory |
|
234 | 234 | return [ |
|
235 | 235 | splitstandin(f) |
|
236 | 236 | for f in repo[rev].walk(matcher) |
|
237 | 237 | if rev is not None or repo.dirstate[f] != b'?' |
|
238 | 238 | ] |
|
239 | 239 | |
|
240 | 240 | |
|
241 | 241 | def instore(repo, hash, forcelocal=False): |
|
242 | 242 | '''Return true if a largefile with the given hash exists in the store''' |
|
243 | 243 | return os.path.exists(storepath(repo, hash, forcelocal)) |
|
244 | 244 | |
|
245 | 245 | |
|
246 | 246 | def storepath(repo, hash, forcelocal=False): |
|
247 | 247 | '''Return the correct location in the repository largefiles store for a |
|
248 | 248 | file with the given hash.''' |
|
249 | 249 | if not forcelocal and repo.shared(): |
|
250 | 250 | return repo.vfs.reljoin(repo.sharedpath, longname, hash) |
|
251 | 251 | return repo.vfs.join(longname, hash) |
|
252 | 252 | |
|
253 | 253 | |
|
254 | 254 | def findstorepath(repo, hash): |
|
255 | 255 | '''Search through the local store path(s) to find the file for the given |
|
256 | 256 | hash. If the file is not found, its path in the primary store is returned. |
|
257 | 257 | The return value is a tuple of (path, exists(path)). |
|
258 | 258 | ''' |
|
259 | 259 | # For shared repos, the primary store is in the share source. But for |
|
260 | 260 | # backward compatibility, force a lookup in the local store if it wasn't |
|
261 | 261 | # found in the share source. |
|
262 | 262 | path = storepath(repo, hash, False) |
|
263 | 263 | |
|
264 | 264 | if instore(repo, hash): |
|
265 | 265 | return (path, True) |
|
266 | 266 | elif repo.shared() and instore(repo, hash, True): |
|
267 | 267 | return storepath(repo, hash, True), True |
|
268 | 268 | |
|
269 | 269 | return (path, False) |
|
270 | 270 | |
|
271 | 271 | |
|
272 | 272 | def copyfromcache(repo, hash, filename): |
|
273 | 273 | '''Copy the specified largefile from the repo or system cache to |
|
274 | 274 | filename in the repository. Return true on success or false if the |
|
275 | 275 | file was not found in either cache (which should not happen: 
|
276 | 276 | this is meant to be called only after ensuring that the needed |
|
277 | 277 | largefile exists in the cache).''' |
|
278 | 278 | wvfs = repo.wvfs |
|
279 | 279 | path = findfile(repo, hash) |
|
280 | 280 | if path is None: |
|
281 | 281 | return False |
|
282 | 282 | wvfs.makedirs(wvfs.dirname(wvfs.join(filename))) |
|
283 | 283 | # The write may fail before the file is fully written, but we |
|
284 | 284 | # don't use atomic writes in the working copy. |
|
285 | 285 | with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd: |
|
286 | 286 | gothash = copyandhash(util.filechunkiter(srcfd), destfd) |
|
287 | 287 | if gothash != hash: |
|
288 | 288 | repo.ui.warn( |
|
289 | 289 | _(b'%s: data corruption in %s with hash %s\n') |
|
290 | 290 | % (filename, path, gothash) |
|
291 | 291 | ) |
|
292 | 292 | wvfs.unlink(filename) |
|
293 | 293 | return False |
|
294 | 294 | return True |
|
295 | 295 | |
|
296 | 296 | |
|
297 | 297 | def copytostore(repo, ctx, file, fstandin): |
|
298 | 298 | wvfs = repo.wvfs |
|
299 | 299 | hash = readasstandin(ctx[fstandin]) |
|
300 | 300 | if instore(repo, hash): |
|
301 | 301 | return |
|
302 | 302 | if wvfs.exists(file): |
|
303 | 303 | copytostoreabsolute(repo, wvfs.join(file), hash) |
|
304 | 304 | else: |
|
305 | 305 | repo.ui.warn( |
|
306 | 306 | _(b"%s: largefile %s not available from local store\n") |
|
307 | 307 | % (file, hash) |
|
308 | 308 | ) |
|
309 | 309 | |
|
310 | 310 | |
|
311 | 311 | def copyalltostore(repo, node): |
|
312 | 312 | '''Copy all largefiles in a given revision to the store''' |
|
313 | 313 | |
|
314 | 314 | ctx = repo[node] |
|
315 | 315 | for filename in ctx.files(): |
|
316 | 316 | realfile = splitstandin(filename) |
|
317 | 317 | if realfile is not None and filename in ctx.manifest(): |
|
318 | 318 | copytostore(repo, ctx, realfile, filename) |
|
319 | 319 | |
|
320 | 320 | |
|
321 | 321 | def copytostoreabsolute(repo, file, hash): |
|
322 | 322 | if inusercache(repo.ui, hash): |
|
323 | 323 | link(usercachepath(repo.ui, hash), storepath(repo, hash)) |
|
324 | 324 | else: |
|
325 | 325 | util.makedirs(os.path.dirname(storepath(repo, hash))) |
|
326 | 326 | with open(file, b'rb') as srcf: |
|
327 | 327 | with util.atomictempfile( |
|
328 | 328 | storepath(repo, hash), createmode=repo.store.createmode |
|
329 | 329 | ) as dstf: |
|
330 | 330 | for chunk in util.filechunkiter(srcf): |
|
331 | 331 | dstf.write(chunk) |
|
332 | 332 | linktousercache(repo, hash) |
|
333 | 333 | |
|
334 | 334 | |
|
335 | 335 | def linktousercache(repo, hash): |
|
336 | 336 | '''Link / copy the largefile with the specified hash from the store |
|
337 | 337 | to the cache.''' |
|
338 | 338 | path = usercachepath(repo.ui, hash) |
|
339 | 339 | link(storepath(repo, hash), path) |
|
340 | 340 | |
|
341 | 341 | |
|
342 | 342 | def getstandinmatcher(repo, rmatcher=None): |
|
343 | 343 | '''Return a match object that applies rmatcher to the standin directory''' |
|
344 | 344 | wvfs = repo.wvfs |
|
345 | 345 | standindir = shortname |
|
346 | 346 | |
|
347 | 347 | # no warnings about missing files or directories |
|
348 | 348 | badfn = lambda f, msg: None |
|
349 | 349 | |
|
350 | 350 | if rmatcher and not rmatcher.always(): |
|
351 | 351 | pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()] |
|
352 | 352 | if not pats: |
|
353 | 353 | pats = [wvfs.join(standindir)] |
|
354 | 354 | match = scmutil.match(repo[None], pats, badfn=badfn) |
|
355 | 355 | else: |
|
356 | 356 | # no patterns: relative to repo root |
|
357 | 357 | match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn) |
|
358 | 358 | return match |
|
359 | 359 | |
|
360 | 360 | |
|
361 | 361 | def composestandinmatcher(repo, rmatcher): |
|
362 | 362 | '''Return a matcher that accepts standins corresponding to the |
|
363 | 363 | files accepted by rmatcher. Pass the list of files in the matcher |
|
364 | 364 | as the paths specified by the user.''' |
|
365 | 365 | smatcher = getstandinmatcher(repo, rmatcher) |
|
366 | 366 | isstandin = smatcher.matchfn |
|
367 | 367 | |
|
368 | 368 | def composedmatchfn(f): |
|
369 | 369 | return isstandin(f) and rmatcher.matchfn(splitstandin(f)) |
|
370 | 370 | |
|
371 | 371 | smatcher.matchfn = composedmatchfn |
|
372 | 372 | |
|
373 | 373 | return smatcher |
|
374 | 374 | |
|
375 | 375 | |
|
376 | 376 | def standin(filename): |
|
377 | 377 | '''Return the repo-relative path to the standin for the specified big |
|
378 | 378 | file.''' |
|
379 | 379 | # Notes: |
|
380 | 380 | # 1) Some callers want an absolute path, but for instance addlargefiles |
|
381 | 381 | # needs it repo-relative so it can be passed to repo[None].add(). So |
|
382 | 382 | # leave it up to the caller to use repo.wjoin() to get an absolute path. |
|
383 | 383 | # 2) Join with '/' because that's what dirstate always uses, even on |
|
384 | 384 | # Windows. Change existing separator to '/' first in case we are |
|
385 | 385 | # passed filenames from an external source (like the command line). |
|
386 | 386 | return shortnameslash + util.pconvert(filename) |
|
387 | 387 | |
|
388 | 388 | |
|
389 | 389 | def isstandin(filename): |
|
390 | 390 | '''Return true if filename is a big file standin. filename must be |
|
391 | 391 | in Mercurial's internal form (slash-separated).''' |
|
392 | 392 | return filename.startswith(shortnameslash) |
|
393 | 393 | |
|
394 | 394 | |
|
395 | 395 | def splitstandin(filename): |
|
396 | 396 | # Split on / because that's what dirstate always uses, even on Windows. |
|
397 | 397 | # Change local separator to / first just in case we are passed filenames |
|
398 | 398 | # from an external source (like the command line). |
|
399 | 399 | bits = util.pconvert(filename).split(b'/', 1) |
|
400 | 400 | if len(bits) == 2 and bits[0] == shortname: |
|
401 | 401 | return bits[1] |
|
402 | 402 | else: |
|
403 | 403 | return None |
|
404 | 404 | |
|
405 | 405 | |
|
406 | 406 | def updatestandin(repo, lfile, standin): |
|
407 | 407 | """Re-calculate hash value of lfile and write it into standin |
|
408 | 408 | |
|
409 | 409 | This assumes that "lfutil.standin(lfile) == standin", for efficiency. |
|
410 | 410 | """ |
|
411 | 411 | file = repo.wjoin(lfile) |
|
412 | 412 | if repo.wvfs.exists(lfile): |
|
413 | 413 | hash = hashfile(file) |
|
414 | 414 | executable = getexecutable(file) |
|
415 | 415 | writestandin(repo, standin, hash, executable) |
|
416 | 416 | else: |
|
417 | 417 | raise error.Abort(_(b'%s: file not found!') % lfile) |
|
418 | 418 | |
|
419 | 419 | |
|
420 | 420 | def readasstandin(fctx): |
|
421 | 421 | '''read hex hash from given filectx of standin file |
|
422 | 422 | |
|
423 | 423 | This encapsulates how "standin" data is stored into storage layer.''' |
|
424 | 424 | return fctx.data().strip() |
|
425 | 425 | |
|
426 | 426 | |
|
427 | 427 | def writestandin(repo, standin, hash, executable): |
|
428 | 428 | '''write hash to <repo.root>/<standin>''' |
|
429 | 429 | repo.wwrite(standin, hash + b'\n', executable and b'x' or b'') |
|
430 | 430 | |
|
431 | 431 | |
|
432 | 432 | def copyandhash(instream, outfile): |
|
433 | 433 | '''Read bytes from instream (iterable) and write them to outfile, |
|
434 | 434 | computing the SHA-1 hash of the data along the way. Return the hash.''' |
|
435 | hasher = hashlib.sha1(b'') | 

435 | hasher = hashutil.sha1(b'') | |
|
436 | 436 | for data in instream: |
|
437 | 437 | hasher.update(data) |
|
438 | 438 | outfile.write(data) |
|
439 | 439 | return hex(hasher.digest()) |
|
440 | 440 | |
|
441 | 441 | |
|
442 | 442 | def hashfile(file): |
|
443 | 443 | if not os.path.exists(file): |
|
444 | 444 | return b'' |
|
445 | 445 | with open(file, b'rb') as fd: |
|
446 | 446 | return hexsha1(fd) |
|
447 | 447 | |
|
448 | 448 | |
|
449 | 449 | def getexecutable(filename): |
|
450 | 450 | mode = os.stat(filename).st_mode |
|
451 | 451 | return ( |
|
452 | 452 | (mode & stat.S_IXUSR) |
|
453 | 453 | and (mode & stat.S_IXGRP) |
|
454 | 454 | and (mode & stat.S_IXOTH) |
|
455 | 455 | ) |
|
456 | 456 | |
|
457 | 457 | |
|
458 | 458 | def urljoin(first, second, *arg): |
|
459 | 459 | def join(left, right): |
|
460 | 460 | if not left.endswith(b'/'): |
|
461 | 461 | left += b'/' |
|
462 | 462 | if right.startswith(b'/'): |
|
463 | 463 | right = right[1:] |
|
464 | 464 | return left + right |
|
465 | 465 | |
|
466 | 466 | url = join(first, second) |
|
467 | 467 | for a in arg: |
|
468 | 468 | url = join(url, a) |
|
469 | 469 | return url |
|
470 | 470 | |
|
471 | 471 | |
|
472 | 472 | def hexsha1(fileobj): |
|
473 | 473 | """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like |
|
474 | 474 | object data""" |
|
475 | h = hashlib.sha1() | 

475 | h = hashutil.sha1() | |
|
476 | 476 | for chunk in util.filechunkiter(fileobj): |
|
477 | 477 | h.update(chunk) |
|
478 | 478 | return hex(h.digest()) |
|
479 | 479 | |
|
480 | 480 | |
|
481 | 481 | def httpsendfile(ui, filename): |
|
482 | 482 | return httpconnection.httpsendfile(ui, filename, b'rb') |
|
483 | 483 | |
|
484 | 484 | |
|
485 | 485 | def unixpath(path): |
|
486 | 486 | '''Return a version of path normalized for use with the lfdirstate.''' |
|
487 | 487 | return util.pconvert(os.path.normpath(path)) |
|
488 | 488 | |
|
489 | 489 | |
|
490 | 490 | def islfilesrepo(repo): |
|
491 | 491 | '''Return true if the repo is a largefile repo.''' |
|
492 | 492 | if b'largefiles' in repo.requirements and any( |
|
493 | 493 | shortnameslash in f[0] for f in repo.store.datafiles() |
|
494 | 494 | ): |
|
495 | 495 | return True |
|
496 | 496 | |
|
497 | 497 | return any(openlfdirstate(repo.ui, repo, False)) |
|
498 | 498 | |
|
499 | 499 | |
|
500 | 500 | class storeprotonotcapable(Exception): |
|
501 | 501 | def __init__(self, storetypes): |
|
502 | 502 | self.storetypes = storetypes |
|
503 | 503 | |
|
504 | 504 | |
|
505 | 505 | def getstandinsstate(repo): |
|
506 | 506 | standins = [] |
|
507 | 507 | matcher = getstandinmatcher(repo) |
|
508 | 508 | wctx = repo[None] |
|
509 | 509 | for standin in repo.dirstate.walk( |
|
510 | 510 | matcher, subrepos=[], unknown=False, ignored=False |
|
511 | 511 | ): |
|
512 | 512 | lfile = splitstandin(standin) |
|
513 | 513 | try: |
|
514 | 514 | hash = readasstandin(wctx[standin]) |
|
515 | 515 | except IOError: |
|
516 | 516 | hash = None |
|
517 | 517 | standins.append((lfile, hash)) |
|
518 | 518 | return standins |
|
519 | 519 | |
|
520 | 520 | |
|
521 | 521 | def synclfdirstate(repo, lfdirstate, lfile, normallookup): |
|
522 | 522 | lfstandin = standin(lfile) |
|
523 | 523 | if lfstandin in repo.dirstate: |
|
524 | 524 | stat = repo.dirstate._map[lfstandin] |
|
525 | 525 | state, mtime = stat[0], stat[3] |
|
526 | 526 | else: |
|
527 | 527 | state, mtime = b'?', -1 |
|
528 | 528 | if state == b'n': |
|
529 | 529 | if normallookup or mtime < 0 or not repo.wvfs.exists(lfile): |
|
530 | 530 | # state 'n' doesn't ensure 'clean' in this case |
|
531 | 531 | lfdirstate.normallookup(lfile) |
|
532 | 532 | else: |
|
533 | 533 | lfdirstate.normal(lfile) |
|
534 | 534 | elif state == b'm': |
|
535 | 535 | lfdirstate.normallookup(lfile) |
|
536 | 536 | elif state == b'r': |
|
537 | 537 | lfdirstate.remove(lfile) |
|
538 | 538 | elif state == b'a': |
|
539 | 539 | lfdirstate.add(lfile) |
|
540 | 540 | elif state == b'?': |
|
541 | 541 | lfdirstate.drop(lfile) |
|
542 | 542 | |
|
543 | 543 | |
|
544 | 544 | def markcommitted(orig, ctx, node): |
|
545 | 545 | repo = ctx.repo() |
|
546 | 546 | |
|
547 | 547 | orig(node) |
|
548 | 548 | |
|
549 | 549 | # ATTENTION: "ctx.files()" may differ from "repo[node].files()" |
|
550 | 550 | # because files coming from the 2nd parent are omitted in the latter. |
|
551 | 551 | # |
|
552 | 552 | # The former should be used to get targets of "synclfdirstate", |
|
553 | 553 | # because such files: |
|
554 | 554 | # - are marked as "a" by "patch.patch()" (e.g. via transplant), and |
|
555 | 555 | # - have to be marked as "n" after commit, but |
|
556 | 556 | # - aren't listed in "repo[node].files()" |
|
557 | 557 | |
|
558 | 558 | lfdirstate = openlfdirstate(repo.ui, repo) |
|
559 | 559 | for f in ctx.files(): |
|
560 | 560 | lfile = splitstandin(f) |
|
561 | 561 | if lfile is not None: |
|
562 | 562 | synclfdirstate(repo, lfdirstate, lfile, False) |
|
563 | 563 | lfdirstate.write() |
|
564 | 564 | |
|
565 | 565 | # As part of committing, copy all of the largefiles into the cache. |
|
566 | 566 | # |
|
567 | 567 | # Using "node" instead of "ctx" implies additional "repo[node]" |
|
568 | 568 | # lookup during copyalltostore(), but can omit the redundant check for 

569 | 569 | # files coming from the 2nd parent, which should exist in store 
|
570 | 570 | # at merging. |
|
571 | 571 | copyalltostore(repo, node) |
|
572 | 572 | |
|
573 | 573 | |
|
574 | 574 | def getlfilestoupdate(oldstandins, newstandins): |
|
575 | 575 | changedstandins = set(oldstandins).symmetric_difference(set(newstandins)) |
|
576 | 576 | filelist = [] |
|
577 | 577 | for f in changedstandins: |
|
578 | 578 | if f[0] not in filelist: |
|
579 | 579 | filelist.append(f[0]) |
|
580 | 580 | return filelist |
|
581 | 581 | |
|
582 | 582 | |
|
583 | 583 | def getlfilestoupload(repo, missing, addfunc): |
|
584 | 584 | makeprogress = repo.ui.makeprogress |
|
585 | 585 | with makeprogress( |
|
586 | 586 | _(b'finding outgoing largefiles'), |
|
587 | 587 | unit=_(b'revisions'), |
|
588 | 588 | total=len(missing), |
|
589 | 589 | ) as progress: |
|
590 | 590 | for i, n in enumerate(missing): |
|
591 | 591 | progress.update(i) |
|
592 | 592 | parents = [p for p in repo[n].parents() if p != node.nullid] |
|
593 | 593 | |
|
594 | 594 | with lfstatus(repo, value=False): |
|
595 | 595 | ctx = repo[n] |
|
596 | 596 | |
|
597 | 597 | files = set(ctx.files()) |
|
598 | 598 | if len(parents) == 2: |
|
599 | 599 | mc = ctx.manifest() |
|
600 | 600 | mp1 = ctx.p1().manifest() |
|
601 | 601 | mp2 = ctx.p2().manifest() |
|
602 | 602 | for f in mp1: |
|
603 | 603 | if f not in mc: |
|
604 | 604 | files.add(f) |
|
605 | 605 | for f in mp2: |
|
606 | 606 | if f not in mc: |
|
607 | 607 | files.add(f) |
|
608 | 608 | for f in mc: |
|
609 | 609 | if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): |
|
610 | 610 | files.add(f) |
|
611 | 611 | for fn in files: |
|
612 | 612 | if isstandin(fn) and fn in ctx: |
|
613 | 613 | addfunc(fn, readasstandin(ctx[fn])) |
|
614 | 614 | |
|
615 | 615 | |
|
616 | 616 | def updatestandinsbymatch(repo, match): |
|
617 | 617 | '''Update standins in the working directory according to specified match |
|
618 | 618 | |
|
619 | 619 | This returns (possibly modified) ``match`` object to be used for |
|
620 | 620 | subsequent commit process. |
|
621 | 621 | ''' |
|
622 | 622 | |
|
623 | 623 | ui = repo.ui |
|
624 | 624 | |
|
625 | 625 | # Case 1: user calls commit with no specific files or |
|
626 | 626 | # include/exclude patterns: refresh and commit all files that |
|
627 | 627 | # are "dirty". |
|
628 | 628 | if match is None or match.always(): |
|
629 | 629 | # Spend a bit of time here to get a list of files we know |
|
630 | 630 | # are modified so we can compare only against those. |
|
631 | 631 | # It can cost a lot of time (several seconds) |
|
632 | 632 | # otherwise to update all standins if the largefiles are |
|
633 | 633 | # large. |
|
634 | 634 | lfdirstate = openlfdirstate(ui, repo) |
|
635 | 635 | dirtymatch = matchmod.always() |
|
636 | 636 | unsure, s = lfdirstate.status( |
|
637 | 637 | dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False |
|
638 | 638 | ) |
|
639 | 639 | modifiedfiles = unsure + s.modified + s.added + s.removed |
|
640 | 640 | lfiles = listlfiles(repo) |
|
641 | 641 | # this only loops through largefiles that exist (not |
|
642 | 642 | # removed/renamed) |
|
643 | 643 | for lfile in lfiles: |
|
644 | 644 | if lfile in modifiedfiles: |
|
645 | 645 | fstandin = standin(lfile) |
|
646 | 646 | if repo.wvfs.exists(fstandin): |
|
647 | 647 | # this handles the case where a rebase is being |
|
648 | 648 | # performed and the working copy is not updated |
|
649 | 649 | # yet. |
|
650 | 650 | if repo.wvfs.exists(lfile): |
|
651 | 651 | updatestandin(repo, lfile, fstandin) |
|
652 | 652 | |
|
653 | 653 | return match |
|
654 | 654 | |
|
655 | 655 | lfiles = listlfiles(repo) |
|
656 | 656 | match._files = repo._subdirlfs(match.files(), lfiles) |
|
657 | 657 | |
|
658 | 658 | # Case 2: user calls commit with specified patterns: refresh |
|
659 | 659 | # any matching big files. |
|
660 | 660 | smatcher = composestandinmatcher(repo, match) |
|
661 | 661 | standins = repo.dirstate.walk( |
|
662 | 662 | smatcher, subrepos=[], unknown=False, ignored=False |
|
663 | 663 | ) |
|
664 | 664 | |
|
665 | 665 | # No matching big files: get out of the way and pass control to |
|
666 | 666 | # the usual commit() method. |
|
667 | 667 | if not standins: |
|
668 | 668 | return match |
|
669 | 669 | |
|
670 | 670 | # Refresh all matching big files. It's possible that the |
|
671 | 671 | # commit will end up failing, in which case the big files will |
|
672 | 672 | # stay refreshed. No harm done: the user modified them and |
|
673 | 673 | # asked to commit them, so sooner or later we're going to |
|
674 | 674 | # refresh the standins. Might as well leave them refreshed. |
|
675 | 675 | lfdirstate = openlfdirstate(ui, repo) |
|
676 | 676 | for fstandin in standins: |
|
677 | 677 | lfile = splitstandin(fstandin) |
|
678 | 678 | if lfdirstate[lfile] != b'r': |
|
679 | 679 | updatestandin(repo, lfile, fstandin) |
|
680 | 680 | |
|
681 | 681 | # Cook up a new matcher that only matches regular files or |
|
682 | 682 | # standins corresponding to the big files requested by the |
|
683 | 683 | # user. Have to modify _files to prevent commit() from |
|
684 | 684 | # complaining "not tracked" for big files. |
|
685 | 685 | match = copy.copy(match) |
|
686 | 686 | origmatchfn = match.matchfn |
|
687 | 687 | |
|
688 | 688 | # Check both the list of largefiles and the list of |
|
689 | 689 | # standins because if a largefile was removed, it |
|
690 | 690 | # won't be in the list of largefiles at this point |
|
691 | 691 | match._files += sorted(standins) |
|
692 | 692 | |
|
693 | 693 | actualfiles = [] |
|
694 | 694 | for f in match._files: |
|
695 | 695 | fstandin = standin(f) |
|
696 | 696 | |
|
697 | 697 | # For largefiles, only one of the normal and standin should be |
|
698 | 698 | # committed (except if one of them is a remove). In the case of a |
|
699 | 699 | # standin removal, drop the normal file if it is unknown to dirstate. |
|
700 | 700 | # Thus, skip plain largefile names but keep the standin. |
|
701 | 701 | if f in lfiles or fstandin in standins: |
|
702 | 702 | if repo.dirstate[fstandin] != b'r': |
|
703 | 703 | if repo.dirstate[f] != b'r': |
|
704 | 704 | continue |
|
705 | 705 | elif repo.dirstate[f] == b'?': |
|
706 | 706 | continue |
|
707 | 707 | |
|
708 | 708 | actualfiles.append(f) |
|
709 | 709 | match._files = actualfiles |
|
710 | 710 | |
|
711 | 711 | def matchfn(f): |
|
712 | 712 | if origmatchfn(f): |
|
713 | 713 | return f not in lfiles |
|
714 | 714 | else: |
|
715 | 715 | return f in standins |
|
716 | 716 | |
|
717 | 717 | match.matchfn = matchfn |
|
718 | 718 | |
|
719 | 719 | return match |
|
720 | 720 | |
|
721 | 721 | |
|
722 | 722 | class automatedcommithook(object): |
|
723 | 723 | '''Stateful hook to update standins at the 1st commit of resuming |
|
724 | 724 | |
|
725 | 725 | For efficiency, updating standins in the working directory should |
|
726 | 726 | be avoided while automated committing (like rebase, transplant and |
|
727 | 727 | so on), because they should be updated before committing. |
|
728 | 728 | |
|
729 | 729 | But the 1st commit of resuming automated committing (e.g. ``rebase |
|
730 | 730 | --continue``) should update them, because largefiles may be |
|
731 | 731 | modified manually. |
|
732 | 732 | ''' |
|
733 | 733 | |
|
734 | 734 | def __init__(self, resuming): |
|
735 | 735 | self.resuming = resuming |
|
736 | 736 | |
|
737 | 737 | def __call__(self, repo, match): |
|
738 | 738 | if self.resuming: |
|
739 | 739 | self.resuming = False # avoids updating at subsequent commits |
|
740 | 740 | return updatestandinsbymatch(repo, match) |
|
741 | 741 | else: |
|
742 | 742 | return match |
|
743 | 743 | |
|
744 | 744 | |
|
745 | 745 | def getstatuswriter(ui, repo, forcibly=None): |
|
746 | 746 | '''Return the function to write largefiles specific status out |
|
747 | 747 | |
|
748 | 748 | If ``forcibly`` is ``None``, this returns the last element of |
|
749 | 749 | ``repo._lfstatuswriters`` as "default" writer function. |
|
750 | 750 | |
|
751 | 751 | Otherwise, this returns the function to always write out (or |
|
752 | 752 | ignore if ``not forcibly``) status. |
|
753 | 753 | ''' |
|
754 | 754 | if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'): |
|
755 | 755 | return repo._lfstatuswriters[-1] |
|
756 | 756 | else: |
|
757 | 757 | if forcibly: |
|
758 | 758 | return ui.status # forcibly WRITE OUT |
|
759 | 759 | else: |
|
760 | 760 | return lambda *msg, **opts: None # forcibly IGNORE |
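The hashing changes in this file (copyandhash() and hexsha1() above) follow the same pattern as the rest of the series: the module-level import of hashlib goes away and SHA-1 objects come from mercurial.utils.hashutil instead, so every SHA-1 construction funnels through one helper. A rough sketch of the resulting idiom, assuming a Mercurial checkout is importable and that hashutil.sha1() returns a standard hashlib-style object (the chunk-hashing helper below is illustrative, not part of the patch):

    from mercurial.utils import hashutil

    def sha1_of_chunks(chunks):
        # Mirrors the copyandhash()/hexsha1() idiom: feed data into a SHA-1
        # object obtained from hashutil rather than calling hashlib directly.
        h = hashutil.sha1()
        for chunk in chunks:
            h.update(chunk)
        return h.hexdigest()

    # Standard SHA-1 test vector for b'abc'.
    assert sha1_of_chunks([b'abc']) == 'a9993e364706816aba3e25717850c26c9cd0d89d'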
@@ -1,561 +1,561 b'' | |||
|
1 | 1 | from __future__ import absolute_import |
|
2 | 2 | |
|
3 | 3 | import collections |
|
4 | 4 | import errno |
|
5 | import hashlib | |
|
6 | 5 | import mmap |
|
7 | 6 | import os |
|
8 | 7 | import struct |
|
9 | 8 | import time |
|
10 | 9 | |
|
11 | 10 | from mercurial.i18n import _ |
|
12 | 11 | from mercurial.pycompat import ( |
|
13 | 12 | getattr, |
|
14 | 13 | open, |
|
15 | 14 | ) |
|
16 | 15 | from mercurial import ( |
|
17 | 16 | node as nodemod, |
|
18 | 17 | policy, |
|
19 | 18 | pycompat, |
|
20 | 19 | util, |
|
21 | 20 | vfs as vfsmod, |
|
22 | 21 | ) |
|
22 | from mercurial.utils import hashutil | |
|
23 | 23 | from . import shallowutil |
|
24 | 24 | |
|
25 | 25 | osutil = policy.importmod('osutil') |
|
26 | 26 | |
|
27 | 27 | # The pack version supported by this implementation. This will need to be |
|
28 | 28 | # rev'd whenever the byte format changes. Ex: changing the fanout prefix, |
|
29 | 29 | # changing any of the int sizes, changing the delta algorithm, etc. |
|
30 | 30 | PACKVERSIONSIZE = 1 |
|
31 | 31 | INDEXVERSIONSIZE = 2 |
|
32 | 32 | |
|
33 | 33 | FANOUTSTART = INDEXVERSIONSIZE |
|
34 | 34 | |
|
35 | 35 | # Constant that indicates a fanout table entry hasn't been filled in. (This does |
|
36 | 36 | # not get serialized) |
|
37 | 37 | EMPTYFANOUT = -1 |
|
38 | 38 | |
|
39 | 39 | # The fanout prefix is the number of bytes that can be addressed by the fanout |
|
40 | 40 | # table. Example: a fanout prefix of 1 means we use the first byte of a hash to |
|
41 | 41 | # look in the fanout table (which will be 2^8 entries long). |
|
42 | 42 | SMALLFANOUTPREFIX = 1 |
|
43 | 43 | LARGEFANOUTPREFIX = 2 |
|
44 | 44 | |
|
45 | 45 | # The number of entries in the index at which point we switch to a large fanout. |
|
46 | 46 | # It is chosen to balance the linear scan through a sparse fanout, with the |
|
47 | 47 | # size of the bisect in actual index. |
|
48 | 48 | # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step |
|
49 | 49 | # bisect) with (8 step fanout scan + 1 step bisect) |
|
50 | 50 | # 5 step bisect = log(2^16 / 8 / 255) # fanout |
|
51 | 51 | # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries 
|
52 | 52 | SMALLFANOUTCUTOFF = 2 ** 16 // 8 |
|
53 | 53 | |
|
54 | 54 | # The amount of time to wait between checking for new packs. This prevents an |
|
55 | 55 | # exception when data is moved to a new pack after the process has already |
|
56 | 56 | # loaded the pack list. |
|
57 | 57 | REFRESHRATE = 0.1 |
|
58 | 58 | |
|
59 | 59 | if pycompat.isposix and not pycompat.ispy3: |
|
60 | 60 | # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening. |
|
61 | 61 | # The 'e' flag will be ignored on older versions of glibc. |
|
62 | 62 | # Python 3 can't handle the 'e' flag. |
|
63 | 63 | PACKOPENMODE = b'rbe' |
|
64 | 64 | else: |
|
65 | 65 | PACKOPENMODE = b'rb' |
|
66 | 66 | |
|
67 | 67 | |
|
68 | 68 | class _cachebackedpacks(object): |
|
69 | 69 | def __init__(self, packs, cachesize): |
|
70 | 70 | self._packs = set(packs) |
|
71 | 71 | self._lrucache = util.lrucachedict(cachesize) |
|
72 | 72 | self._lastpack = None |
|
73 | 73 | |
|
74 | 74 | # Avoid cold start of the cache by populating the most recent packs |
|
75 | 75 | # in the cache. |
|
76 | 76 | for i in reversed(range(min(cachesize, len(packs)))): |
|
77 | 77 | self._movetofront(packs[i]) |
|
78 | 78 | |
|
79 | 79 | def _movetofront(self, pack): |
|
80 | 80 | # This effectively makes pack the first entry in the cache. |
|
81 | 81 | self._lrucache[pack] = True |
|
82 | 82 | |
|
83 | 83 | def _registerlastpackusage(self): |
|
84 | 84 | if self._lastpack is not None: |
|
85 | 85 | self._movetofront(self._lastpack) |
|
86 | 86 | self._lastpack = None |
|
87 | 87 | |
|
88 | 88 | def add(self, pack): |
|
89 | 89 | self._registerlastpackusage() |
|
90 | 90 | |
|
91 | 91 | # This method will mostly be called when packs are not in cache. |
|
92 | 92 | # Therefore, add the pack to the cache. 
|
93 | 93 | self._movetofront(pack) |
|
94 | 94 | self._packs.add(pack) |
|
95 | 95 | |
|
96 | 96 | def __iter__(self): |
|
97 | 97 | self._registerlastpackusage() |
|
98 | 98 | |
|
99 | 99 | # Cache iteration is based on LRU. |
|
100 | 100 | for pack in self._lrucache: |
|
101 | 101 | self._lastpack = pack |
|
102 | 102 | yield pack |
|
103 | 103 | |
|
104 | 104 | cachedpacks = set(pack for pack in self._lrucache) |
|
105 | 105 | # Yield for paths not in the cache. |
|
106 | 106 | for pack in self._packs - cachedpacks: |
|
107 | 107 | self._lastpack = pack |
|
108 | 108 | yield pack |
|
109 | 109 | |
|
110 | 110 | # Data not found in any pack. |
|
111 | 111 | self._lastpack = None |
|
112 | 112 | |
|
113 | 113 | |
|
114 | 114 | class basepackstore(object): |
|
115 | 115 | # Default cache size limit for the pack files. |
|
116 | 116 | DEFAULTCACHESIZE = 100 |
|
117 | 117 | |
|
118 | 118 | def __init__(self, ui, path): |
|
119 | 119 | self.ui = ui |
|
120 | 120 | self.path = path |
|
121 | 121 | |
|
122 | 122 | # lastrefresh is 0 so we'll immediately check for new packs on the first 
|
123 | 123 | # failure. |
|
124 | 124 | self.lastrefresh = 0 |
|
125 | 125 | |
|
126 | 126 | packs = [] |
|
127 | 127 | for filepath, __, __ in self._getavailablepackfilessorted(): |
|
128 | 128 | try: |
|
129 | 129 | pack = self.getpack(filepath) |
|
130 | 130 | except Exception as ex: |
|
131 | 131 | # An exception may be thrown if the pack file is corrupted |
|
132 | 132 | # somehow. Log a warning but keep going in this case, just |
|
133 | 133 | # skipping this pack file. |
|
134 | 134 | # |
|
135 | 135 | # If this is an ENOENT error then don't even bother logging. |
|
136 | 136 | # Someone could have removed the file since we retrieved the |
|
137 | 137 | # list of paths. |
|
138 | 138 | if getattr(ex, 'errno', None) != errno.ENOENT: |
|
139 | 139 | ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex)) |
|
140 | 140 | continue |
|
141 | 141 | packs.append(pack) |
|
142 | 142 | |
|
143 | 143 | self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE) |
|
144 | 144 | |
|
145 | 145 | def _getavailablepackfiles(self): |
|
146 | 146 | """For each pack file (a index/data file combo), yields: |
|
147 | 147 | (full path without extension, mtime, size) |
|
148 | 148 | |
|
149 | 149 | mtime will be the mtime of the index/data file (whichever is newer) |
|
150 | 150 | size is the combined size of index/data file |
|
151 | 151 | """ |
|
152 | 152 | indexsuffixlen = len(self.INDEXSUFFIX) |
|
153 | 153 | packsuffixlen = len(self.PACKSUFFIX) |
|
154 | 154 | |
|
155 | 155 | ids = set() |
|
156 | 156 | sizes = collections.defaultdict(lambda: 0) |
|
157 | 157 | mtimes = collections.defaultdict(lambda: []) |
|
158 | 158 | try: |
|
159 | 159 | for filename, type, stat in osutil.listdir(self.path, stat=True): |
|
160 | 160 | id = None |
|
161 | 161 | if filename[-indexsuffixlen:] == self.INDEXSUFFIX: |
|
162 | 162 | id = filename[:-indexsuffixlen] |
|
163 | 163 | elif filename[-packsuffixlen:] == self.PACKSUFFIX: |
|
164 | 164 | id = filename[:-packsuffixlen] |
|
165 | 165 | |
|
166 | 166 | # Since we expect to have two files corresponding to each ID |
|
167 | 167 | # (the index file and the pack file), we can yield once we see |
|
168 | 168 | # it twice. |
|
169 | 169 | if id: |
|
170 | 170 | sizes[id] += stat.st_size # Sum both files' sizes together |
|
171 | 171 | mtimes[id].append(stat.st_mtime) |
|
172 | 172 | if id in ids: |
|
173 | 173 | yield ( |
|
174 | 174 | os.path.join(self.path, id), |
|
175 | 175 | max(mtimes[id]), |
|
176 | 176 | sizes[id], |
|
177 | 177 | ) |
|
178 | 178 | else: |
|
179 | 179 | ids.add(id) |
|
180 | 180 | except OSError as ex: |
|
181 | 181 | if ex.errno != errno.ENOENT: |
|
182 | 182 | raise |
|
183 | 183 | |
|
184 | 184 | def _getavailablepackfilessorted(self): |
|
185 | 185 | """Like `_getavailablepackfiles`, but also sorts the files by mtime, |
|
186 | 186 | yielding newest files first. |
|
187 | 187 | |
|
188 | 188 | This is desirable, since it is more likely newer packfiles have more |
|
189 | 189 | desirable data. |
|
190 | 190 | """ |
|
191 | 191 | files = [] |
|
192 | 192 | for path, mtime, size in self._getavailablepackfiles(): |
|
193 | 193 | files.append((mtime, size, path)) |
|
194 | 194 | files = sorted(files, reverse=True) |
|
195 | 195 | for mtime, size, path in files: |
|
196 | 196 | yield path, mtime, size |
|
197 | 197 | |
|
198 | 198 | def gettotalsizeandcount(self): |
|
199 | 199 | """Returns the total disk size (in bytes) of all the pack files in |
|
200 | 200 | this store, and the count of pack files. |
|
201 | 201 | |
|
202 | 202 | (This might be smaller than the total size of the ``self.path`` |
|
203 | 203 | directory, since this only considers fully-written pack files, and not 
|
204 | 204 | temporary files or other detritus on the directory.) |
|
205 | 205 | """ |
|
206 | 206 | totalsize = 0 |
|
207 | 207 | count = 0 |
|
208 | 208 | for __, __, size in self._getavailablepackfiles(): |
|
209 | 209 | totalsize += size |
|
210 | 210 | count += 1 |
|
211 | 211 | return totalsize, count |
|
212 | 212 | |
|
213 | 213 | def getmetrics(self): |
|
214 | 214 | """Returns metrics on the state of this store.""" |
|
215 | 215 | size, count = self.gettotalsizeandcount() |
|
216 | 216 | return { |
|
217 | 217 | b'numpacks': count, |
|
218 | 218 | b'totalpacksize': size, |
|
219 | 219 | } |
|
220 | 220 | |
|
221 | 221 | def getpack(self, path): |
|
222 | 222 | raise NotImplementedError() |
|
223 | 223 | |
|
224 | 224 | def getmissing(self, keys): |
|
225 | 225 | missing = keys |
|
226 | 226 | for pack in self.packs: |
|
227 | 227 | missing = pack.getmissing(missing) |
|
228 | 228 | |
|
229 | 229 | # Ensures better performance of the cache by keeping the most |
|
230 | 230 | # recently accessed pack at the beginning in subsequent iterations. |
|
231 | 231 | if not missing: |
|
232 | 232 | return missing |
|
233 | 233 | |
|
234 | 234 | if missing: |
|
235 | 235 | for pack in self.refresh(): |
|
236 | 236 | missing = pack.getmissing(missing) |
|
237 | 237 | |
|
238 | 238 | return missing |
|
239 | 239 | |
|
240 | 240 | def markledger(self, ledger, options=None): |
|
241 | 241 | for pack in self.packs: |
|
242 | 242 | pack.markledger(ledger) |
|
243 | 243 | |
|
244 | 244 | def markforrefresh(self): |
|
245 | 245 | """Tells the store that there may be new pack files, so the next time it |
|
246 | 246 | has a lookup miss it should check for new files.""" |
|
247 | 247 | self.lastrefresh = 0 |
|
248 | 248 | |
|
249 | 249 | def refresh(self): |
|
250 | 250 | """Checks for any new packs on disk, adds them to the main pack list, |
|
251 | 251 | and returns a list of just the new packs.""" |
|
252 | 252 | now = time.time() |
|
253 | 253 | |
|
254 | 254 | # If we experience a lot of misses (like in the case of getmissing() on |
|
255 | 255 | # new objects), let's only actually check disk for new stuff every once |
|
256 | 256 | # in a while. Generally this code path should only ever matter when a |
|
257 | 257 | # repack is going on in the background, and that should be pretty rare |
|
258 | 258 | # to have that happen twice in quick succession. |
|
259 | 259 | newpacks = [] |
|
260 | 260 | if now > self.lastrefresh + REFRESHRATE: |
|
261 | 261 | self.lastrefresh = now |
|
262 | 262 | previous = set(p.path for p in self.packs) |
|
263 | 263 | for filepath, __, __ in self._getavailablepackfilessorted(): |
|
264 | 264 | if filepath not in previous: |
|
265 | 265 | newpack = self.getpack(filepath) |
|
266 | 266 | newpacks.append(newpack) |
|
267 | 267 | self.packs.add(newpack) |
|
268 | 268 | |
|
269 | 269 | return newpacks |
|
270 | 270 | |
|
271 | 271 | |
|
272 | 272 | class versionmixin(object): |
|
273 | 273 | # Mix-in for classes with multiple supported versions |
|
274 | 274 | VERSION = None |
|
275 | 275 | SUPPORTED_VERSIONS = [2] |
|
276 | 276 | |
|
277 | 277 | def _checkversion(self, version): |
|
278 | 278 | if version in self.SUPPORTED_VERSIONS: |
|
279 | 279 | if self.VERSION is None: |
|
280 | 280 | # only affect this instance |
|
281 | 281 | self.VERSION = version |
|
282 | 282 | elif self.VERSION != version: |
|
283 | 283 | raise RuntimeError(b'inconsistent version: %d' % version) |
|
284 | 284 | else: |
|
285 | 285 | raise RuntimeError(b'unsupported version: %d' % version) |
|
286 | 286 | |
|
287 | 287 | |
|
288 | 288 | class basepack(versionmixin): |
|
289 | 289 | # The maximum amount we should read via mmap before remapping so the old 
|
290 | 290 | # pages can be released (100MB) |
|
291 | 291 | MAXPAGEDIN = 100 * 1024 ** 2 |
|
292 | 292 | |
|
293 | 293 | SUPPORTED_VERSIONS = [2] |
|
294 | 294 | |
|
295 | 295 | def __init__(self, path): |
|
296 | 296 | self.path = path |
|
297 | 297 | self.packpath = path + self.PACKSUFFIX |
|
298 | 298 | self.indexpath = path + self.INDEXSUFFIX |
|
299 | 299 | |
|
300 | 300 | self.indexsize = os.stat(self.indexpath).st_size |
|
301 | 301 | self.datasize = os.stat(self.packpath).st_size |
|
302 | 302 | |
|
303 | 303 | self._index = None |
|
304 | 304 | self._data = None |
|
305 | 305 | self.freememory() # initialize the mmap |
|
306 | 306 | |
|
307 | 307 | version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0] |
|
308 | 308 | self._checkversion(version) |
|
309 | 309 | |
|
310 | 310 | version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE]) |
|
311 | 311 | self._checkversion(version) |
|
312 | 312 | |
|
313 | 313 | if 0b10000000 & config: |
|
314 | 314 | self.params = indexparams(LARGEFANOUTPREFIX, version) |
|
315 | 315 | else: |
|
316 | 316 | self.params = indexparams(SMALLFANOUTPREFIX, version) |
|
317 | 317 | |
|
318 | 318 | @util.propertycache |
|
319 | 319 | def _fanouttable(self): |
|
320 | 320 | params = self.params |
|
321 | 321 | rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize] |
|
322 | 322 | fanouttable = [] |
|
323 | 323 | for i in pycompat.xrange(0, params.fanoutcount): |
|
324 | 324 | loc = i * 4 |
|
325 | 325 | fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0] |
|
326 | 326 | fanouttable.append(fanoutentry) |
|
327 | 327 | return fanouttable |
|
328 | 328 | |
|
329 | 329 | @util.propertycache |
|
330 | 330 | def _indexend(self): |
|
331 | 331 | nodecount = struct.unpack_from( |
|
332 | 332 | b'!Q', self._index, self.params.indexstart - 8 |
|
333 | 333 | )[0] |
|
334 | 334 | return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH |
|
335 | 335 | |
|
336 | 336 | def freememory(self): |
|
337 | 337 | """Unmap and remap the memory to free it up after known expensive |
|
338 | 338 | operations. Return True if self._data and self._index were reloaded. |
|
339 | 339 | """ |
|
340 | 340 | if self._index: |
|
341 | 341 | if self._pagedin < self.MAXPAGEDIN: |
|
342 | 342 | return False |
|
343 | 343 | |
|
344 | 344 | self._index.close() |
|
345 | 345 | self._data.close() |
|
346 | 346 | |
|
347 | 347 | # TODO: use an opener/vfs to access these paths |
|
348 | 348 | with open(self.indexpath, PACKOPENMODE) as indexfp: |
|
349 | 349 | # memory-map the file, size 0 means whole file |
|
350 | 350 | self._index = mmap.mmap( |
|
351 | 351 | indexfp.fileno(), 0, access=mmap.ACCESS_READ |
|
352 | 352 | ) |
|
353 | 353 | with open(self.packpath, PACKOPENMODE) as datafp: |
|
354 | 354 | self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ) |
|
355 | 355 | |
|
356 | 356 | self._pagedin = 0 |
|
357 | 357 | return True |
|
358 | 358 | |
|
359 | 359 | def getmissing(self, keys): |
|
360 | 360 | raise NotImplementedError() |
|
361 | 361 | |
|
362 | 362 | def markledger(self, ledger, options=None): |
|
363 | 363 | raise NotImplementedError() |
|
364 | 364 | |
|
365 | 365 | def cleanup(self, ledger): |
|
366 | 366 | raise NotImplementedError() |
|
367 | 367 | |
|
368 | 368 | def __iter__(self): |
|
369 | 369 | raise NotImplementedError() |
|
370 | 370 | |
|
371 | 371 | def iterentries(self): |
|
372 | 372 | raise NotImplementedError() |
|
373 | 373 | |
|
374 | 374 | |
|
375 | 375 | class mutablebasepack(versionmixin): |
|
376 | 376 | def __init__(self, ui, packdir, version=2): |
|
377 | 377 | self._checkversion(version) |
|
378 | 378 | # TODO(augie): make this configurable |
|
379 | 379 | self._compressor = b'GZ' |
|
380 | 380 | opener = vfsmod.vfs(packdir) |
|
381 | 381 | opener.createmode = 0o444 |
|
382 | 382 | self.opener = opener |
|
383 | 383 | |
|
384 | 384 | self.entries = {} |
|
385 | 385 | |
|
386 | 386 | shallowutil.mkstickygroupdir(ui, packdir) |
|
387 | 387 | self.packfp, self.packpath = opener.mkstemp( |
|
388 | 388 | suffix=self.PACKSUFFIX + b'-tmp' |
|
389 | 389 | ) |
|
390 | 390 | self.idxfp, self.idxpath = opener.mkstemp( |
|
391 | 391 | suffix=self.INDEXSUFFIX + b'-tmp' |
|
392 | 392 | ) |
|
393 | 393 | self.packfp = os.fdopen(self.packfp, 'wb+') |
|
394 | 394 | self.idxfp = os.fdopen(self.idxfp, 'wb+') |
|
395 | self.sha = hashlib.sha1() | 

395 | self.sha = hashutil.sha1() | |
|
396 | 396 | self._closed = False |
|
397 | 397 | |
|
398 | 398 | # The opener provides no way of doing permission fixup on files created |
|
399 | 399 | # via mkstemp, so we must fix it ourselves. We can probably fix this |
|
400 | 400 | # upstream in vfs.mkstemp so we don't need to use the private method. |
|
401 | 401 | opener._fixfilemode(opener.join(self.packpath)) |
|
402 | 402 | opener._fixfilemode(opener.join(self.idxpath)) |
|
403 | 403 | |
|
404 | 404 | # Write header |
|
405 | 405 | # TODO: make it extensible (ex: allow specifying compression algorithm, |
|
406 | 406 | # a flexible key/value header, delta algorithm, fanout size, etc) |
|
407 | 407 | versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int |
|
408 | 408 | self.writeraw(versionbuf) |
|
409 | 409 | |
|
410 | 410 | def __enter__(self): |
|
411 | 411 | return self |
|
412 | 412 | |
|
413 | 413 | def __exit__(self, exc_type, exc_value, traceback): |
|
414 | 414 | if exc_type is None: |
|
415 | 415 | self.close() |
|
416 | 416 | else: |
|
417 | 417 | self.abort() |
|
418 | 418 | |
|
419 | 419 | def abort(self): |
|
420 | 420 | # Unclean exit |
|
421 | 421 | self._cleantemppacks() |
|
422 | 422 | |
|
423 | 423 | def writeraw(self, data): |
|
424 | 424 | self.packfp.write(data) |
|
425 | 425 | self.sha.update(data) |
|
426 | 426 | |
|
427 | 427 | def close(self, ledger=None): |
|
428 | 428 | if self._closed: |
|
429 | 429 | return |
|
430 | 430 | |
|
431 | 431 | try: |
|
432 | 432 | sha = nodemod.hex(self.sha.digest()) |
|
433 | 433 | self.packfp.close() |
|
434 | 434 | self.writeindex() |
|
435 | 435 | |
|
436 | 436 | if len(self.entries) == 0: |
|
437 | 437 | # Empty pack |
|
438 | 438 | self._cleantemppacks() |
|
439 | 439 | self._closed = True |
|
440 | 440 | return None |
|
441 | 441 | |
|
442 | 442 | self.opener.rename(self.packpath, sha + self.PACKSUFFIX) |
|
443 | 443 | try: |
|
444 | 444 | self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX) |
|
445 | 445 | except Exception as ex: |
|
446 | 446 | try: |
|
447 | 447 | self.opener.unlink(sha + self.PACKSUFFIX) |
|
448 | 448 | except Exception: |
|
449 | 449 | pass |
|
450 | 450 | # Throw exception 'ex' explicitly since a normal 'raise' would |
|
451 | 451 | # potentially throw an exception from the unlink cleanup. |
|
452 | 452 | raise ex |
|
453 | 453 | except Exception: |
|
454 | 454 | # Clean up temp packs in all exception cases |
|
455 | 455 | self._cleantemppacks() |
|
456 | 456 | raise |
|
457 | 457 | |
|
458 | 458 | self._closed = True |
|
459 | 459 | result = self.opener.join(sha) |
|
460 | 460 | if ledger: |
|
461 | 461 | ledger.addcreated(result) |
|
462 | 462 | return result |
|
463 | 463 | |
|
464 | 464 | def _cleantemppacks(self): |
|
465 | 465 | try: |
|
466 | 466 | self.opener.unlink(self.packpath) |
|
467 | 467 | except Exception: |
|
468 | 468 | pass |
|
469 | 469 | try: |
|
470 | 470 | self.opener.unlink(self.idxpath) |
|
471 | 471 | except Exception: |
|
472 | 472 | pass |
|
473 | 473 | |
|
474 | 474 | def writeindex(self): |
|
475 | 475 | largefanout = len(self.entries) > SMALLFANOUTCUTOFF |
|
476 | 476 | if largefanout: |
|
477 | 477 | params = indexparams(LARGEFANOUTPREFIX, self.VERSION) |
|
478 | 478 | else: |
|
479 | 479 | params = indexparams(SMALLFANOUTPREFIX, self.VERSION) |
|
480 | 480 | |
|
481 | 481 | fanouttable = [EMPTYFANOUT] * params.fanoutcount |
|
482 | 482 | |
|
483 | 483 | # Precompute the location of each entry |
|
484 | 484 | locations = {} |
|
485 | 485 | count = 0 |
|
486 | 486 | for node in sorted(self.entries): |
|
487 | 487 | location = count * self.INDEXENTRYLENGTH |
|
488 | 488 | locations[node] = location |
|
489 | 489 | count += 1 |
|
490 | 490 | |
|
491 | 491 | # Must use [0] on the unpack result since it's always a tuple. |
|
492 | 492 | fanoutkey = struct.unpack( |
|
493 | 493 | params.fanoutstruct, node[: params.fanoutprefix] |
|
494 | 494 | )[0] |
|
495 | 495 | if fanouttable[fanoutkey] == EMPTYFANOUT: |
|
496 | 496 | fanouttable[fanoutkey] = location |
|
497 | 497 | |
|
498 | 498 | rawfanouttable = b'' |
|
499 | 499 | last = 0 |
|
500 | 500 | for offset in fanouttable: |
|
501 | 501 | offset = offset if offset != EMPTYFANOUT else last |
|
502 | 502 | last = offset |
|
503 | 503 | rawfanouttable += struct.pack(b'!I', offset) |
|
504 | 504 | |
|
505 | 505 | rawentrieslength = struct.pack(b'!Q', len(self.entries)) |
|
506 | 506 | |
|
507 | 507 | # The index offset is its location in the file. So after the 2 byte 
|
508 | 508 | # header and the fanouttable. |
|
509 | 509 | rawindex = self.createindex(locations, 2 + len(rawfanouttable)) |
|
510 | 510 | |
|
511 | 511 | self._writeheader(params) |
|
512 | 512 | self.idxfp.write(rawfanouttable) |
|
513 | 513 | self.idxfp.write(rawentrieslength) |
|
514 | 514 | self.idxfp.write(rawindex) |
|
515 | 515 | self.idxfp.close() |
|
516 | 516 | |
|
517 | 517 | def createindex(self, nodelocations): |
|
518 | 518 | raise NotImplementedError() |
|
519 | 519 | |
|
520 | 520 | def _writeheader(self, indexparams): |
|
521 | 521 | # Index header |
|
522 | 522 | # <version: 1 byte> |
|
523 | 523 | # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8 |
|
524 | 524 | # <unused: 7 bit> # future use (compression, delta format, etc) |
|
525 | 525 | config = 0 |
|
526 | 526 | if indexparams.fanoutprefix == LARGEFANOUTPREFIX: |
|
527 | 527 | config = 0b10000000 |
|
528 | 528 | self.idxfp.write(struct.pack(b'!BB', self.VERSION, config)) |
|
529 | 529 | |
|
530 | 530 | |
|
531 | 531 | class indexparams(object): |
|
532 | 532 | __slots__ = ( |
|
533 | 533 | 'fanoutprefix', |
|
534 | 534 | 'fanoutstruct', |
|
535 | 535 | 'fanoutcount', |
|
536 | 536 | 'fanoutsize', |
|
537 | 537 | 'indexstart', |
|
538 | 538 | ) |
|
539 | 539 | |
|
540 | 540 | def __init__(self, prefixsize, version): |
|
541 | 541 | self.fanoutprefix = prefixsize |
|
542 | 542 | |
|
543 | 543 | # The struct pack format for fanout table location (i.e. the format that |
|
544 | 544 | # converts the node prefix into an integer location in the fanout |
|
545 | 545 | # table). |
|
546 | 546 | if prefixsize == SMALLFANOUTPREFIX: |
|
547 | 547 | self.fanoutstruct = b'!B' |
|
548 | 548 | elif prefixsize == LARGEFANOUTPREFIX: |
|
549 | 549 | self.fanoutstruct = b'!H' |
|
550 | 550 | else: |
|
551 | 551 | raise ValueError(b"invalid fanout prefix size: %s" % prefixsize) |
|
552 | 552 | |
|
553 | 553 | # The number of fanout table entries |
|
554 | 554 | self.fanoutcount = 2 ** (prefixsize * 8) |
|
555 | 555 | |
|
556 | 556 | # The total bytes used by the fanout table |
|
557 | 557 | self.fanoutsize = self.fanoutcount * 4 |
|
558 | 558 | |
|
559 | 559 | self.indexstart = FANOUTSTART + self.fanoutsize |
|
560 | 560 | # Skip the index length |
|
561 | 561 | self.indexstart += 8 |
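The index layout that indexparams describes above reduces to a little arithmetic: a two-byte fanout prefix (LARGEFANOUTPREFIX) gives 2**16 four-byte fanout slots, the first two bytes of a node select a slot via the '!H' struct format, and the entry area starts right after the 2-byte index header, the fanout table and the 8-byte entry count. A small self-contained sketch of that mapping (the node value is just a sample digest, not data from the patch):

    import struct

    LARGEFANOUTPREFIX = 2
    fanoutcount = 2 ** (LARGEFANOUTPREFIX * 8)  # 65536 slots
    fanoutsize = fanoutcount * 4                # 262144 bytes of fanout table
    indexstart = 2 + fanoutsize + 8             # header + fanout + entry count

    # The first two bytes of a node pick its fanout slot (big-endian ushort).
    node = bytes.fromhex('a94a8fe5ccb19ba61c4c0873d391e987982fbbd3')
    fanoutkey = struct.unpack(b'!H', node[:LARGEFANOUTPREFIX])[0]
    assert fanoutkey == 0xA94A
    assert indexstart == 262154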
@@ -1,461 +1,461 b'' | |||
|
1 | 1 | from __future__ import absolute_import |
|
2 | 2 | |
|
3 | 3 | import errno |
|
4 | import hashlib | |
|
5 | 4 | import os |
|
6 | 5 | import shutil |
|
7 | 6 | import stat |
|
8 | 7 | import time |
|
9 | 8 | |
|
10 | 9 | from mercurial.i18n import _ |
|
11 | 10 | from mercurial.node import bin, hex |
|
12 | 11 | from mercurial.pycompat import open |
|
13 | 12 | from mercurial import ( |
|
14 | 13 | error, |
|
15 | 14 | pycompat, |
|
16 | 15 | util, |
|
17 | 16 | ) |
|
17 | from mercurial.utils import hashutil | |
|
18 | 18 | from . import ( |
|
19 | 19 | constants, |
|
20 | 20 | shallowutil, |
|
21 | 21 | ) |
|
22 | 22 | |
|
23 | 23 | |
|
24 | 24 | class basestore(object): |
|
25 | 25 | def __init__(self, repo, path, reponame, shared=False): |
|
26 | 26 | """Creates a remotefilelog store object for the given repo name. |
|
27 | 27 | |
|
28 | 28 | `path` - The file path where this store keeps its data |
|
29 | 29 | `reponame` - The name of the repo. This is used to partition data from |
|
30 | 30 | many repos. |
|
31 | 31 | `shared` - True if this store is a shared cache of data from the central |
|
32 | 32 | server, for many repos on this machine. False means this store is for |
|
33 | 33 | the local data for one repo. |
|
34 | 34 | """ |
|
35 | 35 | self.repo = repo |
|
36 | 36 | self.ui = repo.ui |
|
37 | 37 | self._path = path |
|
38 | 38 | self._reponame = reponame |
|
39 | 39 | self._shared = shared |
|
40 | 40 | self._uid = os.getuid() if not pycompat.iswindows else None |
|
41 | 41 | |
|
42 | 42 | self._validatecachelog = self.ui.config( |
|
43 | 43 | b"remotefilelog", b"validatecachelog" |
|
44 | 44 | ) |
|
45 | 45 | self._validatecache = self.ui.config( |
|
46 | 46 | b"remotefilelog", b"validatecache", b'on' |
|
47 | 47 | ) |
|
48 | 48 | if self._validatecache not in (b'on', b'strict', b'off'): |
|
49 | 49 | self._validatecache = b'on' |
|
50 | 50 | if self._validatecache == b'off': |
|
51 | 51 | self._validatecache = False |
|
52 | 52 | |
|
53 | 53 | if shared: |
|
54 | 54 | shallowutil.mkstickygroupdir(self.ui, path) |
|
55 | 55 | |
|
56 | 56 | def getmissing(self, keys): |
|
57 | 57 | missing = [] |
|
58 | 58 | for name, node in keys: |
|
59 | 59 | filepath = self._getfilepath(name, node) |
|
60 | 60 | exists = os.path.exists(filepath) |
|
61 | 61 | if ( |
|
62 | 62 | exists |
|
63 | 63 | and self._validatecache == b'strict' |
|
64 | 64 | and not self._validatekey(filepath, b'contains') |
|
65 | 65 | ): |
|
66 | 66 | exists = False |
|
67 | 67 | if not exists: |
|
68 | 68 | missing.append((name, node)) |
|
69 | 69 | |
|
70 | 70 | return missing |
|
71 | 71 | |
|
72 | 72 | # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE |
|
73 | 73 | |
|
74 | 74 | def markledger(self, ledger, options=None): |
|
75 | 75 | if options and options.get(constants.OPTION_PACKSONLY): |
|
76 | 76 | return |
|
77 | 77 | if self._shared: |
|
78 | 78 | for filename, nodes in self._getfiles(): |
|
79 | 79 | for node in nodes: |
|
80 | 80 | ledger.markdataentry(self, filename, node) |
|
81 | 81 | ledger.markhistoryentry(self, filename, node) |
|
82 | 82 | |
|
83 | 83 | def cleanup(self, ledger): |
|
84 | 84 | ui = self.ui |
|
85 | 85 | entries = ledger.sources.get(self, []) |
|
86 | 86 | count = 0 |
|
87 | 87 | progress = ui.makeprogress( |
|
88 | 88 | _(b"cleaning up"), unit=b"files", total=len(entries) |
|
89 | 89 | ) |
|
90 | 90 | for entry in entries: |
|
91 | 91 | if entry.gced or (entry.datarepacked and entry.historyrepacked): |
|
92 | 92 | progress.update(count) |
|
93 | 93 | path = self._getfilepath(entry.filename, entry.node) |
|
94 | 94 | util.tryunlink(path) |
|
95 | 95 | count += 1 |
|
96 | 96 | progress.complete() |
|
97 | 97 | |
|
98 | 98 | # Clean up the repo cache directory. |
|
99 | 99 | self._cleanupdirectory(self._getrepocachepath()) |
|
100 | 100 | |
|
101 | 101 | # BELOW THIS ARE NON-STANDARD APIS |
|
102 | 102 | |
|
103 | 103 | def _cleanupdirectory(self, rootdir): |
|
104 | 104 | """Removes the empty directories and unnecessary files within the root |
|
105 | 105 | directory recursively. Note that this method does not remove the root |
|
106 | 106 | directory itself. """ |
|
107 | 107 | |
|
108 | 108 | oldfiles = set() |
|
109 | 109 | otherfiles = set() |
|
110 | 110 | # osutil.listdir returns stat information which saves some rmdir/listdir |
|
111 | 111 | # syscalls. |
|
112 | 112 | for name, mode in util.osutil.listdir(rootdir): |
|
113 | 113 | if stat.S_ISDIR(mode): |
|
114 | 114 | dirpath = os.path.join(rootdir, name) |
|
115 | 115 | self._cleanupdirectory(dirpath) |
|
116 | 116 | |
|
117 | 117 | # Now that the directory specified by dirpath is potentially |
|
118 | 118 | # empty, try and remove it. |
|
119 | 119 | try: |
|
120 | 120 | os.rmdir(dirpath) |
|
121 | 121 | except OSError: |
|
122 | 122 | pass |
|
123 | 123 | |
|
124 | 124 | elif stat.S_ISREG(mode): |
|
125 | 125 | if name.endswith(b'_old'): |
|
126 | 126 | oldfiles.add(name[:-4]) |
|
127 | 127 | else: |
|
128 | 128 | otherfiles.add(name) |
|
129 | 129 | |
|
130 | 130 | # Remove the files which end with suffix '_old' and have no |
|
131 | 131 | # corresponding file without the suffix '_old'. See addremotefilelognode |
|
132 | 132 | # method for the generation/purpose of files with '_old' suffix. |
|
133 | 133 | for filename in oldfiles - otherfiles: |
|
134 | 134 | filepath = os.path.join(rootdir, filename + b'_old') |
|
135 | 135 | util.tryunlink(filepath) |
|
136 | 136 | |
|
137 | 137 | def _getfiles(self): |
|
138 | 138 | """Return a list of (filename, [node,...]) for all the revisions that |
|
139 | 139 | exist in the store. |
|
140 | 140 | |
|
141 | 141 | This is useful for obtaining a list of all the contents of the store |
|
142 | 142 | when performing a repack to another store, since the store API requires |
|
143 | 143 | name+node keys and not namehash+node keys. |
|
144 | 144 | """ |
|
145 | 145 | existing = {} |
|
146 | 146 | for filenamehash, node in self._listkeys(): |
|
147 | 147 | existing.setdefault(filenamehash, []).append(node) |
|
148 | 148 | |
|
149 | 149 | filenamemap = self._resolvefilenames(existing.keys()) |
|
150 | 150 | |
|
151 | 151 | for filename, sha in pycompat.iteritems(filenamemap): |
|
152 | 152 | yield (filename, existing[sha]) |
|
153 | 153 | |
|
154 | 154 | def _resolvefilenames(self, hashes): |
|
155 | 155 | """Given a list of filename hashes that are present in the |
|
156 | 156 | remotefilelog store, return a mapping from filename->hash. |
|
157 | 157 | |
|
158 | 158 | This is useful when converting remotefilelog blobs into other storage |
|
159 | 159 | formats. |
|
160 | 160 | """ |
|
161 | 161 | if not hashes: |
|
162 | 162 | return {} |
|
163 | 163 | |
|
164 | 164 | filenames = {} |
|
165 | 165 | missingfilename = set(hashes) |
|
166 | 166 | |
|
167 | 167 | # Start with a full manifest, since it'll cover the majority of files |
|
168 | 168 | for filename in self.repo[b'tip'].manifest(): |
|
169 | sha = hashlib.sha1(filename).digest() | |

169 | sha = hashutil.sha1(filename).digest() | |
|
170 | 170 | if sha in missingfilename: |
|
171 | 171 | filenames[filename] = sha |
|
172 | 172 | missingfilename.discard(sha) |
|
173 | 173 | |
|
174 | 174 | # Scan the changelog until we've found every file name |
|
175 | 175 | cl = self.repo.unfiltered().changelog |
|
176 | 176 | for rev in pycompat.xrange(len(cl) - 1, -1, -1): |
|
177 | 177 | if not missingfilename: |
|
178 | 178 | break |
|
179 | 179 | files = cl.readfiles(cl.node(rev)) |
|
180 | 180 | for filename in files: |
|
181 | sha = hashlib.sha1(filename).digest() | |

181 | sha = hashutil.sha1(filename).digest() | |
|
182 | 182 | if sha in missingfilename: |
|
183 | 183 | filenames[filename] = sha |
|
184 | 184 | missingfilename.discard(sha) |
|
185 | 185 | |
|
186 | 186 | return filenames |
|
187 | 187 | |
|
188 | 188 | def _getrepocachepath(self): |
|
189 | 189 | return ( |
|
190 | 190 | os.path.join(self._path, self._reponame) |
|
191 | 191 | if self._shared |
|
192 | 192 | else self._path |
|
193 | 193 | ) |
|
194 | 194 | |
|
195 | 195 | def _listkeys(self): |
|
196 | 196 | """List all the remotefilelog keys that exist in the store. |
|
197 | 197 | |
|
198 | 198 | Returns an iterator of (filename hash, filecontent hash) tuples. |
|
199 | 199 | """ |
|
200 | 200 | |
|
201 | 201 | for root, dirs, files in os.walk(self._getrepocachepath()): |
|
202 | 202 | for filename in files: |
|
203 | 203 | if len(filename) != 40: |
|
204 | 204 | continue |
|
205 | 205 | node = filename |
|
206 | 206 | if self._shared: |
|
207 | 207 | # .../1a/85ffda..be21 |
|
208 | 208 | filenamehash = root[-41:-39] + root[-38:] |
|
209 | 209 | else: |
|
210 | 210 | filenamehash = root[-40:] |
|
211 | 211 | yield (bin(filenamehash), bin(node)) |
|
212 | 212 | |
|
213 | 213 | def _getfilepath(self, name, node): |
|
214 | 214 | node = hex(node) |
|
215 | 215 | if self._shared: |
|
216 | 216 | key = shallowutil.getcachekey(self._reponame, name, node) |
|
217 | 217 | else: |
|
218 | 218 | key = shallowutil.getlocalkey(name, node) |
|
219 | 219 | |
|
220 | 220 | return os.path.join(self._path, key) |
|
221 | 221 | |
|
222 | 222 | def _getdata(self, name, node): |
|
223 | 223 | filepath = self._getfilepath(name, node) |
|
224 | 224 | try: |
|
225 | 225 | data = shallowutil.readfile(filepath) |
|
226 | 226 | if self._validatecache and not self._validatedata(data, filepath): |
|
227 | 227 | if self._validatecachelog: |
|
228 | 228 | with open(self._validatecachelog, b'a+') as f: |
|
229 | 229 | f.write(b"corrupt %s during read\n" % filepath) |
|
230 | 230 | os.rename(filepath, filepath + b".corrupt") |
|
231 | 231 | raise KeyError(b"corrupt local cache file %s" % filepath) |
|
232 | 232 | except IOError: |
|
233 | 233 | raise KeyError( |
|
234 | 234 | b"no file found at %s for %s:%s" % (filepath, name, hex(node)) |
|
235 | 235 | ) |
|
236 | 236 | |
|
237 | 237 | return data |
|
238 | 238 | |
|
239 | 239 | def addremotefilelognode(self, name, node, data): |
|
240 | 240 | filepath = self._getfilepath(name, node) |
|
241 | 241 | |
|
242 | 242 | oldumask = os.umask(0o002) |
|
243 | 243 | try: |
|
244 | 244 | # if this node already exists, save the old version for |
|
245 | 245 | # recovery/debugging purposes. |
|
246 | 246 | if os.path.exists(filepath): |
|
247 | 247 | newfilename = filepath + b'_old' |
|
248 | 248 | # newfilename can be read-only and shutil.copy will fail. |
|
249 | 249 | # Delete newfilename to avoid it |
|
250 | 250 | if os.path.exists(newfilename): |
|
251 | 251 | shallowutil.unlinkfile(newfilename) |
|
252 | 252 | shutil.copy(filepath, newfilename) |
|
253 | 253 | |
|
254 | 254 | shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath)) |
|
255 | 255 | shallowutil.writefile(filepath, data, readonly=True) |
|
256 | 256 | |
|
257 | 257 | if self._validatecache: |
|
258 | 258 | if not self._validatekey(filepath, b'write'): |
|
259 | 259 | raise error.Abort( |
|
260 | 260 | _(b"local cache write was corrupted %s") % filepath |
|
261 | 261 | ) |
|
262 | 262 | finally: |
|
263 | 263 | os.umask(oldumask) |
|
264 | 264 | |
|
265 | 265 | def markrepo(self, path): |
|
266 | 266 | """Call this to add the given repo path to the store's list of |
|
267 | 267 | repositories that are using it. This is useful later when doing garbage |
|
268 | 268 | collection, since it allows us to inspect the repos to see what nodes |
|
269 | 269 | they want to be kept alive in the store. |
|
270 | 270 | """ |
|
271 | 271 | repospath = os.path.join(self._path, b"repos") |
|
272 | 272 | with open(repospath, b'ab') as reposfile: |
|
273 | 273 | reposfile.write(os.path.dirname(path) + b"\n") |
|
274 | 274 | |
|
275 | 275 | repospathstat = os.stat(repospath) |
|
276 | 276 | if repospathstat.st_uid == self._uid: |
|
277 | 277 | os.chmod(repospath, 0o0664) |
|
278 | 278 | |
|
279 | 279 | def _validatekey(self, path, action): |
|
280 | 280 | with open(path, b'rb') as f: |
|
281 | 281 | data = f.read() |
|
282 | 282 | |
|
283 | 283 | if self._validatedata(data, path): |
|
284 | 284 | return True |
|
285 | 285 | |
|
286 | 286 | if self._validatecachelog: |
|
287 | 287 | with open(self._validatecachelog, b'ab+') as f: |
|
288 | 288 | f.write(b"corrupt %s during %s\n" % (path, action)) |
|
289 | 289 | |
|
290 | 290 | os.rename(path, path + b".corrupt") |
|
291 | 291 | return False |
|
292 | 292 | |
|
293 | 293 | def _validatedata(self, data, path): |
|
294 | 294 | try: |
|
295 | 295 | if len(data) > 0: |
|
296 | 296 | # see remotefilelogserver.createfileblob for the format |
|
297 | 297 | offset, size, flags = shallowutil.parsesizeflags(data) |
|
298 | 298 | if len(data) <= size: |
|
299 | 299 | # it is truncated |
|
300 | 300 | return False |
|
301 | 301 | |
|
302 | 302 | # extract the node from the metadata |
|
303 | 303 | offset += size |
|
304 | 304 | datanode = data[offset : offset + 20] |
|
305 | 305 | |
|
306 | 306 | # and compare against the path |
|
307 | 307 | if os.path.basename(path) == hex(datanode): |
|
308 | 308 | # Content matches the intended path |
|
309 | 309 | return True |
|
310 | 310 | return False |
|
311 | 311 | except (ValueError, RuntimeError): |
|
312 | 312 | pass |
|
313 | 313 | |
|
314 | 314 | return False |
|
315 | 315 | |
|
316 | 316 | def gc(self, keepkeys): |
|
317 | 317 | ui = self.ui |
|
318 | 318 | cachepath = self._path |
|
319 | 319 | |
|
320 | 320 | # prune cache |
|
321 | 321 | queue = pycompat.queue.PriorityQueue() |
|
322 | 322 | originalsize = 0 |
|
323 | 323 | size = 0 |
|
324 | 324 | count = 0 |
|
325 | 325 | removed = 0 |
|
326 | 326 | |
|
327 | 327 | # keep files newer than a day even if they aren't needed |
|
328 | 328 | limit = time.time() - (60 * 60 * 24) |
|
329 | 329 | |
|
330 | 330 | progress = ui.makeprogress( |
|
331 | 331 | _(b"removing unnecessary files"), unit=b"files" |
|
332 | 332 | ) |
|
333 | 333 | progress.update(0) |
|
334 | 334 | for root, dirs, files in os.walk(cachepath): |
|
335 | 335 | for file in files: |
|
336 | 336 | if file == b'repos': |
|
337 | 337 | continue |
|
338 | 338 | |
|
339 | 339 | # Don't delete pack files |
|
340 | 340 | if b'/packs/' in root: |
|
341 | 341 | continue |
|
342 | 342 | |
|
343 | 343 | progress.update(count) |
|
344 | 344 | path = os.path.join(root, file) |
|
345 | 345 | key = os.path.relpath(path, cachepath) |
|
346 | 346 | count += 1 |
|
347 | 347 | try: |
|
348 | 348 | pathstat = os.stat(path) |
|
349 | 349 | except OSError as e: |
|
350 | 350 | # errno.ENOENT = no such file or directory |
|
351 | 351 | if e.errno != errno.ENOENT: |
|
352 | 352 | raise |
|
353 | 353 | msg = _( |
|
354 | 354 | b"warning: file %s was removed by another process\n" |
|
355 | 355 | ) |
|
356 | 356 | ui.warn(msg % path) |
|
357 | 357 | continue |
|
358 | 358 | |
|
359 | 359 | originalsize += pathstat.st_size |
|
360 | 360 | |
|
361 | 361 | if key in keepkeys or pathstat.st_atime > limit: |
|
362 | 362 | queue.put((pathstat.st_atime, path, pathstat)) |
|
363 | 363 | size += pathstat.st_size |
|
364 | 364 | else: |
|
365 | 365 | try: |
|
366 | 366 | shallowutil.unlinkfile(path) |
|
367 | 367 | except OSError as e: |
|
368 | 368 | # errno.ENOENT = no such file or directory |
|
369 | 369 | if e.errno != errno.ENOENT: |
|
370 | 370 | raise |
|
371 | 371 | msg = _( |
|
372 | 372 | b"warning: file %s was removed by another " |
|
373 | 373 | b"process\n" |
|
374 | 374 | ) |
|
375 | 375 | ui.warn(msg % path) |
|
376 | 376 | continue |
|
377 | 377 | removed += 1 |
|
378 | 378 | progress.complete() |
|
379 | 379 | |
|
380 | 380 | # remove oldest files until under limit |
|
381 | 381 | limit = ui.configbytes(b"remotefilelog", b"cachelimit") |
|
382 | 382 | if size > limit: |
|
383 | 383 | excess = size - limit |
|
384 | 384 | progress = ui.makeprogress( |
|
385 | 385 | _(b"enforcing cache limit"), unit=b"bytes", total=excess |
|
386 | 386 | ) |
|
387 | 387 | removedexcess = 0 |
|
388 | 388 | while queue and size > limit and size > 0: |
|
389 | 389 | progress.update(removedexcess) |
|
390 | 390 | atime, oldpath, oldpathstat = queue.get() |
|
391 | 391 | try: |
|
392 | 392 | shallowutil.unlinkfile(oldpath) |
|
393 | 393 | except OSError as e: |
|
394 | 394 | # errno.ENOENT = no such file or directory |
|
395 | 395 | if e.errno != errno.ENOENT: |
|
396 | 396 | raise |
|
397 | 397 | msg = _( |
|
398 | 398 | b"warning: file %s was removed by another process\n" |
|
399 | 399 | ) |
|
400 | 400 | ui.warn(msg % oldpath) |
|
401 | 401 | size -= oldpathstat.st_size |
|
402 | 402 | removed += 1 |
|
403 | 403 | removedexcess += oldpathstat.st_size |
|
404 | 404 | progress.complete() |
|
405 | 405 | |
|
406 | 406 | ui.status( |
|
407 | 407 | _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n") |
|
408 | 408 | % ( |
|
409 | 409 | removed, |
|
410 | 410 | count, |
|
411 | 411 | float(originalsize) / 1024.0 / 1024.0 / 1024.0, |
|
412 | 412 | float(size) / 1024.0 / 1024.0 / 1024.0, |
|
413 | 413 | ) |
|
414 | 414 | ) |
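
Editorial note: the gc method above is a simple LRU-by-atime eviction: files worth keeping go into a priority queue keyed by access time, and once the total size exceeds the configured cache limit the oldest entries are popped and deleted first. A self-contained sketch of just that eviction loop (the entries and limit below are made up for illustration):

    import queue

    def evict_oldest(entries, limit):
        """entries: iterable of (atime, path, size); returns paths to delete."""
        q = queue.PriorityQueue()
        size = 0
        for atime, path, filesize in entries:
            q.put((atime, path, filesize))
            size += filesize
        deleted = []
        # Pop oldest-first (smallest atime) until we are back under the limit.
        while not q.empty() and size > limit:
            atime, path, filesize = q.get()
            deleted.append(path)
            size -= filesize
        return deleted

    # Oldest files (lowest atime) are evicted first.
    print(evict_oldest([(100, 'a', 50), (300, 'b', 50), (200, 'c', 50)], 80))
    # -> ['a', 'c']
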
|
415 | 415 | |
|
416 | 416 | |
|
417 | 417 | class baseunionstore(object): |
|
418 | 418 | def __init__(self, *args, **kwargs): |
|
419 | 419 | # If one of the functions that iterates all of the stores is about to |
|
420 | 420 | # throw a KeyError, try this many times with a full refresh between |
|
421 | 421 | # attempts. A repack operation may have moved data from one store to |
|
422 | 422 | # another while we were running. |
|
423 | 423 | self.numattempts = kwargs.get('numretries', 0) + 1 |
|
424 | 424 | # If not-None, call this function on every retry and if the attempts are |
|
425 | 425 | # exhausted. |
|
426 | 426 | self.retrylog = kwargs.get('retrylog', None) |
|
427 | 427 | |
|
428 | 428 | def markforrefresh(self): |
|
429 | 429 | for store in self.stores: |
|
430 | 430 | if util.safehasattr(store, b'markforrefresh'): |
|
431 | 431 | store.markforrefresh() |
|
432 | 432 | |
|
433 | 433 | @staticmethod |
|
434 | 434 | def retriable(fn): |
|
435 | 435 | def noop(*args): |
|
436 | 436 | pass |
|
437 | 437 | |
|
438 | 438 | def wrapped(self, *args, **kwargs): |
|
439 | 439 | retrylog = self.retrylog or noop |
|
440 | 440 | funcname = fn.__name__ |
|
441 | 441 | i = 0 |
|
442 | 442 | while i < self.numattempts: |
|
443 | 443 | if i > 0: |
|
444 | 444 | retrylog( |
|
445 | 445 | b're-attempting (n=%d) %s\n' |
|
446 | 446 | % (i, pycompat.sysbytes(funcname)) |
|
447 | 447 | ) |
|
448 | 448 | self.markforrefresh() |
|
449 | 449 | i += 1 |
|
450 | 450 | try: |
|
451 | 451 | return fn(self, *args, **kwargs) |
|
452 | 452 | except KeyError: |
|
453 | 453 | if i == self.numattempts: |
|
454 | 454 | # retries exhausted |
|
455 | 455 | retrylog( |
|
456 | 456 | b'retries exhausted in %s, raising KeyError\n' |
|
457 | 457 | % pycompat.sysbytes(funcname) |
|
458 | 458 | ) |
|
459 | 459 | raise |
|
460 | 460 | |
|
461 | 461 | return wrapped |
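
Editorial note: the retriable decorator above retries a store operation on KeyError, refreshing the stores between attempts, because a concurrent repack may have moved data from one store to another. A stripped-down sketch of the same pattern outside the class (the refresh and log hooks here are placeholders, not remotefilelog APIs):

    import functools

    def retriable(numattempts, refresh=lambda: None, log=print):
        """Retry fn on KeyError up to numattempts times, refreshing in between."""
        def decorator(fn):
            @functools.wraps(fn)
            def wrapped(*args, **kwargs):
                for attempt in range(numattempts):
                    if attempt:
                        log('re-attempting (n=%d) %s' % (attempt, fn.__name__))
                        refresh()
                    try:
                        return fn(*args, **kwargs)
                    except KeyError:
                        if attempt == numattempts - 1:
                            log('retries exhausted in %s' % fn.__name__)
                            raise
            return wrapped
        return decorator

    # A store read that tolerates a concurrent repack moving data around:
    @retriable(numattempts=3)
    def lookup(store, key):
        return store[key]
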
@@ -1,477 +1,477 b'' | |||
|
1 | 1 | # debugcommands.py - debug logic for remotefilelog |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | from __future__ import absolute_import |
|
8 | 8 | |
|
9 | import hashlib | |
|
10 | 9 | import os |
|
11 | 10 | import zlib |
|
12 | 11 | |
|
13 | 12 | from mercurial.node import bin, hex, nullid, short |
|
14 | 13 | from mercurial.i18n import _ |
|
15 | 14 | from mercurial.pycompat import open |
|
16 | 15 | from mercurial import ( |
|
17 | 16 | error, |
|
18 | 17 | filelog, |
|
19 | 18 | lock as lockmod, |
|
20 | 19 | node as nodemod, |
|
21 | 20 | pycompat, |
|
22 | 21 | revlog, |
|
23 | 22 | ) |
|
23 | from mercurial.utils import hashutil | |
|
24 | 24 | from . import ( |
|
25 | 25 | constants, |
|
26 | 26 | datapack, |
|
27 | 27 | fileserverclient, |
|
28 | 28 | historypack, |
|
29 | 29 | repack, |
|
30 | 30 | shallowutil, |
|
31 | 31 | ) |
|
32 | 32 | |
|
33 | 33 | |
|
34 | 34 | def debugremotefilelog(ui, path, **opts): |
|
35 | 35 | decompress = opts.get('decompress') |
|
36 | 36 | |
|
37 | 37 | size, firstnode, mapping = parsefileblob(path, decompress) |
|
38 | 38 | |
|
39 | 39 | ui.status(_(b"size: %d bytes\n") % size) |
|
40 | 40 | ui.status(_(b"path: %s \n") % path) |
|
41 | 41 | ui.status(_(b"key: %s \n") % (short(firstnode))) |
|
42 | 42 | ui.status(_(b"\n")) |
|
43 | 43 | ui.status( |
|
44 | 44 | _(b"%12s => %12s %13s %13s %12s\n") |
|
45 | 45 | % (b"node", b"p1", b"p2", b"linknode", b"copyfrom") |
|
46 | 46 | ) |
|
47 | 47 | |
|
48 | 48 | queue = [firstnode] |
|
49 | 49 | while queue: |
|
50 | 50 | node = queue.pop(0) |
|
51 | 51 | p1, p2, linknode, copyfrom = mapping[node] |
|
52 | 52 | ui.status( |
|
53 | 53 | _(b"%s => %s %s %s %s\n") |
|
54 | 54 | % (short(node), short(p1), short(p2), short(linknode), copyfrom) |
|
55 | 55 | ) |
|
56 | 56 | if p1 != nullid: |
|
57 | 57 | queue.append(p1) |
|
58 | 58 | if p2 != nullid: |
|
59 | 59 | queue.append(p2) |
|
60 | 60 | |
|
61 | 61 | |
|
62 | 62 | def buildtemprevlog(repo, file): |
|
63 | 63 | # get filename key |
|
64 | filekey = nodemod.hex(hashlib.sha1(file).digest()) | |

64 | filekey = nodemod.hex(hashutil.sha1(file).digest()) | |
|
65 | 65 | filedir = os.path.join(repo.path, b'store/data', filekey) |
|
66 | 66 | |
|
67 | 67 | # sort all entries based on linkrev |
|
68 | 68 | fctxs = [] |
|
69 | 69 | for filenode in os.listdir(filedir): |
|
70 | 70 | if b'_old' not in filenode: |
|
71 | 71 | fctxs.append(repo.filectx(file, fileid=bin(filenode))) |
|
72 | 72 | |
|
73 | 73 | fctxs = sorted(fctxs, key=lambda x: x.linkrev()) |
|
74 | 74 | |
|
75 | 75 | # add to revlog |
|
76 | 76 | temppath = repo.sjoin(b'data/temprevlog.i') |
|
77 | 77 | if os.path.exists(temppath): |
|
78 | 78 | os.remove(temppath) |
|
79 | 79 | r = filelog.filelog(repo.svfs, b'temprevlog') |
|
80 | 80 | |
|
81 | 81 | class faket(object): |
|
82 | 82 | def add(self, a, b, c): |
|
83 | 83 | pass |
|
84 | 84 | |
|
85 | 85 | t = faket() |
|
86 | 86 | for fctx in fctxs: |
|
87 | 87 | if fctx.node() not in repo: |
|
88 | 88 | continue |
|
89 | 89 | |
|
90 | 90 | p = fctx.filelog().parents(fctx.filenode()) |
|
91 | 91 | meta = {} |
|
92 | 92 | if fctx.renamed(): |
|
93 | 93 | meta[b'copy'] = fctx.renamed()[0] |
|
94 | 94 | meta[b'copyrev'] = hex(fctx.renamed()[1]) |
|
95 | 95 | |
|
96 | 96 | r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1]) |
|
97 | 97 | |
|
98 | 98 | return r |
|
99 | 99 | |
|
100 | 100 | |
|
101 | 101 | def debugindex(orig, ui, repo, file_=None, **opts): |
|
102 | 102 | """dump the contents of an index file""" |
|
103 | 103 | if ( |
|
104 | 104 | opts.get('changelog') |
|
105 | 105 | or opts.get('manifest') |
|
106 | 106 | or opts.get('dir') |
|
107 | 107 | or not shallowutil.isenabled(repo) |
|
108 | 108 | or not repo.shallowmatch(file_) |
|
109 | 109 | ): |
|
110 | 110 | return orig(ui, repo, file_, **opts) |
|
111 | 111 | |
|
112 | 112 | r = buildtemprevlog(repo, file_) |
|
113 | 113 | |
|
114 | 114 | # debugindex like normal |
|
115 | 115 | format = opts.get(b'format', 0) |
|
116 | 116 | if format not in (0, 1): |
|
117 | 117 | raise error.Abort(_(b"unknown format %d") % format) |
|
118 | 118 | |
|
119 | 119 | generaldelta = r.version & revlog.FLAG_GENERALDELTA |
|
120 | 120 | if generaldelta: |
|
121 | 121 | basehdr = b' delta' |
|
122 | 122 | else: |
|
123 | 123 | basehdr = b' base' |
|
124 | 124 | |
|
125 | 125 | if format == 0: |
|
126 | 126 | ui.write( |
|
127 | 127 | ( |
|
128 | 128 | b" rev offset length " + basehdr + b" linkrev" |
|
129 | 129 | b" nodeid p1 p2\n" |
|
130 | 130 | ) |
|
131 | 131 | ) |
|
132 | 132 | elif format == 1: |
|
133 | 133 | ui.write( |
|
134 | 134 | ( |
|
135 | 135 | b" rev flag offset length" |
|
136 | 136 | b" size " + basehdr + b" link p1 p2" |
|
137 | 137 | b" nodeid\n" |
|
138 | 138 | ) |
|
139 | 139 | ) |
|
140 | 140 | |
|
141 | 141 | for i in r: |
|
142 | 142 | node = r.node(i) |
|
143 | 143 | if generaldelta: |
|
144 | 144 | base = r.deltaparent(i) |
|
145 | 145 | else: |
|
146 | 146 | base = r.chainbase(i) |
|
147 | 147 | if format == 0: |
|
148 | 148 | try: |
|
149 | 149 | pp = r.parents(node) |
|
150 | 150 | except Exception: |
|
151 | 151 | pp = [nullid, nullid] |
|
152 | 152 | ui.write( |
|
153 | 153 | b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n" |
|
154 | 154 | % ( |
|
155 | 155 | i, |
|
156 | 156 | r.start(i), |
|
157 | 157 | r.length(i), |
|
158 | 158 | base, |
|
159 | 159 | r.linkrev(i), |
|
160 | 160 | short(node), |
|
161 | 161 | short(pp[0]), |
|
162 | 162 | short(pp[1]), |
|
163 | 163 | ) |
|
164 | 164 | ) |
|
165 | 165 | elif format == 1: |
|
166 | 166 | pr = r.parentrevs(i) |
|
167 | 167 | ui.write( |
|
168 | 168 | b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" |
|
169 | 169 | % ( |
|
170 | 170 | i, |
|
171 | 171 | r.flags(i), |
|
172 | 172 | r.start(i), |
|
173 | 173 | r.length(i), |
|
174 | 174 | r.rawsize(i), |
|
175 | 175 | base, |
|
176 | 176 | r.linkrev(i), |
|
177 | 177 | pr[0], |
|
178 | 178 | pr[1], |
|
179 | 179 | short(node), |
|
180 | 180 | ) |
|
181 | 181 | ) |
|
182 | 182 | |
|
183 | 183 | |
|
184 | 184 | def debugindexdot(orig, ui, repo, file_): |
|
185 | 185 | """dump an index DAG as a graphviz dot file""" |
|
186 | 186 | if not shallowutil.isenabled(repo): |
|
187 | 187 | return orig(ui, repo, file_) |
|
188 | 188 | |
|
189 | 189 | r = buildtemprevlog(repo, os.path.basename(file_)[:-2]) |
|
190 | 190 | |
|
191 | 191 | ui.writenoi18n(b"digraph G {\n") |
|
192 | 192 | for i in r: |
|
193 | 193 | node = r.node(i) |
|
194 | 194 | pp = r.parents(node) |
|
195 | 195 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i)) |
|
196 | 196 | if pp[1] != nullid: |
|
197 | 197 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i)) |
|
198 | 198 | ui.write(b"}\n") |
|
199 | 199 | |
|
200 | 200 | |
|
201 | 201 | def verifyremotefilelog(ui, path, **opts): |
|
202 | 202 | decompress = opts.get('decompress') |
|
203 | 203 | |
|
204 | 204 | for root, dirs, files in os.walk(path): |
|
205 | 205 | for file in files: |
|
206 | 206 | if file == b"repos": |
|
207 | 207 | continue |
|
208 | 208 | filepath = os.path.join(root, file) |
|
209 | 209 | size, firstnode, mapping = parsefileblob(filepath, decompress) |
|
210 | 210 | for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping): |
|
211 | 211 | if linknode == nullid: |
|
212 | 212 | actualpath = os.path.relpath(root, path) |
|
213 | 213 | key = fileserverclient.getcachekey( |
|
214 | 214 | b"reponame", actualpath, file |
|
215 | 215 | ) |
|
216 | 216 | ui.status( |
|
217 | 217 | b"%s %s\n" % (key, os.path.relpath(filepath, path)) |
|
218 | 218 | ) |
|
219 | 219 | |
|
220 | 220 | |
|
221 | 221 | def _decompressblob(raw): |
|
222 | 222 | return zlib.decompress(raw) |
|
223 | 223 | |
|
224 | 224 | |
|
225 | 225 | def parsefileblob(path, decompress): |
|
226 | 226 | f = open(path, b"rb") |
|
227 | 227 | try: |
|
228 | 228 | raw = f.read() |
|
229 | 229 | finally: |
|
230 | 230 | f.close() |
|
231 | 231 | |
|
232 | 232 | if decompress: |
|
233 | 233 | raw = _decompressblob(raw) |
|
234 | 234 | |
|
235 | 235 | offset, size, flags = shallowutil.parsesizeflags(raw) |
|
236 | 236 | start = offset + size |
|
237 | 237 | |
|
238 | 238 | firstnode = None |
|
239 | 239 | |
|
240 | 240 | mapping = {} |
|
241 | 241 | while start < len(raw): |
|
242 | 242 | divider = raw.index(b'\0', start + 80) |
|
243 | 243 | |
|
244 | 244 | currentnode = raw[start : (start + 20)] |
|
245 | 245 | if not firstnode: |
|
246 | 246 | firstnode = currentnode |
|
247 | 247 | |
|
248 | 248 | p1 = raw[(start + 20) : (start + 40)] |
|
249 | 249 | p2 = raw[(start + 40) : (start + 60)] |
|
250 | 250 | linknode = raw[(start + 60) : (start + 80)] |
|
251 | 251 | copyfrom = raw[(start + 80) : divider] |
|
252 | 252 | |
|
253 | 253 | mapping[currentnode] = (p1, p2, linknode, copyfrom) |
|
254 | 254 | start = divider + 1 |
|
255 | 255 | |
|
256 | 256 | return size, firstnode, mapping |
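
Editorial note: parsefileblob above decodes the ancestry section that follows the file content: fixed 80-byte records (node, p1, p2, linknode, 20 bytes each), each followed by a copyfrom path and a NUL terminator. A small round-trip sketch of just that record layout (the node values and copyfrom name below are invented):

    # Build one ancestry record the way parsefileblob expects to find them.
    nullid = b'\0' * 20

    def makeancestryentry(node, p1, p2, linknode, copyfrom=b''):
        return node + p1 + p2 + linknode + copyfrom + b'\0'

    record = makeancestryentry(b'\x11' * 20, nullid, nullid, b'\x22' * 20,
                               b'old/name.py')

    # Parse it back with the same arithmetic as the loop above.
    divider = record.index(b'\0', 80)
    assert record[0:20] == b'\x11' * 20          # node
    assert record[60:80] == b'\x22' * 20         # linknode
    assert record[80:divider] == b'old/name.py'  # copyfrom
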
|
257 | 257 | |
|
258 | 258 | |
|
259 | 259 | def debugdatapack(ui, *paths, **opts): |
|
260 | 260 | for path in paths: |
|
261 | 261 | if b'.data' in path: |
|
262 | 262 | path = path[: path.index(b'.data')] |
|
263 | 263 | ui.write(b"%s:\n" % path) |
|
264 | 264 | dpack = datapack.datapack(path) |
|
265 | 265 | node = opts.get('node') |
|
266 | 266 | if node: |
|
267 | 267 | deltachain = dpack.getdeltachain(b'', bin(node)) |
|
268 | 268 | dumpdeltachain(ui, deltachain, **opts) |
|
269 | 269 | return |
|
270 | 270 | |
|
271 | 271 | if opts.get('long'): |
|
272 | 272 | hashformatter = hex |
|
273 | 273 | hashlen = 42 |
|
274 | 274 | else: |
|
275 | 275 | hashformatter = short |
|
276 | 276 | hashlen = 14 |
|
277 | 277 | |
|
278 | 278 | lastfilename = None |
|
279 | 279 | totaldeltasize = 0 |
|
280 | 280 | totalblobsize = 0 |
|
281 | 281 | |
|
282 | 282 | def printtotals(): |
|
283 | 283 | if lastfilename is not None: |
|
284 | 284 | ui.write(b"\n") |
|
285 | 285 | if not totaldeltasize or not totalblobsize: |
|
286 | 286 | return |
|
287 | 287 | difference = totalblobsize - totaldeltasize |
|
288 | 288 | deltastr = b"%0.1f%% %s" % ( |
|
289 | 289 | (100.0 * abs(difference) / totalblobsize), |
|
290 | 290 | (b"smaller" if difference > 0 else b"bigger"), |
|
291 | 291 | ) |
|
292 | 292 | |
|
293 | 293 | ui.writenoi18n( |
|
294 | 294 | b"Total:%s%s %s (%s)\n" |
|
295 | 295 | % ( |
|
296 | 296 | b"".ljust(2 * hashlen - len(b"Total:")), |
|
297 | 297 | (b'%d' % totaldeltasize).ljust(12), |
|
298 | 298 | (b'%d' % totalblobsize).ljust(9), |
|
299 | 299 | deltastr, |
|
300 | 300 | ) |
|
301 | 301 | ) |
|
302 | 302 | |
|
303 | 303 | bases = {} |
|
304 | 304 | nodes = set() |
|
305 | 305 | failures = 0 |
|
306 | 306 | for filename, node, deltabase, deltalen in dpack.iterentries(): |
|
307 | 307 | bases[node] = deltabase |
|
308 | 308 | if node in nodes: |
|
309 | 309 | ui.write((b"Bad entry: %s appears twice\n" % short(node))) |
|
310 | 310 | failures += 1 |
|
311 | 311 | nodes.add(node) |
|
312 | 312 | if filename != lastfilename: |
|
313 | 313 | printtotals() |
|
314 | 314 | name = b'(empty name)' if filename == b'' else filename |
|
315 | 315 | ui.write(b"%s:\n" % name) |
|
316 | 316 | ui.write( |
|
317 | 317 | b"%s%s%s%s\n" |
|
318 | 318 | % ( |
|
319 | 319 | b"Node".ljust(hashlen), |
|
320 | 320 | b"Delta Base".ljust(hashlen), |
|
321 | 321 | b"Delta Length".ljust(14), |
|
322 | 322 | b"Blob Size".ljust(9), |
|
323 | 323 | ) |
|
324 | 324 | ) |
|
325 | 325 | lastfilename = filename |
|
326 | 326 | totalblobsize = 0 |
|
327 | 327 | totaldeltasize = 0 |
|
328 | 328 | |
|
329 | 329 | # Metadata could be missing, in which case it will be an empty dict. |
|
330 | 330 | meta = dpack.getmeta(filename, node) |
|
331 | 331 | if constants.METAKEYSIZE in meta: |
|
332 | 332 | blobsize = meta[constants.METAKEYSIZE] |
|
333 | 333 | totaldeltasize += deltalen |
|
334 | 334 | totalblobsize += blobsize |
|
335 | 335 | else: |
|
336 | 336 | blobsize = b"(missing)" |
|
337 | 337 | ui.write( |
|
338 | 338 | b"%s %s %s%s\n" |
|
339 | 339 | % ( |
|
340 | 340 | hashformatter(node), |
|
341 | 341 | hashformatter(deltabase), |
|
342 | 342 | (b'%d' % deltalen).ljust(14), |
|
343 | 343 | pycompat.bytestr(blobsize), |
|
344 | 344 | ) |
|
345 | 345 | ) |
|
346 | 346 | |
|
347 | 347 | if filename is not None: |
|
348 | 348 | printtotals() |
|
349 | 349 | |
|
350 | 350 | failures += _sanitycheck(ui, set(nodes), bases) |
|
351 | 351 | if failures > 1: |
|
352 | 352 | ui.warn((b"%d failures\n" % failures)) |
|
353 | 353 | return 1 |
|
354 | 354 | |
|
355 | 355 | |
|
356 | 356 | def _sanitycheck(ui, nodes, bases): |
|
357 | 357 | """ |
|
358 | 358 | Does some basic sanity checking on a packfile with ``nodes`` and ``bases`` (a |
|
359 | 359 | mapping of node->base): |
|
360 | 360 | |
|
361 | 361 | - Each deltabase must itself be a node elsewhere in the pack |
|
362 | 362 | - There must be no cycles |
|
363 | 363 | """ |
|
364 | 364 | failures = 0 |
|
365 | 365 | for node in nodes: |
|
366 | 366 | seen = set() |
|
367 | 367 | current = node |
|
368 | 368 | deltabase = bases[current] |
|
369 | 369 | |
|
370 | 370 | while deltabase != nullid: |
|
371 | 371 | if deltabase not in nodes: |
|
372 | 372 | ui.warn( |
|
373 | 373 | ( |
|
374 | 374 | b"Bad entry: %s has an unknown deltabase (%s)\n" |
|
375 | 375 | % (short(node), short(deltabase)) |
|
376 | 376 | ) |
|
377 | 377 | ) |
|
378 | 378 | failures += 1 |
|
379 | 379 | break |
|
380 | 380 | |
|
381 | 381 | if deltabase in seen: |
|
382 | 382 | ui.warn( |
|
383 | 383 | ( |
|
384 | 384 | b"Bad entry: %s has a cycle (at %s)\n" |
|
385 | 385 | % (short(node), short(deltabase)) |
|
386 | 386 | ) |
|
387 | 387 | ) |
|
388 | 388 | failures += 1 |
|
389 | 389 | break |
|
390 | 390 | |
|
391 | 391 | current = deltabase |
|
392 | 392 | seen.add(current) |
|
393 | 393 | deltabase = bases[current] |
|
394 | 394 | # Since ``node`` begins a valid chain, reset/memoize its base to nullid |
|
395 | 395 | # so we don't traverse it again. |
|
396 | 396 | bases[node] = nullid |
|
397 | 397 | return failures |
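
Editorial note: as a quick illustration of what _sanitycheck flags, here is a hypothetical input where one delta chain points at a node that is not in the pack and another forms a cycle (the 20-byte values below are invented; nullid stands in for the null node):

    nullid = b'\0' * 20

    nodes = {b'A' * 20, b'B' * 20, b'C' * 20}
    bases = {
        b'A' * 20: nullid,       # fine: chain terminates at nullid
        b'B' * 20: b'X' * 20,    # bad: deltabase not present in the pack
        b'C' * 20: b'C' * 20,    # bad: chain loops back onto itself
    }
    # _sanitycheck(ui, set(nodes), bases) would report two failures here.
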
|
398 | 398 | |
|
399 | 399 | |
|
400 | 400 | def dumpdeltachain(ui, deltachain, **opts): |
|
401 | 401 | hashformatter = hex |
|
402 | 402 | hashlen = 40 |
|
403 | 403 | |
|
404 | 404 | lastfilename = None |
|
405 | 405 | for filename, node, filename, deltabasenode, delta in deltachain: |
|
406 | 406 | if filename != lastfilename: |
|
407 | 407 | ui.write(b"\n%s\n" % filename) |
|
408 | 408 | lastfilename = filename |
|
409 | 409 | ui.write( |
|
410 | 410 | b"%s %s %s %s\n" |
|
411 | 411 | % ( |
|
412 | 412 | b"Node".ljust(hashlen), |
|
413 | 413 | b"Delta Base".ljust(hashlen), |
|
414 | 414 | b"Delta SHA1".ljust(hashlen), |
|
415 | 415 | b"Delta Length".ljust(6), |
|
416 | 416 | ) |
|
417 | 417 | ) |
|
418 | 418 | |
|
419 | 419 | ui.write( |
|
420 | 420 | b"%s %s %s %d\n" |
|
421 | 421 | % ( |
|
422 | 422 | hashformatter(node), |
|
423 | 423 | hashformatter(deltabasenode), |
|
424 | nodemod.hex(hashlib.sha1(delta).digest()), | |

424 | nodemod.hex(hashutil.sha1(delta).digest()), | |
|
425 | 425 | len(delta), |
|
426 | 426 | ) |
|
427 | 427 | ) |
|
428 | 428 | |
|
429 | 429 | |
|
430 | 430 | def debughistorypack(ui, path): |
|
431 | 431 | if b'.hist' in path: |
|
432 | 432 | path = path[: path.index(b'.hist')] |
|
433 | 433 | hpack = historypack.historypack(path) |
|
434 | 434 | |
|
435 | 435 | lastfilename = None |
|
436 | 436 | for entry in hpack.iterentries(): |
|
437 | 437 | filename, node, p1node, p2node, linknode, copyfrom = entry |
|
438 | 438 | if filename != lastfilename: |
|
439 | 439 | ui.write(b"\n%s\n" % filename) |
|
440 | 440 | ui.write( |
|
441 | 441 | b"%s%s%s%s%s\n" |
|
442 | 442 | % ( |
|
443 | 443 | b"Node".ljust(14), |
|
444 | 444 | b"P1 Node".ljust(14), |
|
445 | 445 | b"P2 Node".ljust(14), |
|
446 | 446 | b"Link Node".ljust(14), |
|
447 | 447 | b"Copy From", |
|
448 | 448 | ) |
|
449 | 449 | ) |
|
450 | 450 | lastfilename = filename |
|
451 | 451 | ui.write( |
|
452 | 452 | b"%s %s %s %s %s\n" |
|
453 | 453 | % ( |
|
454 | 454 | short(node), |
|
455 | 455 | short(p1node), |
|
456 | 456 | short(p2node), |
|
457 | 457 | short(linknode), |
|
458 | 458 | copyfrom, |
|
459 | 459 | ) |
|
460 | 460 | ) |
|
461 | 461 | |
|
462 | 462 | |
|
463 | 463 | def debugwaitonrepack(repo): |
|
464 | 464 | with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1): |
|
465 | 465 | return |
|
466 | 466 | |
|
467 | 467 | |
|
468 | 468 | def debugwaitonprefetch(repo): |
|
469 | 469 | with repo._lock( |
|
470 | 470 | repo.svfs, |
|
471 | 471 | b"prefetchlock", |
|
472 | 472 | True, |
|
473 | 473 | None, |
|
474 | 474 | None, |
|
475 | 475 | _(b'prefetching in %s') % repo.origroot, |
|
476 | 476 | ): |
|
477 | 477 | pass |
@@ -1,667 +1,669 b'' | |||
|
1 | 1 | # fileserverclient.py - client for communicating with the cache process |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2013 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | import hashlib | |
|
11 | 10 | import io |
|
12 | 11 | import os |
|
13 | 12 | import threading |
|
14 | 13 | import time |
|
15 | 14 | import zlib |
|
16 | 15 | |
|
17 | 16 | from mercurial.i18n import _ |
|
18 | 17 | from mercurial.node import bin, hex, nullid |
|
19 | 18 | from mercurial import ( |
|
20 | 19 | error, |
|
21 | 20 | node, |
|
22 | 21 | pycompat, |
|
23 | 22 | revlog, |
|
24 | 23 | sshpeer, |
|
25 | 24 | util, |
|
26 | 25 | wireprotov1peer, |
|
27 | 26 | ) |
|
28 | from mercurial.utils import procutil | |

27 | from mercurial.utils import ( | |
|
28 | hashutil, | |
|
29 | procutil, | |
|
30 | ) | |
|
29 | 31 | |
|
30 | 32 | from . import ( |
|
31 | 33 | constants, |
|
32 | 34 | contentstore, |
|
33 | 35 | metadatastore, |
|
34 | 36 | ) |
|
35 | 37 | |
|
36 | 38 | _sshv1peer = sshpeer.sshv1peer |
|
37 | 39 | |
|
38 | 40 | # Statistics for debugging |
|
39 | 41 | fetchcost = 0 |
|
40 | 42 | fetches = 0 |
|
41 | 43 | fetched = 0 |
|
42 | 44 | fetchmisses = 0 |
|
43 | 45 | |
|
44 | 46 | _lfsmod = None |
|
45 | 47 | |
|
46 | 48 | |
|
47 | 49 | def getcachekey(reponame, file, id): |
|
48 | pathhash = node.hex(hashlib.sha1(file).digest()) | |

50 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
|
49 | 51 | return os.path.join(reponame, pathhash[:2], pathhash[2:], id) |
|
50 | 52 | |
|
51 | 53 | |
|
52 | 54 | def getlocalkey(file, id): |
|
53 | pathhash = node.hex(hashlib.sha1(file).digest()) | |

55 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
|
54 | 56 | return os.path.join(pathhash, id) |
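
Editorial note: getcachekey and getlocalkey above define the on-disk layout that basestore._listkeys walks back: the file name is hashed with sha1, the shared cache nests it under reponame/<2-char prefix>/<38-char rest>/<node>, and the local store uses <full hash>/<node>. A sketch using hashlib directly for clarity (the code in this change now goes through mercurial's hashutil wrapper instead):

    import hashlib
    import os

    def getcachekey(reponame, file, id):
        # shared cache: reponame/ab/cdef.../<node hex>
        pathhash = hashlib.sha1(file).hexdigest()
        return os.path.join(reponame, pathhash[:2], pathhash[2:], id)

    def getlocalkey(file, id):
        # local store: <full hash>/<node hex>
        pathhash = hashlib.sha1(file).hexdigest()
        return os.path.join(pathhash, id)

    print(getcachekey('myrepo', b'foo/bar.py', '0' * 40))
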
|
55 | 57 | |
|
56 | 58 | |
|
57 | 59 | def peersetup(ui, peer): |
|
58 | 60 | class remotefilepeer(peer.__class__): |
|
59 | 61 | @wireprotov1peer.batchable |
|
60 | 62 | def x_rfl_getfile(self, file, node): |
|
61 | 63 | if not self.capable(b'x_rfl_getfile'): |
|
62 | 64 | raise error.Abort( |
|
63 | 65 | b'configured remotefile server does not support getfile' |
|
64 | 66 | ) |
|
65 | 67 | f = wireprotov1peer.future() |
|
66 | 68 | yield {b'file': file, b'node': node}, f |
|
67 | 69 | code, data = f.value.split(b'\0', 1) |
|
68 | 70 | if int(code): |
|
69 | 71 | raise error.LookupError(file, node, data) |
|
70 | 72 | yield data |
|
71 | 73 | |
|
72 | 74 | @wireprotov1peer.batchable |
|
73 | 75 | def x_rfl_getflogheads(self, path): |
|
74 | 76 | if not self.capable(b'x_rfl_getflogheads'): |
|
75 | 77 | raise error.Abort( |
|
76 | 78 | b'configured remotefile server does not ' |
|
77 | 79 | b'support getflogheads' |
|
78 | 80 | ) |
|
79 | 81 | f = wireprotov1peer.future() |
|
80 | 82 | yield {b'path': path}, f |
|
81 | 83 | heads = f.value.split(b'\n') if f.value else [] |
|
82 | 84 | yield heads |
|
83 | 85 | |
|
84 | 86 | def _updatecallstreamopts(self, command, opts): |
|
85 | 87 | if command != b'getbundle': |
|
86 | 88 | return |
|
87 | 89 | if ( |
|
88 | 90 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
89 | 91 | not in self.capabilities() |
|
90 | 92 | ): |
|
91 | 93 | return |
|
92 | 94 | if not util.safehasattr(self, '_localrepo'): |
|
93 | 95 | return |
|
94 | 96 | if ( |
|
95 | 97 | constants.SHALLOWREPO_REQUIREMENT |
|
96 | 98 | not in self._localrepo.requirements |
|
97 | 99 | ): |
|
98 | 100 | return |
|
99 | 101 | |
|
100 | 102 | bundlecaps = opts.get(b'bundlecaps') |
|
101 | 103 | if bundlecaps: |
|
102 | 104 | bundlecaps = [bundlecaps] |
|
103 | 105 | else: |
|
104 | 106 | bundlecaps = [] |
|
105 | 107 | |
|
106 | 108 | # shallow, includepattern, and excludepattern are a hacky way of |
|
107 | 109 | # carrying over data from the local repo to this getbundle |
|
108 | 110 | # command. We need to do it this way because bundle1 getbundle |
|
109 | 111 | # doesn't provide any other place we can hook in to manipulate |
|
110 | 112 | # getbundle args before it goes across the wire. Once we get rid |
|
111 | 113 | # of bundle1, we can use bundle2's _pullbundle2extraprepare to |
|
112 | 114 | # do this more cleanly. |
|
113 | 115 | bundlecaps.append(constants.BUNDLE2_CAPABLITY) |
|
114 | 116 | if self._localrepo.includepattern: |
|
115 | 117 | patterns = b'\0'.join(self._localrepo.includepattern) |
|
116 | 118 | includecap = b"includepattern=" + patterns |
|
117 | 119 | bundlecaps.append(includecap) |
|
118 | 120 | if self._localrepo.excludepattern: |
|
119 | 121 | patterns = b'\0'.join(self._localrepo.excludepattern) |
|
120 | 122 | excludecap = b"excludepattern=" + patterns |
|
121 | 123 | bundlecaps.append(excludecap) |
|
122 | 124 | opts[b'bundlecaps'] = b','.join(bundlecaps) |
|
123 | 125 | |
|
124 | 126 | def _sendrequest(self, command, args, **opts): |
|
125 | 127 | self._updatecallstreamopts(command, args) |
|
126 | 128 | return super(remotefilepeer, self)._sendrequest( |
|
127 | 129 | command, args, **opts |
|
128 | 130 | ) |
|
129 | 131 | |
|
130 | 132 | def _callstream(self, command, **opts): |
|
131 | 133 | supertype = super(remotefilepeer, self) |
|
132 | 134 | if not util.safehasattr(supertype, '_sendrequest'): |
|
133 | 135 | self._updatecallstreamopts(command, pycompat.byteskwargs(opts)) |
|
134 | 136 | return super(remotefilepeer, self)._callstream(command, **opts) |
|
135 | 137 | |
|
136 | 138 | peer.__class__ = remotefilepeer |
|
137 | 139 | |
|
138 | 140 | |
|
139 | 141 | class cacheconnection(object): |
|
140 | 142 | """The connection for communicating with the remote cache. Performs |
|
141 | 143 | gets and sets by communicating with an external process that has the |
|
142 | 144 | cache-specific implementation. |
|
143 | 145 | """ |
|
144 | 146 | |
|
145 | 147 | def __init__(self): |
|
146 | 148 | self.pipeo = self.pipei = self.pipee = None |
|
147 | 149 | self.subprocess = None |
|
148 | 150 | self.connected = False |
|
149 | 151 | |
|
150 | 152 | def connect(self, cachecommand): |
|
151 | 153 | if self.pipeo: |
|
152 | 154 | raise error.Abort(_(b"cache connection already open")) |
|
153 | 155 | self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4( |
|
154 | 156 | cachecommand |
|
155 | 157 | ) |
|
156 | 158 | self.connected = True |
|
157 | 159 | |
|
158 | 160 | def close(self): |
|
159 | 161 | def tryclose(pipe): |
|
160 | 162 | try: |
|
161 | 163 | pipe.close() |
|
162 | 164 | except Exception: |
|
163 | 165 | pass |
|
164 | 166 | |
|
165 | 167 | if self.connected: |
|
166 | 168 | try: |
|
167 | 169 | self.pipei.write(b"exit\n") |
|
168 | 170 | except Exception: |
|
169 | 171 | pass |
|
170 | 172 | tryclose(self.pipei) |
|
171 | 173 | self.pipei = None |
|
172 | 174 | tryclose(self.pipeo) |
|
173 | 175 | self.pipeo = None |
|
174 | 176 | tryclose(self.pipee) |
|
175 | 177 | self.pipee = None |
|
176 | 178 | try: |
|
177 | 179 | # Wait for process to terminate, making sure to avoid deadlock. |
|
178 | 180 | # See https://docs.python.org/2/library/subprocess.html for |
|
179 | 181 | # warnings about wait() and deadlocking. |
|
180 | 182 | self.subprocess.communicate() |
|
181 | 183 | except Exception: |
|
182 | 184 | pass |
|
183 | 185 | self.subprocess = None |
|
184 | 186 | self.connected = False |
|
185 | 187 | |
|
186 | 188 | def request(self, request, flush=True): |
|
187 | 189 | if self.connected: |
|
188 | 190 | try: |
|
189 | 191 | self.pipei.write(request) |
|
190 | 192 | if flush: |
|
191 | 193 | self.pipei.flush() |
|
192 | 194 | except IOError: |
|
193 | 195 | self.close() |
|
194 | 196 | |
|
195 | 197 | def receiveline(self): |
|
196 | 198 | if not self.connected: |
|
197 | 199 | return None |
|
198 | 200 | try: |
|
199 | 201 | result = self.pipeo.readline()[:-1] |
|
200 | 202 | if not result: |
|
201 | 203 | self.close() |
|
202 | 204 | except IOError: |
|
203 | 205 | self.close() |
|
204 | 206 | |
|
205 | 207 | return result |
|
206 | 208 | |
|
207 | 209 | |
|
208 | 210 | def _getfilesbatch( |
|
209 | 211 | remote, receivemissing, progresstick, missed, idmap, batchsize |
|
210 | 212 | ): |
|
211 | 213 | # Over http(s), iterbatch is a streamy method and we can start |
|
212 | 214 | # looking at results early. This means we send one (potentially |
|
213 | 215 | # large) request, but then we show nice progress as we process |
|
214 | 216 | # file results, rather than showing chunks of $batchsize in |
|
215 | 217 | # progress. |
|
216 | 218 | # |
|
217 | 219 | # Over ssh, iterbatch isn't streamy because batch() wasn't |
|
218 | 220 | # explicitly designed as a streaming method. In the future we |
|
219 | 221 | # should probably introduce a streambatch() method upstream and |
|
220 | 222 | # use that for this. |
|
221 | 223 | with remote.commandexecutor() as e: |
|
222 | 224 | futures = [] |
|
223 | 225 | for m in missed: |
|
224 | 226 | futures.append( |
|
225 | 227 | e.callcommand( |
|
226 | 228 | b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]} |
|
227 | 229 | ) |
|
228 | 230 | ) |
|
229 | 231 | |
|
230 | 232 | for i, m in enumerate(missed): |
|
231 | 233 | r = futures[i].result() |
|
232 | 234 | futures[i] = None # release memory |
|
233 | 235 | file_ = idmap[m] |
|
234 | 236 | node = m[-40:] |
|
235 | 237 | receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node) |
|
236 | 238 | progresstick() |
|
237 | 239 | |
|
238 | 240 | |
|
239 | 241 | def _getfiles_optimistic( |
|
240 | 242 | remote, receivemissing, progresstick, missed, idmap, step |
|
241 | 243 | ): |
|
242 | 244 | remote._callstream(b"x_rfl_getfiles") |
|
243 | 245 | i = 0 |
|
244 | 246 | pipeo = remote._pipeo |
|
245 | 247 | pipei = remote._pipei |
|
246 | 248 | while i < len(missed): |
|
247 | 249 | # issue a batch of requests |
|
248 | 250 | start = i |
|
249 | 251 | end = min(len(missed), start + step) |
|
250 | 252 | i = end |
|
251 | 253 | for missingid in missed[start:end]: |
|
252 | 254 | # issue new request |
|
253 | 255 | versionid = missingid[-40:] |
|
254 | 256 | file = idmap[missingid] |
|
255 | 257 | sshrequest = b"%s%s\n" % (versionid, file) |
|
256 | 258 | pipeo.write(sshrequest) |
|
257 | 259 | pipeo.flush() |
|
258 | 260 | |
|
259 | 261 | # receive batch results |
|
260 | 262 | for missingid in missed[start:end]: |
|
261 | 263 | versionid = missingid[-40:] |
|
262 | 264 | file = idmap[missingid] |
|
263 | 265 | receivemissing(pipei, file, versionid) |
|
264 | 266 | progresstick() |
|
265 | 267 | |
|
266 | 268 | # End the command |
|
267 | 269 | pipeo.write(b'\n') |
|
268 | 270 | pipeo.flush() |
|
269 | 271 | |
|
270 | 272 | |
|
271 | 273 | def _getfiles_threaded( |
|
272 | 274 | remote, receivemissing, progresstick, missed, idmap, step |
|
273 | 275 | ): |
|
274 | 276 | remote._callstream(b"getfiles") |
|
275 | 277 | pipeo = remote._pipeo |
|
276 | 278 | pipei = remote._pipei |
|
277 | 279 | |
|
278 | 280 | def writer(): |
|
279 | 281 | for missingid in missed: |
|
280 | 282 | versionid = missingid[-40:] |
|
281 | 283 | file = idmap[missingid] |
|
282 | 284 | sshrequest = b"%s%s\n" % (versionid, file) |
|
283 | 285 | pipeo.write(sshrequest) |
|
284 | 286 | pipeo.flush() |
|
285 | 287 | |
|
286 | 288 | writerthread = threading.Thread(target=writer) |
|
287 | 289 | writerthread.daemon = True |
|
288 | 290 | writerthread.start() |
|
289 | 291 | |
|
290 | 292 | for missingid in missed: |
|
291 | 293 | versionid = missingid[-40:] |
|
292 | 294 | file = idmap[missingid] |
|
293 | 295 | receivemissing(pipei, file, versionid) |
|
294 | 296 | progresstick() |
|
295 | 297 | |
|
296 | 298 | writerthread.join() |
|
297 | 299 | # End the command |
|
298 | 300 | pipeo.write(b'\n') |
|
299 | 301 | pipeo.flush() |
|
300 | 302 | |
|
301 | 303 | |
|
302 | 304 | class fileserverclient(object): |
|
303 | 305 | """A client for requesting files from the remote file server. |
|
304 | 306 | """ |
|
305 | 307 | |
|
306 | 308 | def __init__(self, repo): |
|
307 | 309 | ui = repo.ui |
|
308 | 310 | self.repo = repo |
|
309 | 311 | self.ui = ui |
|
310 | 312 | self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess") |
|
311 | 313 | if self.cacheprocess: |
|
312 | 314 | self.cacheprocess = util.expandpath(self.cacheprocess) |
|
313 | 315 | |
|
314 | 316 | # This option causes remotefilelog to pass the full file path to the |
|
315 | 317 | # cacheprocess instead of a hashed key. |
|
316 | 318 | self.cacheprocesspasspath = ui.configbool( |
|
317 | 319 | b"remotefilelog", b"cacheprocess.includepath" |
|
318 | 320 | ) |
|
319 | 321 | |
|
320 | 322 | self.debugoutput = ui.configbool(b"remotefilelog", b"debug") |
|
321 | 323 | |
|
322 | 324 | self.remotecache = cacheconnection() |
|
323 | 325 | |
|
324 | 326 | def setstore(self, datastore, historystore, writedata, writehistory): |
|
325 | 327 | self.datastore = datastore |
|
326 | 328 | self.historystore = historystore |
|
327 | 329 | self.writedata = writedata |
|
328 | 330 | self.writehistory = writehistory |
|
329 | 331 | |
|
330 | 332 | def _connect(self): |
|
331 | 333 | return self.repo.connectionpool.get(self.repo.fallbackpath) |
|
332 | 334 | |
|
333 | 335 | def request(self, fileids): |
|
334 | 336 | """Takes a list of filename/node pairs and fetches them from the |
|
335 | 337 | server. Files are stored in the local cache. |
|
336 | 338 | A list of nodes that the server couldn't find is returned. |
|
337 | 339 | If the connection fails, an exception is raised. |
|
338 | 340 | """ |
|
339 | 341 | if not self.remotecache.connected: |
|
340 | 342 | self.connect() |
|
341 | 343 | cache = self.remotecache |
|
342 | 344 | writedata = self.writedata |
|
343 | 345 | |
|
344 | 346 | repo = self.repo |
|
345 | 347 | total = len(fileids) |
|
346 | 348 | request = b"get\n%d\n" % total |
|
347 | 349 | idmap = {} |
|
348 | 350 | reponame = repo.name |
|
349 | 351 | for file, id in fileids: |
|
350 | 352 | fullid = getcachekey(reponame, file, id) |
|
351 | 353 | if self.cacheprocesspasspath: |
|
352 | 354 | request += file + b'\0' |
|
353 | 355 | request += fullid + b"\n" |
|
354 | 356 | idmap[fullid] = file |
|
355 | 357 | |
|
356 | 358 | cache.request(request) |
|
357 | 359 | |
|
358 | 360 | progress = self.ui.makeprogress(_(b'downloading'), total=total) |
|
359 | 361 | progress.update(0) |
|
360 | 362 | |
|
361 | 363 | missed = [] |
|
362 | 364 | while True: |
|
363 | 365 | missingid = cache.receiveline() |
|
364 | 366 | if not missingid: |
|
365 | 367 | missedset = set(missed) |
|
366 | 368 | for missingid in idmap: |
|
367 | 369 | if not missingid in missedset: |
|
368 | 370 | missed.append(missingid) |
|
369 | 371 | self.ui.warn( |
|
370 | 372 | _( |
|
371 | 373 | b"warning: cache connection closed early - " |
|
372 | 374 | + b"falling back to server\n" |
|
373 | 375 | ) |
|
374 | 376 | ) |
|
375 | 377 | break |
|
376 | 378 | if missingid == b"0": |
|
377 | 379 | break |
|
378 | 380 | if missingid.startswith(b"_hits_"): |
|
379 | 381 | # receive progress reports |
|
380 | 382 | parts = missingid.split(b"_") |
|
381 | 383 | progress.increment(int(parts[2])) |
|
382 | 384 | continue |
|
383 | 385 | |
|
384 | 386 | missed.append(missingid) |
|
385 | 387 | |
|
386 | 388 | global fetchmisses |
|
387 | 389 | fetchmisses += len(missed) |
|
388 | 390 | |
|
389 | 391 | fromcache = total - len(missed) |
|
390 | 392 | progress.update(fromcache, total=total) |
|
391 | 393 | self.ui.log( |
|
392 | 394 | b"remotefilelog", |
|
393 | 395 | b"remote cache hit rate is %r of %r\n", |
|
394 | 396 | fromcache, |
|
395 | 397 | total, |
|
396 | 398 | hit=fromcache, |
|
397 | 399 | total=total, |
|
398 | 400 | ) |
|
399 | 401 | |
|
400 | 402 | oldumask = os.umask(0o002) |
|
401 | 403 | try: |
|
402 | 404 | # receive cache misses from master |
|
403 | 405 | if missed: |
|
404 | 406 | # When verbose is true, sshpeer prints 'running ssh...' |
|
405 | 407 | # to stdout, which can interfere with some command |
|
406 | 408 | # outputs |
|
407 | 409 | verbose = self.ui.verbose |
|
408 | 410 | self.ui.verbose = False |
|
409 | 411 | try: |
|
410 | 412 | with self._connect() as conn: |
|
411 | 413 | remote = conn.peer |
|
412 | 414 | if remote.capable( |
|
413 | 415 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
414 | 416 | ): |
|
415 | 417 | if not isinstance(remote, _sshv1peer): |
|
416 | 418 | raise error.Abort( |
|
417 | 419 | b'remotefilelog requires ssh servers' |
|
418 | 420 | ) |
|
419 | 421 | step = self.ui.configint( |
|
420 | 422 | b'remotefilelog', b'getfilesstep' |
|
421 | 423 | ) |
|
422 | 424 | getfilestype = self.ui.config( |
|
423 | 425 | b'remotefilelog', b'getfilestype' |
|
424 | 426 | ) |
|
425 | 427 | if getfilestype == b'threaded': |
|
426 | 428 | _getfiles = _getfiles_threaded |
|
427 | 429 | else: |
|
428 | 430 | _getfiles = _getfiles_optimistic |
|
429 | 431 | _getfiles( |
|
430 | 432 | remote, |
|
431 | 433 | self.receivemissing, |
|
432 | 434 | progress.increment, |
|
433 | 435 | missed, |
|
434 | 436 | idmap, |
|
435 | 437 | step, |
|
436 | 438 | ) |
|
437 | 439 | elif remote.capable(b"x_rfl_getfile"): |
|
438 | 440 | if remote.capable(b'batch'): |
|
439 | 441 | batchdefault = 100 |
|
440 | 442 | else: |
|
441 | 443 | batchdefault = 10 |
|
442 | 444 | batchsize = self.ui.configint( |
|
443 | 445 | b'remotefilelog', b'batchsize', batchdefault |
|
444 | 446 | ) |
|
445 | 447 | self.ui.debug( |
|
446 | 448 | b'requesting %d files from ' |
|
447 | 449 | b'remotefilelog server...\n' % len(missed) |
|
448 | 450 | ) |
|
449 | 451 | _getfilesbatch( |
|
450 | 452 | remote, |
|
451 | 453 | self.receivemissing, |
|
452 | 454 | progress.increment, |
|
453 | 455 | missed, |
|
454 | 456 | idmap, |
|
455 | 457 | batchsize, |
|
456 | 458 | ) |
|
457 | 459 | else: |
|
458 | 460 | raise error.Abort( |
|
459 | 461 | b"configured remotefilelog server" |
|
460 | 462 | b" does not support remotefilelog" |
|
461 | 463 | ) |
|
462 | 464 | |
|
463 | 465 | self.ui.log( |
|
464 | 466 | b"remotefilefetchlog", |
|
465 | 467 | b"Success\n", |
|
466 | 468 | fetched_files=progress.pos - fromcache, |
|
467 | 469 | total_to_fetch=total - fromcache, |
|
468 | 470 | ) |
|
469 | 471 | except Exception: |
|
470 | 472 | self.ui.log( |
|
471 | 473 | b"remotefilefetchlog", |
|
472 | 474 | b"Fail\n", |
|
473 | 475 | fetched_files=progress.pos - fromcache, |
|
474 | 476 | total_to_fetch=total - fromcache, |
|
475 | 477 | ) |
|
476 | 478 | raise |
|
477 | 479 | finally: |
|
478 | 480 | self.ui.verbose = verbose |
|
479 | 481 | # send to memcache |
|
480 | 482 | request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed)) |
|
481 | 483 | cache.request(request) |
|
482 | 484 | |
|
483 | 485 | progress.complete() |
|
484 | 486 | |
|
485 | 487 | # mark ourselves as a user of this cache |
|
486 | 488 | writedata.markrepo(self.repo.path) |
|
487 | 489 | finally: |
|
488 | 490 | os.umask(oldumask) |
|
489 | 491 | |
|
490 | 492 | def receivemissing(self, pipe, filename, node): |
|
491 | 493 | line = pipe.readline()[:-1] |
|
492 | 494 | if not line: |
|
493 | 495 | raise error.ResponseError( |
|
494 | 496 | _(b"error downloading file contents:"), |
|
495 | 497 | _(b"connection closed early"), |
|
496 | 498 | ) |
|
497 | 499 | size = int(line) |
|
498 | 500 | data = pipe.read(size) |
|
499 | 501 | if len(data) != size: |
|
500 | 502 | raise error.ResponseError( |
|
501 | 503 | _(b"error downloading file contents:"), |
|
502 | 504 | _(b"only received %s of %s bytes") % (len(data), size), |
|
503 | 505 | ) |
|
504 | 506 | |
|
505 | 507 | self.writedata.addremotefilelognode( |
|
506 | 508 | filename, bin(node), zlib.decompress(data) |
|
507 | 509 | ) |
|
508 | 510 | |
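
Editorial note: receivemissing above expects each file answer to be framed as a decimal length line followed by that many bytes of zlib-compressed blob data. A minimal sketch of a matching sender, under the assumption that the payload is simply the compressed file blob the reader decompresses (sendmissing is a hypothetical helper, not a remotefilelog API):

    import io
    import zlib

    def sendmissing(pipe, blob):
        # Frame one answer: "<size>\n" followed by the compressed payload.
        payload = zlib.compress(blob)
        pipe.write(b'%d\n' % len(payload))
        pipe.write(payload)

    # Round-trip against the framing receivemissing parses.
    buf = io.BytesIO()
    sendmissing(buf, b'file contents')
    buf.seek(0)
    size = int(buf.readline()[:-1])
    assert zlib.decompress(buf.read(size)) == b'file contents'
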
|
509 | 511 | def connect(self): |
|
510 | 512 | if self.cacheprocess: |
|
511 | 513 | cmd = b"%s %s" % (self.cacheprocess, self.writedata._path) |
|
512 | 514 | self.remotecache.connect(cmd) |
|
513 | 515 | else: |
|
514 | 516 | # If no cache process is specified, we fake one that always |
|
515 | 517 | # returns cache misses. This enables tests to run easily |
|
516 | 518 | # and may eventually allow us to be a drop in replacement |
|
517 | 519 | # for the largefiles extension. |
|
518 | 520 | class simplecache(object): |
|
519 | 521 | def __init__(self): |
|
520 | 522 | self.missingids = [] |
|
521 | 523 | self.connected = True |
|
522 | 524 | |
|
523 | 525 | def close(self): |
|
524 | 526 | pass |
|
525 | 527 | |
|
526 | 528 | def request(self, value, flush=True): |
|
527 | 529 | lines = value.split(b"\n") |
|
528 | 530 | if lines[0] != b"get": |
|
529 | 531 | return |
|
530 | 532 | self.missingids = lines[2:-1] |
|
531 | 533 | self.missingids.append(b'0') |
|
532 | 534 | |
|
533 | 535 | def receiveline(self): |
|
534 | 536 | if len(self.missingids) > 0: |
|
535 | 537 | return self.missingids.pop(0) |
|
536 | 538 | return None |
|
537 | 539 | |
|
538 | 540 | self.remotecache = simplecache() |
|
539 | 541 | |
|
540 | 542 | def close(self): |
|
541 | 543 | if fetches: |
|
542 | 544 | msg = ( |
|
543 | 545 | b"%d files fetched over %d fetches - " |
|
544 | 546 | + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n" |
|
545 | 547 | ) % ( |
|
546 | 548 | fetched, |
|
547 | 549 | fetches, |
|
548 | 550 | fetchmisses, |
|
549 | 551 | float(fetched - fetchmisses) / float(fetched) * 100.0, |
|
550 | 552 | fetchcost, |
|
551 | 553 | ) |
|
552 | 554 | if self.debugoutput: |
|
553 | 555 | self.ui.warn(msg) |
|
554 | 556 | self.ui.log( |
|
555 | 557 | b"remotefilelog.prefetch", |
|
556 | 558 | msg.replace(b"%", b"%%"), |
|
557 | 559 | remotefilelogfetched=fetched, |
|
558 | 560 | remotefilelogfetches=fetches, |
|
559 | 561 | remotefilelogfetchmisses=fetchmisses, |
|
560 | 562 | remotefilelogfetchtime=fetchcost * 1000, |
|
561 | 563 | ) |
|
562 | 564 | |
|
563 | 565 | if self.remotecache.connected: |
|
564 | 566 | self.remotecache.close() |
|
565 | 567 | |
|
566 | 568 | def prefetch( |
|
567 | 569 | self, fileids, force=False, fetchdata=True, fetchhistory=False |
|
568 | 570 | ): |
|
569 | 571 | """downloads the given file versions to the cache |
|
570 | 572 | """ |
|
571 | 573 | repo = self.repo |
|
572 | 574 | idstocheck = [] |
|
573 | 575 | for file, id in fileids: |
|
574 | 576 | # hack |
|
575 | 577 | # - we don't use .hgtags |
|
576 | 578 | # - workingctx produces ids with length 42, |
|
577 | 579 | # which we skip since they aren't in any cache |
|
578 | 580 | if ( |
|
579 | 581 | file == b'.hgtags' |
|
580 | 582 | or len(id) == 42 |
|
581 | 583 | or not repo.shallowmatch(file) |
|
582 | 584 | ): |
|
583 | 585 | continue |
|
584 | 586 | |
|
585 | 587 | idstocheck.append((file, bin(id))) |
|
586 | 588 | |
|
587 | 589 | datastore = self.datastore |
|
588 | 590 | historystore = self.historystore |
|
589 | 591 | if force: |
|
590 | 592 | datastore = contentstore.unioncontentstore(*repo.shareddatastores) |
|
591 | 593 | historystore = metadatastore.unionmetadatastore( |
|
592 | 594 | *repo.sharedhistorystores |
|
593 | 595 | ) |
|
594 | 596 | |
|
595 | 597 | missingids = set() |
|
596 | 598 | if fetchdata: |
|
597 | 599 | missingids.update(datastore.getmissing(idstocheck)) |
|
598 | 600 | if fetchhistory: |
|
599 | 601 | missingids.update(historystore.getmissing(idstocheck)) |
|
600 | 602 | |
|
601 | 603 | # partition missing nodes into nullid and not-nullid so we can |
|
602 | 604 | # warn about this filtering potentially shadowing bugs. |
|
603 | 605 | nullids = len([None for unused, id in missingids if id == nullid]) |
|
604 | 606 | if nullids: |
|
605 | 607 | missingids = [(f, id) for f, id in missingids if id != nullid] |
|
606 | 608 | repo.ui.develwarn( |
|
607 | 609 | ( |
|
608 | 610 | b'remotefilelog not fetching %d null revs' |
|
609 | 611 | b' - this is likely hiding bugs' % nullids |
|
610 | 612 | ), |
|
611 | 613 | config=b'remotefilelog-ext', |
|
612 | 614 | ) |
|
613 | 615 | if missingids: |
|
614 | 616 | global fetches, fetched, fetchcost |
|
615 | 617 | fetches += 1 |
|
616 | 618 | |
|
617 | 619 | # We want to be able to detect excess individual file downloads, so |
|
618 | 620 | # let's log that information for debugging. |
|
619 | 621 | if fetches >= 15 and fetches < 18: |
|
620 | 622 | if fetches == 15: |
|
621 | 623 | fetchwarning = self.ui.config( |
|
622 | 624 | b'remotefilelog', b'fetchwarning' |
|
623 | 625 | ) |
|
624 | 626 | if fetchwarning: |
|
625 | 627 | self.ui.warn(fetchwarning + b'\n') |
|
626 | 628 | self.logstacktrace() |
|
627 | 629 | missingids = [(file, hex(id)) for file, id in sorted(missingids)] |
|
628 | 630 | fetched += len(missingids) |
|
629 | 631 | start = time.time() |
|
630 | 632 | missingids = self.request(missingids) |
|
631 | 633 | if missingids: |
|
632 | 634 | raise error.Abort( |
|
633 | 635 | _(b"unable to download %d files") % len(missingids) |
|
634 | 636 | ) |
|
635 | 637 | fetchcost += time.time() - start |
|
636 | 638 | self._lfsprefetch(fileids) |
|
637 | 639 | |
|
638 | 640 | def _lfsprefetch(self, fileids): |
|
639 | 641 | if not _lfsmod or not util.safehasattr( |
|
640 | 642 | self.repo.svfs, b'lfslocalblobstore' |
|
641 | 643 | ): |
|
642 | 644 | return |
|
643 | 645 | if not _lfsmod.wrapper.candownload(self.repo): |
|
644 | 646 | return |
|
645 | 647 | pointers = [] |
|
646 | 648 | store = self.repo.svfs.lfslocalblobstore |
|
647 | 649 | for file, id in fileids: |
|
648 | 650 | node = bin(id) |
|
649 | 651 | rlog = self.repo.file(file) |
|
650 | 652 | if rlog.flags(node) & revlog.REVIDX_EXTSTORED: |
|
651 | 653 | text = rlog.rawdata(node) |
|
652 | 654 | p = _lfsmod.pointer.deserialize(text) |
|
653 | 655 | oid = p.oid() |
|
654 | 656 | if not store.has(oid): |
|
655 | 657 | pointers.append(p) |
|
656 | 658 | if len(pointers) > 0: |
|
657 | 659 | self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store) |
|
658 | 660 | assert all(store.has(p.oid()) for p in pointers) |
|
659 | 661 | |
|
660 | 662 | def logstacktrace(self): |
|
661 | 663 | import traceback |
|
662 | 664 | |
|
663 | 665 | self.ui.log( |
|
664 | 666 | b'remotefilelog', |
|
665 | 667 | b'excess remotefilelog fetching:\n%s\n', |
|
666 | 668 | b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()), |
|
667 | 669 | ) |
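
The client above talks to the cache process over a line-oriented pipe: request() writes newline-separated commands (the b"set\n%d\n%s\n" message above, plus a matching "get" form that the simplecache stub parses), and for every miss receivemissing() expects a decimal size line followed by exactly that many bytes of zlib-compressed file content. The sketch below imitates only that read step; read_one_blob, the BytesIO pipe and the payload are illustrative stand-ins, not part of the extension.

    import io
    import zlib

    def read_one_blob(pipe):
        # same shape as receivemissing() above: a size line, then `size`
        # bytes of zlib-compressed content
        size = int(pipe.readline()[:-1])
        payload = pipe.read(size)
        if len(payload) != size:
            raise IOError('connection closed early')
        return zlib.decompress(payload)

    blob = zlib.compress(b'file contents\n')
    pipe = io.BytesIO(b'%d\n%s' % (len(blob), blob))
    assert read_one_blob(pipe) == b'file contents\n'
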
@@ -1,572 +1,572 b'' | |||
|
1 | 1 | from __future__ import absolute_import |
|
2 | 2 | |
|
3 | import hashlib | |
|
4 | 3 | import struct |
|
5 | 4 | |
|
6 | 5 | from mercurial.node import hex, nullid |
|
7 | 6 | from mercurial import ( |
|
8 | 7 | pycompat, |
|
9 | 8 | util, |
|
10 | 9 | ) |
|
10 | from mercurial.utils import hashutil | |
|
11 | 11 | from . import ( |
|
12 | 12 | basepack, |
|
13 | 13 | constants, |
|
14 | 14 | shallowutil, |
|
15 | 15 | ) |
|
16 | 16 | |
|
17 | 17 | # (filename hash, offset, size) |
|
18 | 18 | INDEXFORMAT2 = b'!20sQQII' |
|
19 | 19 | INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2) |
|
20 | 20 | NODELENGTH = 20 |
|
21 | 21 | |
|
22 | 22 | NODEINDEXFORMAT = b'!20sQ' |
|
23 | 23 | NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT) |
|
24 | 24 | |
|
25 | 25 | # (node, p1, p2, linknode) |
|
26 | 26 | PACKFORMAT = b"!20s20s20s20sH" |
|
27 | 27 | PACKENTRYLENGTH = 82 |
|
28 | 28 | |
|
29 | 29 | ENTRYCOUNTSIZE = 4 |
|
30 | 30 | |
|
31 | 31 | INDEXSUFFIX = b'.histidx' |
|
32 | 32 | PACKSUFFIX = b'.histpack' |
|
33 | 33 | |
|
34 | 34 | ANC_NODE = 0 |
|
35 | 35 | ANC_P1NODE = 1 |
|
36 | 36 | ANC_P2NODE = 2 |
|
37 | 37 | ANC_LINKNODE = 3 |
|
38 | 38 | ANC_COPYFROM = 4 |
|
39 | 39 | |
|
40 | 40 | |
|
41 | 41 | class historypackstore(basepack.basepackstore): |
|
42 | 42 | INDEXSUFFIX = INDEXSUFFIX |
|
43 | 43 | PACKSUFFIX = PACKSUFFIX |
|
44 | 44 | |
|
45 | 45 | def getpack(self, path): |
|
46 | 46 | return historypack(path) |
|
47 | 47 | |
|
48 | 48 | def getancestors(self, name, node, known=None): |
|
49 | 49 | for pack in self.packs: |
|
50 | 50 | try: |
|
51 | 51 | return pack.getancestors(name, node, known=known) |
|
52 | 52 | except KeyError: |
|
53 | 53 | pass |
|
54 | 54 | |
|
55 | 55 | for pack in self.refresh(): |
|
56 | 56 | try: |
|
57 | 57 | return pack.getancestors(name, node, known=known) |
|
58 | 58 | except KeyError: |
|
59 | 59 | pass |
|
60 | 60 | |
|
61 | 61 | raise KeyError((name, node)) |
|
62 | 62 | |
|
63 | 63 | def getnodeinfo(self, name, node): |
|
64 | 64 | for pack in self.packs: |
|
65 | 65 | try: |
|
66 | 66 | return pack.getnodeinfo(name, node) |
|
67 | 67 | except KeyError: |
|
68 | 68 | pass |
|
69 | 69 | |
|
70 | 70 | for pack in self.refresh(): |
|
71 | 71 | try: |
|
72 | 72 | return pack.getnodeinfo(name, node) |
|
73 | 73 | except KeyError: |
|
74 | 74 | pass |
|
75 | 75 | |
|
76 | 76 | raise KeyError((name, node)) |
|
77 | 77 | |
|
78 | 78 | def add(self, filename, node, p1, p2, linknode, copyfrom): |
|
79 | 79 | raise RuntimeError( |
|
80 | 80 | b"cannot add to historypackstore (%s:%s)" % (filename, hex(node)) |
|
81 | 81 | ) |
|
82 | 82 | |
|
83 | 83 | |
|
84 | 84 | class historypack(basepack.basepack): |
|
85 | 85 | INDEXSUFFIX = INDEXSUFFIX |
|
86 | 86 | PACKSUFFIX = PACKSUFFIX |
|
87 | 87 | |
|
88 | 88 | SUPPORTED_VERSIONS = [2] |
|
89 | 89 | |
|
90 | 90 | def __init__(self, path): |
|
91 | 91 | super(historypack, self).__init__(path) |
|
92 | 92 | self.INDEXFORMAT = INDEXFORMAT2 |
|
93 | 93 | self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2 |
|
94 | 94 | |
|
95 | 95 | def getmissing(self, keys): |
|
96 | 96 | missing = [] |
|
97 | 97 | for name, node in keys: |
|
98 | 98 | try: |
|
99 | 99 | self._findnode(name, node) |
|
100 | 100 | except KeyError: |
|
101 | 101 | missing.append((name, node)) |
|
102 | 102 | |
|
103 | 103 | return missing |
|
104 | 104 | |
|
105 | 105 | def getancestors(self, name, node, known=None): |
|
106 | 106 | """Returns as many ancestors as we're aware of. |
|
107 | 107 | |
|
108 | 108 | return value: { |
|
109 | 109 | node: (p1, p2, linknode, copyfrom), |
|
110 | 110 | ... |
|
111 | 111 | } |
|
112 | 112 | """ |
|
113 | 113 | if known and node in known: |
|
114 | 114 | return [] |
|
115 | 115 | |
|
116 | 116 | ancestors = self._getancestors(name, node, known=known) |
|
117 | 117 | results = {} |
|
118 | 118 | for ancnode, p1, p2, linknode, copyfrom in ancestors: |
|
119 | 119 | results[ancnode] = (p1, p2, linknode, copyfrom) |
|
120 | 120 | |
|
121 | 121 | if not results: |
|
122 | 122 | raise KeyError((name, node)) |
|
123 | 123 | return results |
|
124 | 124 | |
|
125 | 125 | def getnodeinfo(self, name, node): |
|
126 | 126 | # Drop the node from the tuple before returning, since the result should |
|
127 | 127 | # just be (p1, p2, linknode, copyfrom) |
|
128 | 128 | return self._findnode(name, node)[1:] |
|
129 | 129 | |
|
130 | 130 | def _getancestors(self, name, node, known=None): |
|
131 | 131 | if known is None: |
|
132 | 132 | known = set() |
|
133 | 133 | section = self._findsection(name) |
|
134 | 134 | filename, offset, size, nodeindexoffset, nodeindexsize = section |
|
135 | 135 | pending = set((node,)) |
|
136 | 136 | o = 0 |
|
137 | 137 | while o < size: |
|
138 | 138 | if not pending: |
|
139 | 139 | break |
|
140 | 140 | entry, copyfrom = self._readentry(offset + o) |
|
141 | 141 | o += PACKENTRYLENGTH |
|
142 | 142 | if copyfrom: |
|
143 | 143 | o += len(copyfrom) |
|
144 | 144 | |
|
145 | 145 | ancnode = entry[ANC_NODE] |
|
146 | 146 | if ancnode in pending: |
|
147 | 147 | pending.remove(ancnode) |
|
148 | 148 | p1node = entry[ANC_P1NODE] |
|
149 | 149 | p2node = entry[ANC_P2NODE] |
|
150 | 150 | if p1node != nullid and p1node not in known: |
|
151 | 151 | pending.add(p1node) |
|
152 | 152 | if p2node != nullid and p2node not in known: |
|
153 | 153 | pending.add(p2node) |
|
154 | 154 | |
|
155 | 155 | yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom) |
|
156 | 156 | |
|
157 | 157 | def _readentry(self, offset): |
|
158 | 158 | data = self._data |
|
159 | 159 | entry = struct.unpack( |
|
160 | 160 | PACKFORMAT, data[offset : offset + PACKENTRYLENGTH] |
|
161 | 161 | ) |
|
162 | 162 | copyfrom = None |
|
163 | 163 | copyfromlen = entry[ANC_COPYFROM] |
|
164 | 164 | if copyfromlen != 0: |
|
165 | 165 | offset += PACKENTRYLENGTH |
|
166 | 166 | copyfrom = data[offset : offset + copyfromlen] |
|
167 | 167 | return entry, copyfrom |
|
168 | 168 | |
|
169 | 169 | def add(self, filename, node, p1, p2, linknode, copyfrom): |
|
170 | 170 | raise RuntimeError( |
|
171 | 171 | b"cannot add to historypack (%s:%s)" % (filename, hex(node)) |
|
172 | 172 | ) |
|
173 | 173 | |
|
174 | 174 | def _findnode(self, name, node): |
|
175 | 175 | if self.VERSION == 0: |
|
176 | 176 | ancestors = self._getancestors(name, node) |
|
177 | 177 | for ancnode, p1node, p2node, linknode, copyfrom in ancestors: |
|
178 | 178 | if ancnode == node: |
|
179 | 179 | return (ancnode, p1node, p2node, linknode, copyfrom) |
|
180 | 180 | else: |
|
181 | 181 | section = self._findsection(name) |
|
182 | 182 | nodeindexoffset, nodeindexsize = section[3:] |
|
183 | 183 | entry = self._bisect( |
|
184 | 184 | node, |
|
185 | 185 | nodeindexoffset, |
|
186 | 186 | nodeindexoffset + nodeindexsize, |
|
187 | 187 | NODEINDEXENTRYLENGTH, |
|
188 | 188 | ) |
|
189 | 189 | if entry is not None: |
|
190 | 190 | node, offset = struct.unpack(NODEINDEXFORMAT, entry) |
|
191 | 191 | entry, copyfrom = self._readentry(offset) |
|
192 | 192 | # Drop the copyfromlen from the end of entry, and replace it |
|
193 | 193 | # with the copyfrom string. |
|
194 | 194 | return entry[:4] + (copyfrom,) |
|
195 | 195 | |
|
196 | 196 | raise KeyError(b"unable to find history for %s:%s" % (name, hex(node))) |
|
197 | 197 | |
|
198 | 198 | def _findsection(self, name): |
|
199 | 199 | params = self.params |
|
200 | namehash = hashlib.sha1(name).digest() | |
|
200 | namehash = hashutil.sha1(name).digest() | |
|
201 | 201 | fanoutkey = struct.unpack( |
|
202 | 202 | params.fanoutstruct, namehash[: params.fanoutprefix] |
|
203 | 203 | )[0] |
|
204 | 204 | fanout = self._fanouttable |
|
205 | 205 | |
|
206 | 206 | start = fanout[fanoutkey] + params.indexstart |
|
207 | 207 | indexend = self._indexend |
|
208 | 208 | |
|
209 | 209 | for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount): |
|
210 | 210 | end = fanout[i] + params.indexstart |
|
211 | 211 | if end != start: |
|
212 | 212 | break |
|
213 | 213 | else: |
|
214 | 214 | end = indexend |
|
215 | 215 | |
|
216 | 216 | entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH) |
|
217 | 217 | if not entry: |
|
218 | 218 | raise KeyError(name) |
|
219 | 219 | |
|
220 | 220 | rawentry = struct.unpack(self.INDEXFORMAT, entry) |
|
221 | 221 | x, offset, size, nodeindexoffset, nodeindexsize = rawentry |
|
222 | 222 | rawnamelen = self._index[ |
|
223 | 223 | nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE |
|
224 | 224 | ] |
|
225 | 225 | actualnamelen = struct.unpack(b'!H', rawnamelen)[0] |
|
226 | 226 | nodeindexoffset += constants.FILENAMESIZE |
|
227 | 227 | actualname = self._index[ |
|
228 | 228 | nodeindexoffset : nodeindexoffset + actualnamelen |
|
229 | 229 | ] |
|
230 | 230 | if actualname != name: |
|
231 | 231 | raise KeyError( |
|
232 | 232 | b"found file name %s when looking for %s" % (actualname, name) |
|
233 | 233 | ) |
|
234 | 234 | nodeindexoffset += actualnamelen |
|
235 | 235 | |
|
236 | 236 | filenamelength = struct.unpack( |
|
237 | 237 | b'!H', self._data[offset : offset + constants.FILENAMESIZE] |
|
238 | 238 | )[0] |
|
239 | 239 | offset += constants.FILENAMESIZE |
|
240 | 240 | |
|
241 | 241 | actualname = self._data[offset : offset + filenamelength] |
|
242 | 242 | offset += filenamelength |
|
243 | 243 | |
|
244 | 244 | if name != actualname: |
|
245 | 245 | raise KeyError( |
|
246 | 246 | b"found file name %s when looking for %s" % (actualname, name) |
|
247 | 247 | ) |
|
248 | 248 | |
|
249 | 249 | # Skip entry list size |
|
250 | 250 | offset += ENTRYCOUNTSIZE |
|
251 | 251 | |
|
252 | 252 | nodelistoffset = offset |
|
253 | 253 | nodelistsize = ( |
|
254 | 254 | size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE |
|
255 | 255 | ) |
|
256 | 256 | return ( |
|
257 | 257 | name, |
|
258 | 258 | nodelistoffset, |
|
259 | 259 | nodelistsize, |
|
260 | 260 | nodeindexoffset, |
|
261 | 261 | nodeindexsize, |
|
262 | 262 | ) |
|
263 | 263 | |
|
264 | 264 | def _bisect(self, node, start, end, entrylen): |
|
265 | 265 | # Bisect between start and end to find node |
|
266 | 266 | origstart = start |
|
267 | 267 | startnode = self._index[start : start + NODELENGTH] |
|
268 | 268 | endnode = self._index[end : end + NODELENGTH] |
|
269 | 269 | |
|
270 | 270 | if startnode == node: |
|
271 | 271 | return self._index[start : start + entrylen] |
|
272 | 272 | elif endnode == node: |
|
273 | 273 | return self._index[end : end + entrylen] |
|
274 | 274 | else: |
|
275 | 275 | while start < end - entrylen: |
|
276 | 276 | mid = start + (end - start) // 2 |
|
277 | 277 | mid = mid - ((mid - origstart) % entrylen) |
|
278 | 278 | midnode = self._index[mid : mid + NODELENGTH] |
|
279 | 279 | if midnode == node: |
|
280 | 280 | return self._index[mid : mid + entrylen] |
|
281 | 281 | if node > midnode: |
|
282 | 282 | start = mid |
|
283 | 283 | elif node < midnode: |
|
284 | 284 | end = mid |
|
285 | 285 | return None |
|
286 | 286 | |
|
287 | 287 | def markledger(self, ledger, options=None): |
|
288 | 288 | for filename, node in self: |
|
289 | 289 | ledger.markhistoryentry(self, filename, node) |
|
290 | 290 | |
|
291 | 291 | def cleanup(self, ledger): |
|
292 | 292 | entries = ledger.sources.get(self, []) |
|
293 | 293 | allkeys = set(self) |
|
294 | 294 | repackedkeys = set( |
|
295 | 295 | (e.filename, e.node) for e in entries if e.historyrepacked |
|
296 | 296 | ) |
|
297 | 297 | |
|
298 | 298 | if len(allkeys - repackedkeys) == 0: |
|
299 | 299 | if self.path not in ledger.created: |
|
300 | 300 | util.unlinkpath(self.indexpath, ignoremissing=True) |
|
301 | 301 | util.unlinkpath(self.packpath, ignoremissing=True) |
|
302 | 302 | |
|
303 | 303 | def __iter__(self): |
|
304 | 304 | for f, n, x, x, x, x in self.iterentries(): |
|
305 | 305 | yield f, n |
|
306 | 306 | |
|
307 | 307 | def iterentries(self): |
|
308 | 308 | # Start at 1 to skip the header |
|
309 | 309 | offset = 1 |
|
310 | 310 | while offset < self.datasize: |
|
311 | 311 | data = self._data |
|
312 | 312 | # <2 byte len> + <filename> |
|
313 | 313 | filenamelen = struct.unpack( |
|
314 | 314 | b'!H', data[offset : offset + constants.FILENAMESIZE] |
|
315 | 315 | )[0] |
|
316 | 316 | offset += constants.FILENAMESIZE |
|
317 | 317 | filename = data[offset : offset + filenamelen] |
|
318 | 318 | offset += filenamelen |
|
319 | 319 | |
|
320 | 320 | revcount = struct.unpack( |
|
321 | 321 | b'!I', data[offset : offset + ENTRYCOUNTSIZE] |
|
322 | 322 | )[0] |
|
323 | 323 | offset += ENTRYCOUNTSIZE |
|
324 | 324 | |
|
325 | 325 | for i in pycompat.xrange(revcount): |
|
326 | 326 | entry = struct.unpack( |
|
327 | 327 | PACKFORMAT, data[offset : offset + PACKENTRYLENGTH] |
|
328 | 328 | ) |
|
329 | 329 | offset += PACKENTRYLENGTH |
|
330 | 330 | |
|
331 | 331 | copyfrom = data[offset : offset + entry[ANC_COPYFROM]] |
|
332 | 332 | offset += entry[ANC_COPYFROM] |
|
333 | 333 | |
|
334 | 334 | yield ( |
|
335 | 335 | filename, |
|
336 | 336 | entry[ANC_NODE], |
|
337 | 337 | entry[ANC_P1NODE], |
|
338 | 338 | entry[ANC_P2NODE], |
|
339 | 339 | entry[ANC_LINKNODE], |
|
340 | 340 | copyfrom, |
|
341 | 341 | ) |
|
342 | 342 | |
|
343 | 343 | self._pagedin += PACKENTRYLENGTH |
|
344 | 344 | |
|
345 | 345 | # If we've read a lot of data from the mmap, free some memory. |
|
346 | 346 | self.freememory() |
|
347 | 347 | |
|
348 | 348 | |
|
349 | 349 | class mutablehistorypack(basepack.mutablebasepack): |
|
350 | 350 | """A class for constructing and serializing a histpack file and index. |
|
351 | 351 | |
|
352 | 352 | A history pack is a pair of files that contain the revision history for |
|
353 | 353 | various file revisions in Mercurial. It contains only revision history (like |
|
354 | 354 | parent pointers and linknodes), not any revision content information. |
|
355 | 355 | |
|
356 | 356 | It consists of two files, with the following format: |
|
357 | 357 | |
|
358 | 358 | .histpack |
|
359 | 359 | The pack itself is a series of file revisions with some basic header |
|
360 | 360 | information on each. |
|
361 | 361 | |
|
362 | 362 | datapack = <version: 1 byte> |
|
363 | 363 | [<filesection>,...] |
|
364 | 364 | filesection = <filename len: 2 byte unsigned int> |
|
365 | 365 | <filename> |
|
366 | 366 | <revision count: 4 byte unsigned int> |
|
367 | 367 | [<revision>,...] |
|
368 | 368 | revision = <node: 20 byte> |
|
369 | 369 | <p1node: 20 byte> |
|
370 | 370 | <p2node: 20 byte> |
|
371 | 371 | <linknode: 20 byte> |
|
372 | 372 | <copyfromlen: 2 byte> |
|
373 | 373 | <copyfrom> |
|
374 | 374 | |
|
375 | 375 | The revisions within each filesection are stored in topological order |
|
376 | 376 | (newest first). If a given entry has a parent from another file (a copy) |
|
377 | 377 | then p1node is the node from the other file, and copyfrom is the |
|
378 | 378 | filepath of the other file. |
|
379 | 379 | |
|
380 | 380 | .histidx |
|
381 | 381 | The index file provides a mapping from filename to the file section in |
|
382 | 382 | the histpack. In V1 it also contains sub-indexes for specific nodes |
|
383 | 383 | within each file. It consists of three parts, the fanout, the file index |
|
384 | 384 | and the node indexes. |
|
385 | 385 | |
|
386 | 386 | The file index is a list of index entries, sorted by filename hash (one |
|
387 | 387 | per file section in the pack). Each entry has: |
|
388 | 388 | |
|
389 | 389 | - node (The 20 byte hash of the filename) |
|
390 | 390 | - pack entry offset (The location of this file section in the histpack) |
|
391 | 391 | - pack content size (The on-disk length of this file section's pack |
|
392 | 392 | data) |
|
393 | 393 | - node index offset (The location of the file's node index in the index |
|
394 | 394 | file) [1] |
|
395 | 395 | - node index size (the on-disk length of this file's node index) [1] |
|
396 | 396 | |
|
397 | 397 | The fanout is a quick lookup table to reduce the number of steps for |
|
398 | 398 | bisecting the index. It is a series of 4 byte pointers to positions |
|
399 | 399 | within the index. It has 2^16 entries, which corresponds to hash |
|
400 | 400 | prefixes [00, 01, 02,..., FD, FE, FF]. Example: the pointer in slot 4F |
|
401 | 401 | points to the index position of the first revision whose node starts |
|
402 | 402 | with 4F. This saves log(2^16) bisect steps. |
|
403 | 403 | |
|
404 | 404 | dataidx = <fanouttable> |
|
405 | 405 | <file count: 8 byte unsigned> [1] |
|
406 | 406 | <fileindex> |
|
407 | 407 | <node count: 8 byte unsigned> [1] |
|
408 | 408 | [<nodeindex>,...] [1] |
|
409 | 409 | fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries) |
|
410 | 410 | |
|
411 | 411 | fileindex = [<file index entry>,...] |
|
412 | 412 | fileindexentry = <node: 20 byte> |
|
413 | 413 | <pack file section offset: 8 byte unsigned int> |
|
414 | 414 | <pack file section size: 8 byte unsigned int> |
|
415 | 415 | <node index offset: 4 byte unsigned int> [1] |
|
416 | 416 | <node index size: 4 byte unsigned int> [1] |
|
417 | 417 | nodeindex = <filename>[<node index entry>,...] [1] |
|
418 | 418 | filename = <filename len : 2 byte unsigned int><filename value> [1] |
|
419 | 419 | nodeindexentry = <node: 20 byte> [1] |
|
420 | 420 | <pack file node offset: 8 byte unsigned int> [1] |
|
421 | 421 | |
|
422 | 422 | [1]: new in version 1. |
|
423 | 423 | """ |
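
The fanout described above is what _findsection() earlier in this file consults first: hash the filename, read the leading bytes of the digest as a slot number, and start bisecting the file index at the offset stored in that slot. A minimal illustration, assuming the two-byte ('!H') fanout prefix implied by the 2^16-entry table and using hashlib.sha1 purely as a stand-in for the in-tree hashutil.sha1 helper, on a made-up filename:

    import hashlib
    import struct

    namehash = hashlib.sha1(b'path/to/file.py').digest()
    # big-endian value of the first two digest bytes -> slot in the
    # 2**16-entry fanout table
    fanoutkey = struct.unpack('!H', namehash[:2])[0]
    assert 0 <= fanoutkey < 2 ** 16
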
|
424 | 424 | |
|
425 | 425 | INDEXSUFFIX = INDEXSUFFIX |
|
426 | 426 | PACKSUFFIX = PACKSUFFIX |
|
427 | 427 | |
|
428 | 428 | SUPPORTED_VERSIONS = [2] |
|
429 | 429 | |
|
430 | 430 | def __init__(self, ui, packpath, version=2): |
|
431 | 431 | super(mutablehistorypack, self).__init__(ui, packpath, version=version) |
|
432 | 432 | self.files = {} |
|
433 | 433 | self.entrylocations = {} |
|
434 | 434 | self.fileentries = {} |
|
435 | 435 | |
|
436 | 436 | self.INDEXFORMAT = INDEXFORMAT2 |
|
437 | 437 | self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2 |
|
438 | 438 | |
|
439 | 439 | self.NODEINDEXFORMAT = NODEINDEXFORMAT |
|
440 | 440 | self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH |
|
441 | 441 | |
|
442 | 442 | def add(self, filename, node, p1, p2, linknode, copyfrom): |
|
443 | 443 | copyfrom = copyfrom or b'' |
|
444 | 444 | copyfromlen = struct.pack(b'!H', len(copyfrom)) |
|
445 | 445 | self.fileentries.setdefault(filename, []).append( |
|
446 | 446 | (node, p1, p2, linknode, copyfromlen, copyfrom) |
|
447 | 447 | ) |
|
448 | 448 | |
|
449 | 449 | def _write(self): |
|
450 | 450 | for filename in sorted(self.fileentries): |
|
451 | 451 | entries = self.fileentries[filename] |
|
452 | 452 | sectionstart = self.packfp.tell() |
|
453 | 453 | |
|
454 | 454 | # Write the file section content |
|
455 | 455 | entrymap = dict((e[0], e) for e in entries) |
|
456 | 456 | |
|
457 | 457 | def parentfunc(node): |
|
458 | 458 | x, p1, p2, x, x, x = entrymap[node] |
|
459 | 459 | parents = [] |
|
460 | 460 | if p1 != nullid: |
|
461 | 461 | parents.append(p1) |
|
462 | 462 | if p2 != nullid: |
|
463 | 463 | parents.append(p2) |
|
464 | 464 | return parents |
|
465 | 465 | |
|
466 | 466 | sortednodes = list( |
|
467 | 467 | reversed( |
|
468 | 468 | shallowutil.sortnodes((e[0] for e in entries), parentfunc) |
|
469 | 469 | ) |
|
470 | 470 | ) |
|
471 | 471 | |
|
472 | 472 | # Write the file section header |
|
473 | 473 | self.writeraw( |
|
474 | 474 | b"%s%s%s" |
|
475 | 475 | % ( |
|
476 | 476 | struct.pack(b'!H', len(filename)), |
|
477 | 477 | filename, |
|
478 | 478 | struct.pack(b'!I', len(sortednodes)), |
|
479 | 479 | ) |
|
480 | 480 | ) |
|
481 | 481 | |
|
482 | 482 | sectionlen = constants.FILENAMESIZE + len(filename) + 4 |
|
483 | 483 | |
|
484 | 484 | rawstrings = [] |
|
485 | 485 | |
|
486 | 486 | # Record the node locations for the index |
|
487 | 487 | locations = self.entrylocations.setdefault(filename, {}) |
|
488 | 488 | offset = sectionstart + sectionlen |
|
489 | 489 | for node in sortednodes: |
|
490 | 490 | locations[node] = offset |
|
491 | 491 | raw = b'%s%s%s%s%s%s' % entrymap[node] |
|
492 | 492 | rawstrings.append(raw) |
|
493 | 493 | offset += len(raw) |
|
494 | 494 | |
|
495 | 495 | rawdata = b''.join(rawstrings) |
|
496 | 496 | sectionlen += len(rawdata) |
|
497 | 497 | |
|
498 | 498 | self.writeraw(rawdata) |
|
499 | 499 | |
|
500 | 500 | # Record metadata for the index |
|
501 | 501 | self.files[filename] = (sectionstart, sectionlen) |
|
502 | node = hashlib.sha1(filename).digest() | |
|
502 | node = hashutil.sha1(filename).digest() | |
|
503 | 503 | self.entries[node] = node |
|
504 | 504 | |
|
505 | 505 | def close(self, ledger=None): |
|
506 | 506 | if self._closed: |
|
507 | 507 | return |
|
508 | 508 | |
|
509 | 509 | self._write() |
|
510 | 510 | |
|
511 | 511 | return super(mutablehistorypack, self).close(ledger=ledger) |
|
512 | 512 | |
|
513 | 513 | def createindex(self, nodelocations, indexoffset): |
|
514 | 514 | fileindexformat = self.INDEXFORMAT |
|
515 | 515 | fileindexlength = self.INDEXENTRYLENGTH |
|
516 | 516 | nodeindexformat = self.NODEINDEXFORMAT |
|
517 | 517 | nodeindexlength = self.NODEINDEXENTRYLENGTH |
|
518 | 518 | |
|
519 | 519 | files = ( |
|
520 | (hashlib.sha1(filename).digest(), filename, offset, size) | |
|
520 | (hashutil.sha1(filename).digest(), filename, offset, size) | |
|
521 | 521 | for filename, (offset, size) in pycompat.iteritems(self.files) |
|
522 | 522 | ) |
|
523 | 523 | files = sorted(files) |
|
524 | 524 | |
|
525 | 525 | # node index is after file index size, file index, and node index size |
|
526 | 526 | indexlensize = struct.calcsize(b'!Q') |
|
527 | 527 | nodeindexoffset = ( |
|
528 | 528 | indexoffset |
|
529 | 529 | + indexlensize |
|
530 | 530 | + (len(files) * fileindexlength) |
|
531 | 531 | + indexlensize |
|
532 | 532 | ) |
|
533 | 533 | |
|
534 | 534 | fileindexentries = [] |
|
535 | 535 | nodeindexentries = [] |
|
536 | 536 | nodecount = 0 |
|
537 | 537 | for namehash, filename, offset, size in files: |
|
538 | 538 | # File section index |
|
539 | 539 | nodelocations = self.entrylocations[filename] |
|
540 | 540 | |
|
541 | 541 | nodeindexsize = len(nodelocations) * nodeindexlength |
|
542 | 542 | |
|
543 | 543 | rawentry = struct.pack( |
|
544 | 544 | fileindexformat, |
|
545 | 545 | namehash, |
|
546 | 546 | offset, |
|
547 | 547 | size, |
|
548 | 548 | nodeindexoffset, |
|
549 | 549 | nodeindexsize, |
|
550 | 550 | ) |
|
551 | 551 | # Node index |
|
552 | 552 | nodeindexentries.append( |
|
553 | 553 | struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename |
|
554 | 554 | ) |
|
555 | 555 | nodeindexoffset += constants.FILENAMESIZE + len(filename) |
|
556 | 556 | |
|
557 | 557 | for node, location in sorted(pycompat.iteritems(nodelocations)): |
|
558 | 558 | nodeindexentries.append( |
|
559 | 559 | struct.pack(nodeindexformat, node, location) |
|
560 | 560 | ) |
|
561 | 561 | nodecount += 1 |
|
562 | 562 | |
|
563 | 563 | nodeindexoffset += len(nodelocations) * nodeindexlength |
|
564 | 564 | |
|
565 | 565 | fileindexentries.append(rawentry) |
|
566 | 566 | |
|
567 | 567 | nodecountraw = struct.pack(b'!Q', nodecount) |
|
568 | 568 | return ( |
|
569 | 569 | b''.join(fileindexentries) |
|
570 | 570 | + nodecountraw |
|
571 | 571 | + b''.join(nodeindexentries) |
|
572 | 572 | ) |
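
As a quick cross-check of the fixed-width layouts declared at the top of this file, the struct formats account exactly for the advertised sizes; the calcsize() calls below merely restate PACKENTRYLENGTH (82), NODEINDEXENTRYLENGTH and INDEXENTRYLENGTH2 from the constants in the hunk above.

    import struct

    assert struct.calcsize(b'!20s20s20s20sH') == 82  # PACKFORMAT -> PACKENTRYLENGTH
    assert struct.calcsize(b'!20sQ') == 28           # NODEINDEXFORMAT entry
    assert struct.calcsize(b'!20sQQII') == 44        # INDEXFORMAT2 entry
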
@@ -1,536 +1,536 b'' | |||
|
1 | 1 | # shallowutil.py -- remotefilelog utilities |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2014 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | from __future__ import absolute_import |
|
8 | 8 | |
|
9 | 9 | import collections |
|
10 | 10 | import errno |
|
11 | import hashlib | |
|
12 | 11 | import os |
|
13 | 12 | import stat |
|
14 | 13 | import struct |
|
15 | 14 | import tempfile |
|
16 | 15 | |
|
17 | 16 | from mercurial.i18n import _ |
|
18 | 17 | from mercurial.pycompat import open |
|
19 | 18 | from mercurial import ( |
|
20 | 19 | error, |
|
21 | 20 | node, |
|
22 | 21 | pycompat, |
|
23 | 22 | revlog, |
|
24 | 23 | util, |
|
25 | 24 | ) |
|
26 | 25 | from mercurial.utils import ( |
|
26 | hashutil, | |
|
27 | 27 | storageutil, |
|
28 | 28 | stringutil, |
|
29 | 29 | ) |
|
30 | 30 | from . import constants |
|
31 | 31 | |
|
32 | 32 | if not pycompat.iswindows: |
|
33 | 33 | import grp |
|
34 | 34 | |
|
35 | 35 | |
|
36 | 36 | def isenabled(repo): |
|
37 | 37 | """returns whether the repository is remotefilelog enabled or not""" |
|
38 | 38 | return constants.SHALLOWREPO_REQUIREMENT in repo.requirements |
|
39 | 39 | |
|
40 | 40 | |
|
41 | 41 | def getcachekey(reponame, file, id): |
|
42 | pathhash = node.hex(hashlib.sha1(file).digest()) | |
|
42 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
|
43 | 43 | return os.path.join(reponame, pathhash[:2], pathhash[2:], id) |
|
44 | 44 | |
|
45 | 45 | |
|
46 | 46 | def getlocalkey(file, id): |
|
47 | pathhash = node.hex(hashlib.sha1(file).digest()) | |
|
47 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
|
48 | 48 | return os.path.join(pathhash, id) |
|
49 | 49 | |
|
50 | 50 | |
|
51 | 51 | def getcachepath(ui, allowempty=False): |
|
52 | 52 | cachepath = ui.config(b"remotefilelog", b"cachepath") |
|
53 | 53 | if not cachepath: |
|
54 | 54 | if allowempty: |
|
55 | 55 | return None |
|
56 | 56 | else: |
|
57 | 57 | raise error.Abort( |
|
58 | 58 | _(b"could not find config option remotefilelog.cachepath") |
|
59 | 59 | ) |
|
60 | 60 | return util.expandpath(cachepath) |
|
61 | 61 | |
|
62 | 62 | |
|
63 | 63 | def getcachepackpath(repo, category): |
|
64 | 64 | cachepath = getcachepath(repo.ui) |
|
65 | 65 | if category != constants.FILEPACK_CATEGORY: |
|
66 | 66 | return os.path.join(cachepath, repo.name, b'packs', category) |
|
67 | 67 | else: |
|
68 | 68 | return os.path.join(cachepath, repo.name, b'packs') |
|
69 | 69 | |
|
70 | 70 | |
|
71 | 71 | def getlocalpackpath(base, category): |
|
72 | 72 | return os.path.join(base, b'packs', category) |
|
73 | 73 | |
|
74 | 74 | |
|
75 | 75 | def createrevlogtext(text, copyfrom=None, copyrev=None): |
|
76 | 76 | """returns a string that matches the revlog contents in a |
|
77 | 77 | traditional revlog |
|
78 | 78 | """ |
|
79 | 79 | meta = {} |
|
80 | 80 | if copyfrom or text.startswith(b'\1\n'): |
|
81 | 81 | if copyfrom: |
|
82 | 82 | meta[b'copy'] = copyfrom |
|
83 | 83 | meta[b'copyrev'] = copyrev |
|
84 | 84 | text = storageutil.packmeta(meta, text) |
|
85 | 85 | |
|
86 | 86 | return text |
|
87 | 87 | |
|
88 | 88 | |
|
89 | 89 | def parsemeta(text): |
|
90 | 90 | """parse mercurial filelog metadata""" |
|
91 | 91 | meta, size = storageutil.parsemeta(text) |
|
92 | 92 | if text.startswith(b'\1\n'): |
|
93 | 93 | s = text.index(b'\1\n', 2) |
|
94 | 94 | text = text[s + 2 :] |
|
95 | 95 | return meta or {}, text |
|
96 | 96 | |
|
97 | 97 | |
|
98 | 98 | def sumdicts(*dicts): |
|
99 | 99 | """Adds all the values of *dicts together into one dictionary. This assumes |
|
100 | 100 | the values in *dicts are all summable. |
|
101 | 101 | |
|
102 | 102 | e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1} |
|
103 | 103 | """ |
|
104 | 104 | result = collections.defaultdict(lambda: 0) |
|
105 | 105 | for dict in dicts: |
|
106 | 106 | for k, v in pycompat.iteritems(dict): |
|
107 | 107 | result[k] += v |
|
108 | 108 | return result |
|
109 | 109 | |
|
110 | 110 | |
|
111 | 111 | def prefixkeys(dict, prefix): |
|
112 | 112 | """Returns ``dict`` with ``prefix`` prepended to all its keys.""" |
|
113 | 113 | result = {} |
|
114 | 114 | for k, v in pycompat.iteritems(dict): |
|
115 | 115 | result[prefix + k] = v |
|
116 | 116 | return result |
|
117 | 117 | |
|
118 | 118 | |
|
119 | 119 | def reportpackmetrics(ui, prefix, *stores): |
|
120 | 120 | dicts = [s.getmetrics() for s in stores] |
|
121 | 121 | dict = prefixkeys(sumdicts(*dicts), prefix + b'_') |
|
122 | 122 | ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict)) |
|
123 | 123 | |
|
124 | 124 | |
|
125 | 125 | def _parsepackmeta(metabuf): |
|
126 | 126 | """parse datapack meta, bytes (<metadata-list>) -> dict |
|
127 | 127 | |
|
128 | 128 | The dict contains raw content - both keys and values are strings. |
|
129 | 129 | Upper-level business may want to convert some of them to other types like |
|
130 | 130 | integers, on their own. |
|
131 | 131 | |
|
132 | 132 | raise ValueError if the data is corrupted |
|
133 | 133 | """ |
|
134 | 134 | metadict = {} |
|
135 | 135 | offset = 0 |
|
136 | 136 | buflen = len(metabuf) |
|
137 | 137 | while buflen - offset >= 3: |
|
138 | 138 | key = metabuf[offset : offset + 1] |
|
139 | 139 | offset += 1 |
|
140 | 140 | metalen = struct.unpack_from(b'!H', metabuf, offset)[0] |
|
141 | 141 | offset += 2 |
|
142 | 142 | if offset + metalen > buflen: |
|
143 | 143 | raise ValueError(b'corrupted metadata: incomplete buffer') |
|
144 | 144 | value = metabuf[offset : offset + metalen] |
|
145 | 145 | metadict[key] = value |
|
146 | 146 | offset += metalen |
|
147 | 147 | if offset != buflen: |
|
148 | 148 | raise ValueError(b'corrupted metadata: redundant data') |
|
149 | 149 | return metadict |
|
150 | 150 | |
|
151 | 151 | |
|
152 | 152 | def _buildpackmeta(metadict): |
|
153 | 153 | """reverse of _parsepackmeta, dict -> bytes (<metadata-list>) |
|
154 | 154 | |
|
155 | 155 | The dict contains raw content - both keys and values are strings. |
|
156 | 156 | Upper-level business may want to serialize some of other types (like |
|
157 | 157 | integers) to strings before calling this function. |
|
158 | 158 | |
|
159 | 159 | raise ProgrammingError when metadata key is illegal, or ValueError if |
|
160 | 160 | length limit is exceeded |
|
161 | 161 | """ |
|
162 | 162 | metabuf = b'' |
|
163 | 163 | for k, v in sorted(pycompat.iteritems((metadict or {}))): |
|
164 | 164 | if len(k) != 1: |
|
165 | 165 | raise error.ProgrammingError(b'packmeta: illegal key: %s' % k) |
|
166 | 166 | if len(v) > 0xFFFE: |
|
167 | 167 | raise ValueError( |
|
168 | 168 | b'metadata value is too long: 0x%x > 0xfffe' % len(v) |
|
169 | 169 | ) |
|
170 | 170 | metabuf += k |
|
171 | 171 | metabuf += struct.pack(b'!H', len(v)) |
|
172 | 172 | metabuf += v |
|
173 | 173 | # len(metabuf) is guaranteed representable in 4 bytes, because there are |
|
174 | 174 | # only 256 keys, and for each value, len(value) <= 0xfffe. |
|
175 | 175 | return metabuf |
|
176 | 176 | |
|
177 | 177 | |
|
178 | 178 | _metaitemtypes = { |
|
179 | 179 | constants.METAKEYFLAG: (int, pycompat.long), |
|
180 | 180 | constants.METAKEYSIZE: (int, pycompat.long), |
|
181 | 181 | } |
|
182 | 182 | |
|
183 | 183 | |
|
184 | 184 | def buildpackmeta(metadict): |
|
185 | 185 | """like _buildpackmeta, but typechecks metadict and normalize it. |
|
186 | 186 | |
|
187 | 187 | This means, METAKEYSIZE and METAKEYSIZE should have integers as values, |
|
188 | 188 | and METAKEYFLAG will be dropped if its value is 0. |
|
189 | 189 | """ |
|
190 | 190 | newmeta = {} |
|
191 | 191 | for k, v in pycompat.iteritems(metadict or {}): |
|
192 | 192 | expectedtype = _metaitemtypes.get(k, (bytes,)) |
|
193 | 193 | if not isinstance(v, expectedtype): |
|
194 | 194 | raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k) |
|
195 | 195 | # normalize int to binary buffer |
|
196 | 196 | if int in expectedtype: |
|
197 | 197 | # optimization: remove flag if it's 0 to save space |
|
198 | 198 | if k == constants.METAKEYFLAG and v == 0: |
|
199 | 199 | continue |
|
200 | 200 | v = int2bin(v) |
|
201 | 201 | newmeta[k] = v |
|
202 | 202 | return _buildpackmeta(newmeta) |
|
203 | 203 | |
|
204 | 204 | |
|
205 | 205 | def parsepackmeta(metabuf): |
|
206 | 206 | """like _parsepackmeta, but convert fields to desired types automatically. |
|
207 | 207 | |
|
208 | 208 | This means, METAKEYFLAG and METAKEYSIZE fields will be converted to |
|
209 | 209 | integers. |
|
210 | 210 | """ |
|
211 | 211 | metadict = _parsepackmeta(metabuf) |
|
212 | 212 | for k, v in pycompat.iteritems(metadict): |
|
213 | 213 | if k in _metaitemtypes and int in _metaitemtypes[k]: |
|
214 | 214 | metadict[k] = bin2int(v) |
|
215 | 215 | return metadict |
|
216 | 216 | |
|
217 | 217 | |
|
218 | 218 | def int2bin(n): |
|
219 | 219 | """convert a non-negative integer to raw binary buffer""" |
|
220 | 220 | buf = bytearray() |
|
221 | 221 | while n > 0: |
|
222 | 222 | buf.insert(0, n & 0xFF) |
|
223 | 223 | n >>= 8 |
|
224 | 224 | return bytes(buf) |
|
225 | 225 | |
|
226 | 226 | |
|
227 | 227 | def bin2int(buf): |
|
228 | 228 | """the reverse of int2bin, convert a binary buffer to an integer""" |
|
229 | 229 | x = 0 |
|
230 | 230 | for b in bytearray(buf): |
|
231 | 231 | x <<= 8 |
|
232 | 232 | x |= b |
|
233 | 233 | return x |
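
To make the helpers above concrete, the snippet below encodes one value both ways and unpacks a single hand-built <metadata-list> entry of the form described by _parsepackmeta(); the b's' key byte is only a placeholder for the one-byte METAKEY* constants, whose actual values live in the remotefilelog constants module and are not shown in this hunk.

    import struct

    # 300 == 0x012c, so the minimal big-endian form produced by int2bin()
    # is two bytes; struct gives the same bytes at a fixed 2-byte width.
    assert struct.pack('!H', 300) == b'\x01,'
    # one metadata entry: <1-byte key><2-byte !H length><value>
    entry = b's' + struct.pack('!H', 2) + b'\x01,'
    key = entry[0:1]
    (vlen,) = struct.unpack_from('!H', entry, 1)
    assert (key, vlen, entry[3:3 + vlen]) == (b's', 2, b'\x01,')
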
|
234 | 234 | |
|
235 | 235 | |
|
236 | 236 | def parsesizeflags(raw): |
|
237 | 237 | """given a remotefilelog blob, return (headersize, rawtextsize, flags) |
|
238 | 238 | |
|
239 | 239 | see remotefilelogserver.createfileblob for the format. |
|
240 | 240 | raise RuntimeError if the content is illformed. |
|
241 | 241 | """ |
|
242 | 242 | flags = revlog.REVIDX_DEFAULT_FLAGS |
|
243 | 243 | size = None |
|
244 | 244 | try: |
|
245 | 245 | index = raw.index(b'\0') |
|
246 | 246 | header = raw[:index] |
|
247 | 247 | if header.startswith(b'v'): |
|
248 | 248 | # v1 and above, header starts with 'v' |
|
249 | 249 | if header.startswith(b'v1\n'): |
|
250 | 250 | for s in header.split(b'\n'): |
|
251 | 251 | if s.startswith(constants.METAKEYSIZE): |
|
252 | 252 | size = int(s[len(constants.METAKEYSIZE) :]) |
|
253 | 253 | elif s.startswith(constants.METAKEYFLAG): |
|
254 | 254 | flags = int(s[len(constants.METAKEYFLAG) :]) |
|
255 | 255 | else: |
|
256 | 256 | raise RuntimeError( |
|
257 | 257 | b'unsupported remotefilelog header: %s' % header |
|
258 | 258 | ) |
|
259 | 259 | else: |
|
260 | 260 | # v0, str(int(size)) is the header |
|
261 | 261 | size = int(header) |
|
262 | 262 | except ValueError: |
|
263 | 263 | raise RuntimeError("unexpected remotefilelog header: illegal format") |
|
264 | 264 | if size is None: |
|
265 | 265 | raise RuntimeError("unexpected remotefilelog header: no size found") |
|
266 | 266 | return index + 1, size, flags |
|
267 | 267 | |
|
268 | 268 | |
|
269 | 269 | def buildfileblobheader(size, flags, version=None): |
|
270 | 270 | """return the header of a remotefilelog blob. |
|
271 | 271 | |
|
272 | 272 | see remotefilelogserver.createfileblob for the format. |
|
273 | 273 | approximately the reverse of parsesizeflags. |
|
274 | 274 | |
|
275 | 275 | version could be 0 or 1, or None (auto decide). |
|
276 | 276 | """ |
|
277 | 277 | # choose v0 if flags is empty, otherwise v1 |
|
278 | 278 | if version is None: |
|
279 | 279 | version = int(bool(flags)) |
|
280 | 280 | if version == 1: |
|
281 | 281 | header = b'v1\n%s%d\n%s%d' % ( |
|
282 | 282 | constants.METAKEYSIZE, |
|
283 | 283 | size, |
|
284 | 284 | constants.METAKEYFLAG, |
|
285 | 285 | flags, |
|
286 | 286 | ) |
|
287 | 287 | elif version == 0: |
|
288 | 288 | if flags: |
|
289 | 289 | raise error.ProgrammingError(b'fileblob v0 does not support flag') |
|
290 | 290 | header = b'%d' % size |
|
291 | 291 | else: |
|
292 | 292 | raise error.ProgrammingError(b'unknown fileblob version %d' % version) |
|
293 | 293 | return header |
|
294 | 294 | |
|
295 | 295 | |
|
296 | 296 | def ancestormap(raw): |
|
297 | 297 | offset, size, flags = parsesizeflags(raw) |
|
298 | 298 | start = offset + size |
|
299 | 299 | |
|
300 | 300 | mapping = {} |
|
301 | 301 | while start < len(raw): |
|
302 | 302 | divider = raw.index(b'\0', start + 80) |
|
303 | 303 | |
|
304 | 304 | currentnode = raw[start : (start + 20)] |
|
305 | 305 | p1 = raw[(start + 20) : (start + 40)] |
|
306 | 306 | p2 = raw[(start + 40) : (start + 60)] |
|
307 | 307 | linknode = raw[(start + 60) : (start + 80)] |
|
308 | 308 | copyfrom = raw[(start + 80) : divider] |
|
309 | 309 | |
|
310 | 310 | mapping[currentnode] = (p1, p2, linknode, copyfrom) |
|
311 | 311 | start = divider + 1 |
|
312 | 312 | |
|
313 | 313 | return mapping |
|
314 | 314 | |
|
315 | 315 | |
|
316 | 316 | def readfile(path): |
|
317 | 317 | f = open(path, b'rb') |
|
318 | 318 | try: |
|
319 | 319 | result = f.read() |
|
320 | 320 | |
|
321 | 321 | # we should never have empty files |
|
322 | 322 | if not result: |
|
323 | 323 | os.remove(path) |
|
324 | 324 | raise IOError(b"empty file: %s" % path) |
|
325 | 325 | |
|
326 | 326 | return result |
|
327 | 327 | finally: |
|
328 | 328 | f.close() |
|
329 | 329 | |
|
330 | 330 | |
|
331 | 331 | def unlinkfile(filepath): |
|
332 | 332 | if pycompat.iswindows: |
|
333 | 333 | # On Windows, os.unlink cannot delete readonly files |
|
334 | 334 | os.chmod(filepath, stat.S_IWUSR) |
|
335 | 335 | os.unlink(filepath) |
|
336 | 336 | |
|
337 | 337 | |
|
338 | 338 | def renamefile(source, destination): |
|
339 | 339 | if pycompat.iswindows: |
|
340 | 340 | # On Windows, os.rename cannot rename readonly files |
|
341 | 341 | # and cannot overwrite destination if it exists |
|
342 | 342 | os.chmod(source, stat.S_IWUSR) |
|
343 | 343 | if os.path.isfile(destination): |
|
344 | 344 | os.chmod(destination, stat.S_IWUSR) |
|
345 | 345 | os.unlink(destination) |
|
346 | 346 | |
|
347 | 347 | os.rename(source, destination) |
|
348 | 348 | |
|
349 | 349 | |
|
350 | 350 | def writefile(path, content, readonly=False): |
|
351 | 351 | dirname, filename = os.path.split(path) |
|
352 | 352 | if not os.path.exists(dirname): |
|
353 | 353 | try: |
|
354 | 354 | os.makedirs(dirname) |
|
355 | 355 | except OSError as ex: |
|
356 | 356 | if ex.errno != errno.EEXIST: |
|
357 | 357 | raise |
|
358 | 358 | |
|
359 | 359 | fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname) |
|
360 | 360 | os.close(fd) |
|
361 | 361 | |
|
362 | 362 | try: |
|
363 | 363 | f = util.posixfile(temp, b'wb') |
|
364 | 364 | f.write(content) |
|
365 | 365 | f.close() |
|
366 | 366 | |
|
367 | 367 | if readonly: |
|
368 | 368 | mode = 0o444 |
|
369 | 369 | else: |
|
370 | 370 | # tempfiles are created with 0o600, so we need to manually set the |
|
371 | 371 | # mode. |
|
372 | 372 | oldumask = os.umask(0) |
|
373 | 373 | # there's no way to get the umask without modifying it, so set it |
|
374 | 374 | # back |
|
375 | 375 | os.umask(oldumask) |
|
376 | 376 | mode = ~oldumask |
|
377 | 377 | |
|
378 | 378 | renamefile(temp, path) |
|
379 | 379 | os.chmod(path, mode) |
|
380 | 380 | except Exception: |
|
381 | 381 | try: |
|
382 | 382 | unlinkfile(temp) |
|
383 | 383 | except OSError: |
|
384 | 384 | pass |
|
385 | 385 | raise |
|
386 | 386 | |
|
387 | 387 | |
|
388 | 388 | def sortnodes(nodes, parentfunc): |
|
389 | 389 | """Topologically sorts the nodes, using the parentfunc to find |
|
390 | 390 | the parents of nodes.""" |
|
391 | 391 | nodes = set(nodes) |
|
392 | 392 | childmap = {} |
|
393 | 393 | parentmap = {} |
|
394 | 394 | roots = [] |
|
395 | 395 | |
|
396 | 396 | # Build a child and parent map |
|
397 | 397 | for n in nodes: |
|
398 | 398 | parents = [p for p in parentfunc(n) if p in nodes] |
|
399 | 399 | parentmap[n] = set(parents) |
|
400 | 400 | for p in parents: |
|
401 | 401 | childmap.setdefault(p, set()).add(n) |
|
402 | 402 | if not parents: |
|
403 | 403 | roots.append(n) |
|
404 | 404 | |
|
405 | 405 | roots.sort() |
|
406 | 406 | # Process roots, adding children to the queue as they become roots |
|
407 | 407 | results = [] |
|
408 | 408 | while roots: |
|
409 | 409 | n = roots.pop(0) |
|
410 | 410 | results.append(n) |
|
411 | 411 | if n in childmap: |
|
412 | 412 | children = childmap[n] |
|
413 | 413 | for c in children: |
|
414 | 414 | childparents = parentmap[c] |
|
415 | 415 | childparents.remove(n) |
|
416 | 416 | if len(childparents) == 0: |
|
417 | 417 | # insert at the beginning, that way child nodes |
|
418 | 418 | # are likely to be output immediately after their |
|
419 | 419 | # parents. This gives better compression results. |
|
420 | 420 | roots.insert(0, c) |
|
421 | 421 | |
|
422 | 422 | return results |
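
A small usage example for the topological sort above; the three-node chain and its parent table are invented, and the import assumes remotefilelog is available under hgext as shipped with a Mercurial installation.

    from hgext.remotefilelog import shallowutil  # assumes Mercurial is installed

    # chain a <- b <- c: parents must be emitted before their children
    parents = {b'a': [], b'b': [b'a'], b'c': [b'b']}
    order = shallowutil.sortnodes([b'c', b'a', b'b'], lambda n: parents[n])
    assert order == [b'a', b'b', b'c']
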
|
423 | 423 | |
|
424 | 424 | |
|
425 | 425 | def readexactly(stream, n): |
|
426 | 426 | '''read n bytes from stream.read and abort if less was available''' |
|
427 | 427 | s = stream.read(n) |
|
428 | 428 | if len(s) < n: |
|
429 | 429 | raise error.Abort( |
|
430 | 430 | _(b"stream ended unexpectedly (got %d bytes, expected %d)") |
|
431 | 431 | % (len(s), n) |
|
432 | 432 | ) |
|
433 | 433 | return s |
|
434 | 434 | |
|
435 | 435 | |
|
436 | 436 | def readunpack(stream, fmt): |
|
437 | 437 | data = readexactly(stream, struct.calcsize(fmt)) |
|
438 | 438 | return struct.unpack(fmt, data) |
|
439 | 439 | |
|
440 | 440 | |
|
441 | 441 | def readpath(stream): |
|
442 | 442 | rawlen = readexactly(stream, constants.FILENAMESIZE) |
|
443 | 443 | pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0] |
|
444 | 444 | return readexactly(stream, pathlen) |
|
445 | 445 | |
|
446 | 446 | |
|
447 | 447 | def readnodelist(stream): |
|
448 | 448 | rawlen = readexactly(stream, constants.NODECOUNTSIZE) |
|
449 | 449 | nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0] |
|
450 | 450 | for i in pycompat.xrange(nodecount): |
|
451 | 451 | yield readexactly(stream, constants.NODESIZE) |
|
452 | 452 | |
|
453 | 453 | |
|
454 | 454 | def readpathlist(stream): |
|
455 | 455 | rawlen = readexactly(stream, constants.PATHCOUNTSIZE) |
|
456 | 456 | pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0] |
|
457 | 457 | for i in pycompat.xrange(pathcount): |
|
458 | 458 | yield readpath(stream) |
|
459 | 459 | |
|
460 | 460 | |
|
461 | 461 | def getgid(groupname): |
|
462 | 462 | try: |
|
463 | 463 | gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid |
|
464 | 464 | return gid |
|
465 | 465 | except KeyError: |
|
466 | 466 | return None |
|
467 | 467 | |
|
468 | 468 | |
|
469 | 469 | def setstickygroupdir(path, gid, warn=None): |
|
470 | 470 | if gid is None: |
|
471 | 471 | return |
|
472 | 472 | try: |
|
473 | 473 | os.chown(path, -1, gid) |
|
474 | 474 | os.chmod(path, 0o2775) |
|
475 | 475 | except (IOError, OSError) as ex: |
|
476 | 476 | if warn: |
|
477 | 477 | warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex)) |
|
478 | 478 | |
|
479 | 479 | |
|
480 | 480 | def mkstickygroupdir(ui, path): |
|
481 | 481 | """Creates the given directory (if it doesn't exist) and give it a |
|
482 | 482 | particular group with setgid enabled.""" |
|
483 | 483 | gid = None |
|
484 | 484 | groupname = ui.config(b"remotefilelog", b"cachegroup") |
|
485 | 485 | if groupname: |
|
486 | 486 | gid = getgid(groupname) |
|
487 | 487 | if gid is None: |
|
488 | 488 | ui.warn(_(b'unable to resolve group name: %s\n') % groupname) |
|
489 | 489 | |
|
490 | 490 | # we use a single stat syscall to test the existence and mode / group bit |
|
491 | 491 | st = None |
|
492 | 492 | try: |
|
493 | 493 | st = os.stat(path) |
|
494 | 494 | except OSError: |
|
495 | 495 | pass |
|
496 | 496 | |
|
497 | 497 | if st: |
|
498 | 498 | # exists |
|
499 | 499 | if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid: |
|
500 | 500 | # permission needs to be fixed |
|
501 | 501 | setstickygroupdir(path, gid, ui.warn) |
|
502 | 502 | return |
|
503 | 503 | |
|
504 | 504 | oldumask = os.umask(0o002) |
|
505 | 505 | try: |
|
506 | 506 | missingdirs = [path] |
|
507 | 507 | path = os.path.dirname(path) |
|
508 | 508 | while path and not os.path.exists(path): |
|
509 | 509 | missingdirs.append(path) |
|
510 | 510 | path = os.path.dirname(path) |
|
511 | 511 | |
|
512 | 512 | for path in reversed(missingdirs): |
|
513 | 513 | try: |
|
514 | 514 | os.mkdir(path) |
|
515 | 515 | except OSError as ex: |
|
516 | 516 | if ex.errno != errno.EEXIST: |
|
517 | 517 | raise |
|
518 | 518 | |
|
519 | 519 | for path in missingdirs: |
|
520 | 520 | setstickygroupdir(path, gid, ui.warn) |
|
521 | 521 | finally: |
|
522 | 522 | os.umask(oldumask) |
|
523 | 523 | |
|
524 | 524 | |
|
525 | 525 | def getusername(ui): |
|
526 | 526 | try: |
|
527 | 527 | return stringutil.shortuser(ui.username()) |
|
528 | 528 | except Exception: |
|
529 | 529 | return b'unknown' |
|
530 | 530 | |
|
531 | 531 | |
|
532 | 532 | def getreponame(ui): |
|
533 | 533 | reponame = ui.config(b'paths', b'default') |
|
534 | 534 | if reponame: |
|
535 | 535 | return os.path.basename(reponame) |
|
536 | 536 | return b"unknown" |
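
To make the cache layout produced by getcachekey()/getlocalkey() above concrete: the hex digest of the file path is split after two characters, so each repository's cache fans out into at most 256 subdirectories. The repo name, file path and node id below are invented, and hashlib.sha1 is used purely as a stand-in for the in-tree hashutil.sha1 helper.

    import hashlib
    import os

    reponame, filename, nodeid = b'myrepo', b'dir/file.py', b'abcdef0123456789'
    pathhash = hashlib.sha1(filename).hexdigest().encode('ascii')
    cachekey = os.path.join(reponame, pathhash[:2], pathhash[2:], nodeid)
    localkey = os.path.join(pathhash, nodeid)
    # cachekey -> myrepo/<2 hex chars>/<38 hex chars>/abcdef0123456789
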
@@ -1,1293 +1,1295 b'' | |||
|
1 | 1 | # sqlitestore.py - Storage backend that uses SQLite |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | """store repository data in SQLite (EXPERIMENTAL) |
|
9 | 9 | |
|
10 | 10 | The sqlitestore extension enables the storage of repository data in SQLite. |
|
11 | 11 | |
|
12 | 12 | This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY |
|
13 | 13 | GUARANTEES. This means that repositories created with this extension may |
|
14 | 14 | only be usable with the exact version of this extension/Mercurial that was |
|
15 | 15 | used. The extension attempts to enforce this in order to prevent repository |
|
16 | 16 | corruption. |
|
17 | 17 | |
|
18 | 18 | In addition, several features are not yet supported or have known bugs: |
|
19 | 19 | |
|
20 | 20 | * Only some data is stored in SQLite. Changeset, manifest, and other repository |
|
21 | 21 | data is not yet stored in SQLite. |
|
22 | 22 | * Transactions are not robust. If the process is aborted at the right time |
|
23 | 23 | during transaction close/rollback, the repository could be in an inconsistent |
|
24 | 24 | state. This problem will diminish once all repository data is tracked by |
|
25 | 25 | SQLite. |
|
26 | 26 | * Bundle repositories do not work (the ability to use e.g. |
|
27 | 27 | `hg -R <bundle-file> log` to automatically overlay a bundle on top of the |
|
28 | 28 | existing repository). |
|
29 | 29 | * Various other features don't work. |
|
30 | 30 | |
|
31 | 31 | This extension should work for basic clone/pull, update, and commit workflows. |
|
32 | 32 | Some history rewriting operations may fail due to lack of support for bundle |
|
33 | 33 | repositories. |
|
34 | 34 | |
|
35 | 35 | To use, activate the extension and set the ``storage.new-repo-backend`` config |
|
36 | 36 | option to ``sqlite`` to enable new repositories to use SQLite for storage. |
|
37 | 37 | """ |
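
In hgrc terms, the activation the docstring describes (the same two options the run-tests invocation below passes via --extra-config-opt) looks roughly like this; it is shown only as an illustration of those options, not as a recommended production setup:

    [extensions]
    sqlitestore =

    [storage]
    new-repo-backend = sqlite
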
|
38 | 38 | |
|
39 | 39 | # To run the test suite with repos using SQLite by default, execute the |
|
40 | 40 | # following: |
|
41 | 41 | # |
|
42 | 42 | # HGREPOFEATURES="sqlitestore" run-tests.py \ |
|
43 | 43 | # --extra-config-opt extensions.sqlitestore= \ |
|
44 | 44 | # --extra-config-opt storage.new-repo-backend=sqlite |
|
45 | 45 | |
|
46 | 46 | from __future__ import absolute_import |
|
47 | 47 | |
|
48 | import hashlib | |
|
49 | 48 | import sqlite3 |
|
50 | 49 | import struct |
|
51 | 50 | import threading |
|
52 | 51 | import zlib |
|
53 | 52 | |
|
54 | 53 | from mercurial.i18n import _ |
|
55 | 54 | from mercurial.node import ( |
|
56 | 55 | nullid, |
|
57 | 56 | nullrev, |
|
58 | 57 | short, |
|
59 | 58 | ) |
|
60 | 59 | from mercurial.thirdparty import attr |
|
61 | 60 | from mercurial import ( |
|
62 | 61 | ancestor, |
|
63 | 62 | dagop, |
|
64 | 63 | encoding, |
|
65 | 64 | error, |
|
66 | 65 | extensions, |
|
67 | 66 | localrepo, |
|
68 | 67 | mdiff, |
|
69 | 68 | pycompat, |
|
70 | 69 | registrar, |
|
71 | 70 | util, |
|
72 | 71 | verify, |
|
73 | 72 | ) |
|
74 | 73 | from mercurial.interfaces import ( |
|
75 | 74 | repository, |
|
76 | 75 | util as interfaceutil, |
|
77 | 76 | ) |
|
78 | from mercurial.utils import storageutil | |
|
77 | from mercurial.utils import ( | |
|
78 | hashutil, | |
|
79 | storageutil, | |
|
80 | ) | |
|
79 | 81 | |
|
80 | 82 | try: |
|
81 | 83 | from mercurial import zstd |
|
82 | 84 | |
|
83 | 85 | zstd.__version__ |
|
84 | 86 | except ImportError: |
|
85 | 87 | zstd = None |
|
86 | 88 | |
|
87 | 89 | configtable = {} |
|
88 | 90 | configitem = registrar.configitem(configtable) |
|
89 | 91 | |
|
90 | 92 | # experimental config: storage.sqlite.compression |
|
91 | 93 | configitem( |
|
92 | 94 | b'storage', |
|
93 | 95 | b'sqlite.compression', |
|
94 | 96 | default=b'zstd' if zstd else b'zlib', |
|
95 | 97 | experimental=True, |
|
96 | 98 | ) |
|
97 | 99 | |
|
98 | 100 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
99 | 101 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
100 | 102 | # be specifying the version(s) of Mercurial they are tested with, or |
|
101 | 103 | # leave the attribute unspecified. |
|
102 | 104 | testedwith = b'ships-with-hg-core' |
|
103 | 105 | |
|
104 | 106 | REQUIREMENT = b'exp-sqlite-001' |
|
105 | 107 | REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd' |
|
106 | 108 | REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib' |
|
107 | 109 | REQUIREMENT_NONE = b'exp-sqlite-comp-001=none' |
|
108 | 110 | REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files' |
|
109 | 111 | |
|
110 | 112 | CURRENT_SCHEMA_VERSION = 1 |
|
111 | 113 | |
|
112 | 114 | COMPRESSION_NONE = 1 |
|
113 | 115 | COMPRESSION_ZSTD = 2 |
|
114 | 116 | COMPRESSION_ZLIB = 3 |
|
115 | 117 | |
|
116 | 118 | FLAG_CENSORED = 1 |
|
117 | 119 | FLAG_MISSING_P1 = 2 |
|
118 | 120 | FLAG_MISSING_P2 = 4 |
|
119 | 121 | |
|
120 | 122 | CREATE_SCHEMA = [ |
|
121 | 123 | # Deltas are stored as content-indexed blobs. |
|
122 | 124 | # compression column holds COMPRESSION_* constant for how the |
|
123 | 125 | # delta is encoded. |
|
124 | 126 | 'CREATE TABLE delta (' |
|
125 | 127 | ' id INTEGER PRIMARY KEY, ' |
|
126 | 128 | ' compression INTEGER NOT NULL, ' |
|
127 | 129 | ' hash BLOB UNIQUE ON CONFLICT ABORT, ' |
|
128 | 130 | ' delta BLOB NOT NULL ' |
|
129 | 131 | ')', |
|
130 | 132 | # Tracked paths are denormalized to integers to avoid redundant |
|
131 | 133 | # storage of the path name. |
|
132 | 134 | 'CREATE TABLE filepath (' |
|
133 | 135 | ' id INTEGER PRIMARY KEY, ' |
|
134 | 136 | ' path BLOB NOT NULL ' |
|
135 | 137 | ')', |
|
136 | 138 | 'CREATE UNIQUE INDEX filepath_path ON filepath (path)', |
|
137 | 139 | # We have a single table for all file revision data. |
|
138 | 140 | # Each file revision is uniquely described by a (path, rev) and |
|
139 | 141 | # (path, node). |
|
140 | 142 | # |
|
141 | 143 | # Revision data is stored as a pointer to the delta producing this |
|
142 | 144 | # revision and the file revision whose delta should be applied before |
|
143 | 145 | # that one. One can reconstruct the delta chain by recursively following |
|
144 | 146 | # the delta base revision pointers until one encounters NULL. |
|
145 | 147 | # |
|
146 | 148 | # flags column holds bitwise integer flags controlling storage options. |
|
147 | 149 | # These flags are defined by the FLAG_* constants. |
|
148 | 150 | 'CREATE TABLE fileindex (' |
|
149 | 151 | ' id INTEGER PRIMARY KEY, ' |
|
150 | 152 | ' pathid INTEGER REFERENCES filepath(id), ' |
|
151 | 153 | ' revnum INTEGER NOT NULL, ' |
|
152 | 154 | ' p1rev INTEGER NOT NULL, ' |
|
153 | 155 | ' p2rev INTEGER NOT NULL, ' |
|
154 | 156 | ' linkrev INTEGER NOT NULL, ' |
|
155 | 157 | ' flags INTEGER NOT NULL, ' |
|
156 | 158 | ' deltaid INTEGER REFERENCES delta(id), ' |
|
157 | 159 | ' deltabaseid INTEGER REFERENCES fileindex(id), ' |
|
158 | 160 | ' node BLOB NOT NULL ' |
|
159 | 161 | ')', |
|
160 | 162 | 'CREATE UNIQUE INDEX fileindex_pathrevnum ' |
|
161 | 163 | ' ON fileindex (pathid, revnum)', |
|
162 | 164 | 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)', |
|
163 | 165 | # Provide a view over all file data for convenience. |
|
164 | 166 | 'CREATE VIEW filedata AS ' |
|
165 | 167 | 'SELECT ' |
|
166 | 168 | ' fileindex.id AS id, ' |
|
167 | 169 | ' filepath.id AS pathid, ' |
|
168 | 170 | ' filepath.path AS path, ' |
|
169 | 171 | ' fileindex.revnum AS revnum, ' |
|
170 | 172 | ' fileindex.node AS node, ' |
|
171 | 173 | ' fileindex.p1rev AS p1rev, ' |
|
172 | 174 | ' fileindex.p2rev AS p2rev, ' |
|
173 | 175 | ' fileindex.linkrev AS linkrev, ' |
|
174 | 176 | ' fileindex.flags AS flags, ' |
|
175 | 177 | ' fileindex.deltaid AS deltaid, ' |
|
176 | 178 | ' fileindex.deltabaseid AS deltabaseid ' |
|
177 | 179 | 'FROM filepath, fileindex ' |
|
178 | 180 | 'WHERE fileindex.pathid=filepath.id', |
|
179 | 181 | 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION, |
|
180 | 182 | ] |
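
A rough, standalone sketch of what this schema bootstrap amounts to, assuming only the stdlib sqlite3 module plus the CREATE_SCHEMA and CURRENT_SCHEMA_VERSION names defined above:

    import sqlite3

    # Apply the schema to a throwaway in-memory database, the same way the
    # extension initializes a fresh on-disk db.sqlite (see makedb() further down).
    db = sqlite3.connect(':memory:')
    db.text_factory = bytes  # paths, nodes and deltas are handled as bytes/BLOBs

    for statement in CREATE_SCHEMA:
        db.execute(statement)
    db.commit()

    # The final PRAGMA in CREATE_SCHEMA records the schema version.
    assert db.execute('PRAGMA user_version').fetchone()[0] == CURRENT_SCHEMA_VERSION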
|
181 | 183 | |
|
182 | 184 | |
|
183 | 185 | def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None): |
|
184 | 186 | """Resolve a delta chain for a file node.""" |
|
185 | 187 | |
|
186 | 188 | # TODO the "not in ({stops})" here is possibly slowing down the query |
|
187 | 189 | # because it needs to perform the lookup on every recursive invocation. |
|
188 | 190 | # This could possibly be faster if we created a temporary query with |
|
189 | 191 | # baseid "poisoned" to null and limited the recursive filter to |
|
190 | 192 | # "is not null". |
|
191 | 193 | res = db.execute( |
|
192 | 194 | 'WITH RECURSIVE ' |
|
193 | 195 | ' deltachain(deltaid, baseid) AS (' |
|
194 | 196 | ' SELECT deltaid, deltabaseid FROM fileindex ' |
|
195 | 197 | ' WHERE pathid=? AND node=? ' |
|
196 | 198 | ' UNION ALL ' |
|
197 | 199 | ' SELECT fileindex.deltaid, deltabaseid ' |
|
198 | 200 | ' FROM fileindex, deltachain ' |
|
199 | 201 | ' WHERE ' |
|
200 | 202 | ' fileindex.id=deltachain.baseid ' |
|
201 | 203 | ' AND deltachain.baseid IS NOT NULL ' |
|
202 | 204 | ' AND fileindex.id NOT IN ({stops}) ' |
|
203 | 205 | ' ) ' |
|
204 | 206 | 'SELECT deltachain.baseid, compression, delta ' |
|
205 | 207 | 'FROM deltachain, delta ' |
|
206 | 208 | 'WHERE delta.id=deltachain.deltaid'.format( |
|
207 | 209 | stops=','.join(['?'] * len(stoprids)) |
|
208 | 210 | ), |
|
209 | 211 | tuple([pathid, node] + list(stoprids.keys())), |
|
210 | 212 | ) |
|
211 | 213 | |
|
212 | 214 | deltas = [] |
|
213 | 215 | lastdeltabaseid = None |
|
214 | 216 | |
|
215 | 217 | for deltabaseid, compression, delta in res: |
|
216 | 218 | lastdeltabaseid = deltabaseid |
|
217 | 219 | |
|
218 | 220 | if compression == COMPRESSION_ZSTD: |
|
219 | 221 | delta = zstddctx.decompress(delta) |
|
220 | 222 | elif compression == COMPRESSION_NONE: |
|
221 | 223 | delta = delta |
|
222 | 224 | elif compression == COMPRESSION_ZLIB: |
|
223 | 225 | delta = zlib.decompress(delta) |
|
224 | 226 | else: |
|
225 | 227 | raise SQLiteStoreError( |
|
226 | 228 | b'unhandled compression type: %d' % compression |
|
227 | 229 | ) |
|
228 | 230 | |
|
229 | 231 | deltas.append(delta) |
|
230 | 232 | |
|
231 | 233 | if lastdeltabaseid in stoprids: |
|
232 | 234 | basetext = revisioncache[stoprids[lastdeltabaseid]] |
|
233 | 235 | else: |
|
234 | 236 | basetext = deltas.pop() |
|
235 | 237 | |
|
236 | 238 | deltas.reverse() |
|
237 | 239 | fulltext = mdiff.patches(basetext, deltas) |
|
238 | 240 | |
|
239 | 241 | # SQLite returns buffer instances for blob columns on Python 2. This |
|
240 | 242 | # type can propagate through the delta application layer. Because |
|
241 | 243 | # downstream callers assume revisions are bytes, cast as needed. |
|
242 | 244 | if not isinstance(fulltext, bytes): |
|
243 | 245 | fulltext = bytes(delta) |
|
244 | 246 | |
|
245 | 247 | return fulltext |
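
The recursive CTE above is easier to see on a toy table; a self-contained illustration (hypothetical table and values, not the extension's schema) of how such a query walks base pointers until it reaches NULL:

    import sqlite3

    db = sqlite3.connect(':memory:')
    db.execute('CREATE TABLE chain (id INTEGER PRIMARY KEY, baseid INTEGER)')
    # 3 is based on 2, which is based on 1, which has no base (a full snapshot).
    db.executemany('INSERT INTO chain VALUES (?, ?)', [(1, None), (2, 1), (3, 2)])

    rows = db.execute(
        'WITH RECURSIVE walk(id, baseid) AS ('
        '  SELECT id, baseid FROM chain WHERE id=? '
        '  UNION ALL '
        '  SELECT chain.id, chain.baseid FROM chain, walk '
        '  WHERE chain.id=walk.baseid'
        ') SELECT id FROM walk',
        (3,),
    ).fetchall()

    print(rows)  # [(3,), (2,), (1,)] -- newest first, hence the deltas.reverse() above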
|
246 | 248 | |
|
247 | 249 | |
|
248 | 250 | def insertdelta(db, compression, hash, delta): |
|
249 | 251 | try: |
|
250 | 252 | return db.execute( |
|
251 | 253 | 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)', |
|
252 | 254 | (compression, hash, delta), |
|
253 | 255 | ).lastrowid |
|
254 | 256 | except sqlite3.IntegrityError: |
|
255 | 257 | return db.execute( |
|
256 | 258 | 'SELECT id FROM delta WHERE hash=?', (hash,) |
|
257 | 259 | ).fetchone()[0] |
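
Deltas are therefore content-addressed: inserting the same blob twice trips the UNIQUE constraint on hash and falls back to looking up the existing row. A small usage sketch, assuming the delta table schema and the COMPRESSION_NONE constant defined above:

    import hashlib
    import sqlite3

    db = sqlite3.connect(':memory:')
    db.execute(
        'CREATE TABLE delta ('
        ' id INTEGER PRIMARY KEY, '
        ' compression INTEGER NOT NULL, '
        ' hash BLOB UNIQUE ON CONFLICT ABORT, '
        ' delta BLOB NOT NULL '
        ')'
    )

    blob = b'example delta bytes'
    digest = hashlib.sha1(blob).digest()

    first = insertdelta(db, COMPRESSION_NONE, digest, blob)   # inserts a new row
    second = insertdelta(db, COMPRESSION_NONE, digest, blob)  # IntegrityError path
    assert first == second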
|
258 | 260 | |
|
259 | 261 | |
|
260 | 262 | class SQLiteStoreError(error.StorageError): |
|
261 | 263 | pass |
|
262 | 264 | |
|
263 | 265 | |
|
264 | 266 | @attr.s |
|
265 | 267 | class revisionentry(object): |
|
266 | 268 | rid = attr.ib() |
|
267 | 269 | rev = attr.ib() |
|
268 | 270 | node = attr.ib() |
|
269 | 271 | p1rev = attr.ib() |
|
270 | 272 | p2rev = attr.ib() |
|
271 | 273 | p1node = attr.ib() |
|
272 | 274 | p2node = attr.ib() |
|
273 | 275 | linkrev = attr.ib() |
|
274 | 276 | flags = attr.ib() |
|
275 | 277 | |
|
276 | 278 | |
|
277 | 279 | @interfaceutil.implementer(repository.irevisiondelta) |
|
278 | 280 | @attr.s(slots=True) |
|
279 | 281 | class sqliterevisiondelta(object): |
|
280 | 282 | node = attr.ib() |
|
281 | 283 | p1node = attr.ib() |
|
282 | 284 | p2node = attr.ib() |
|
283 | 285 | basenode = attr.ib() |
|
284 | 286 | flags = attr.ib() |
|
285 | 287 | baserevisionsize = attr.ib() |
|
286 | 288 | revision = attr.ib() |
|
287 | 289 | delta = attr.ib() |
|
288 | 290 | linknode = attr.ib(default=None) |
|
289 | 291 | |
|
290 | 292 | |
|
291 | 293 | @interfaceutil.implementer(repository.iverifyproblem) |
|
292 | 294 | @attr.s(frozen=True) |
|
293 | 295 | class sqliteproblem(object): |
|
294 | 296 | warning = attr.ib(default=None) |
|
295 | 297 | error = attr.ib(default=None) |
|
296 | 298 | node = attr.ib(default=None) |
|
297 | 299 | |
|
298 | 300 | |
|
299 | 301 | @interfaceutil.implementer(repository.ifilestorage) |
|
300 | 302 | class sqlitefilestore(object): |
|
301 | 303 | """Implements storage for an individual tracked path.""" |
|
302 | 304 | |
|
303 | 305 | def __init__(self, db, path, compression): |
|
304 | 306 | self._db = db |
|
305 | 307 | self._path = path |
|
306 | 308 | |
|
307 | 309 | self._pathid = None |
|
308 | 310 | |
|
309 | 311 | # revnum -> node |
|
310 | 312 | self._revtonode = {} |
|
311 | 313 | # node -> revnum |
|
312 | 314 | self._nodetorev = {} |
|
313 | 315 | # node -> data structure |
|
314 | 316 | self._revisions = {} |
|
315 | 317 | |
|
316 | 318 | self._revisioncache = util.lrucachedict(10) |
|
317 | 319 | |
|
318 | 320 | self._compengine = compression |
|
319 | 321 | |
|
320 | 322 | if compression == b'zstd': |
|
321 | 323 | self._cctx = zstd.ZstdCompressor(level=3) |
|
322 | 324 | self._dctx = zstd.ZstdDecompressor() |
|
323 | 325 | else: |
|
324 | 326 | self._cctx = None |
|
325 | 327 | self._dctx = None |
|
326 | 328 | |
|
327 | 329 | self._refreshindex() |
|
328 | 330 | |
|
329 | 331 | def _refreshindex(self): |
|
330 | 332 | self._revtonode = {} |
|
331 | 333 | self._nodetorev = {} |
|
332 | 334 | self._revisions = {} |
|
333 | 335 | |
|
334 | 336 | res = list( |
|
335 | 337 | self._db.execute( |
|
336 | 338 | 'SELECT id FROM filepath WHERE path=?', (self._path,) |
|
337 | 339 | ) |
|
338 | 340 | ) |
|
339 | 341 | |
|
340 | 342 | if not res: |
|
341 | 343 | self._pathid = None |
|
342 | 344 | return |
|
343 | 345 | |
|
344 | 346 | self._pathid = res[0][0] |
|
345 | 347 | |
|
346 | 348 | res = self._db.execute( |
|
347 | 349 | 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags ' |
|
348 | 350 | 'FROM fileindex ' |
|
349 | 351 | 'WHERE pathid=? ' |
|
350 | 352 | 'ORDER BY revnum ASC', |
|
351 | 353 | (self._pathid,), |
|
352 | 354 | ) |
|
353 | 355 | |
|
354 | 356 | for i, row in enumerate(res): |
|
355 | 357 | rid, rev, node, p1rev, p2rev, linkrev, flags = row |
|
356 | 358 | |
|
357 | 359 | if i != rev: |
|
358 | 360 | raise SQLiteStoreError( |
|
359 | 361 | _(b'sqlite database has inconsistent revision numbers') |
|
360 | 362 | ) |
|
361 | 363 | |
|
362 | 364 | if p1rev == nullrev: |
|
363 | 365 | p1node = nullid |
|
364 | 366 | else: |
|
365 | 367 | p1node = self._revtonode[p1rev] |
|
366 | 368 | |
|
367 | 369 | if p2rev == nullrev: |
|
368 | 370 | p2node = nullid |
|
369 | 371 | else: |
|
370 | 372 | p2node = self._revtonode[p2rev] |
|
371 | 373 | |
|
372 | 374 | entry = revisionentry( |
|
373 | 375 | rid=rid, |
|
374 | 376 | rev=rev, |
|
375 | 377 | node=node, |
|
376 | 378 | p1rev=p1rev, |
|
377 | 379 | p2rev=p2rev, |
|
378 | 380 | p1node=p1node, |
|
379 | 381 | p2node=p2node, |
|
380 | 382 | linkrev=linkrev, |
|
381 | 383 | flags=flags, |
|
382 | 384 | ) |
|
383 | 385 | |
|
384 | 386 | self._revtonode[rev] = node |
|
385 | 387 | self._nodetorev[node] = rev |
|
386 | 388 | self._revisions[node] = entry |
|
387 | 389 | |
|
388 | 390 | # Start of ifileindex interface. |
|
389 | 391 | |
|
390 | 392 | def __len__(self): |
|
391 | 393 | return len(self._revisions) |
|
392 | 394 | |
|
393 | 395 | def __iter__(self): |
|
394 | 396 | return iter(pycompat.xrange(len(self._revisions))) |
|
395 | 397 | |
|
396 | 398 | def hasnode(self, node): |
|
397 | 399 | if node == nullid: |
|
398 | 400 | return False |
|
399 | 401 | |
|
400 | 402 | return node in self._nodetorev |
|
401 | 403 | |
|
402 | 404 | def revs(self, start=0, stop=None): |
|
403 | 405 | return storageutil.iterrevs( |
|
404 | 406 | len(self._revisions), start=start, stop=stop |
|
405 | 407 | ) |
|
406 | 408 | |
|
407 | 409 | def parents(self, node): |
|
408 | 410 | if node == nullid: |
|
409 | 411 | return nullid, nullid |
|
410 | 412 | |
|
411 | 413 | if node not in self._revisions: |
|
412 | 414 | raise error.LookupError(node, self._path, _(b'no node')) |
|
413 | 415 | |
|
414 | 416 | entry = self._revisions[node] |
|
415 | 417 | return entry.p1node, entry.p2node |
|
416 | 418 | |
|
417 | 419 | def parentrevs(self, rev): |
|
418 | 420 | if rev == nullrev: |
|
419 | 421 | return nullrev, nullrev |
|
420 | 422 | |
|
421 | 423 | if rev not in self._revtonode: |
|
422 | 424 | raise IndexError(rev) |
|
423 | 425 | |
|
424 | 426 | entry = self._revisions[self._revtonode[rev]] |
|
425 | 427 | return entry.p1rev, entry.p2rev |
|
426 | 428 | |
|
427 | 429 | def rev(self, node): |
|
428 | 430 | if node == nullid: |
|
429 | 431 | return nullrev |
|
430 | 432 | |
|
431 | 433 | if node not in self._nodetorev: |
|
432 | 434 | raise error.LookupError(node, self._path, _(b'no node')) |
|
433 | 435 | |
|
434 | 436 | return self._nodetorev[node] |
|
435 | 437 | |
|
436 | 438 | def node(self, rev): |
|
437 | 439 | if rev == nullrev: |
|
438 | 440 | return nullid |
|
439 | 441 | |
|
440 | 442 | if rev not in self._revtonode: |
|
441 | 443 | raise IndexError(rev) |
|
442 | 444 | |
|
443 | 445 | return self._revtonode[rev] |
|
444 | 446 | |
|
445 | 447 | def lookup(self, node): |
|
446 | 448 | return storageutil.fileidlookup(self, node, self._path) |
|
447 | 449 | |
|
448 | 450 | def linkrev(self, rev): |
|
449 | 451 | if rev == nullrev: |
|
450 | 452 | return nullrev |
|
451 | 453 | |
|
452 | 454 | if rev not in self._revtonode: |
|
453 | 455 | raise IndexError(rev) |
|
454 | 456 | |
|
455 | 457 | entry = self._revisions[self._revtonode[rev]] |
|
456 | 458 | return entry.linkrev |
|
457 | 459 | |
|
458 | 460 | def iscensored(self, rev): |
|
459 | 461 | if rev == nullrev: |
|
460 | 462 | return False |
|
461 | 463 | |
|
462 | 464 | if rev not in self._revtonode: |
|
463 | 465 | raise IndexError(rev) |
|
464 | 466 | |
|
465 | 467 | return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED |
|
466 | 468 | |
|
467 | 469 | def commonancestorsheads(self, node1, node2): |
|
468 | 470 | rev1 = self.rev(node1) |
|
469 | 471 | rev2 = self.rev(node2) |
|
470 | 472 | |
|
471 | 473 | ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2) |
|
472 | 474 | return pycompat.maplist(self.node, ancestors) |
|
473 | 475 | |
|
474 | 476 | def descendants(self, revs): |
|
475 | 477 | # TODO we could implement this using a recursive SQL query, which |
|
476 | 478 | # might be faster. |
|
477 | 479 | return dagop.descendantrevs(revs, self.revs, self.parentrevs) |
|
478 | 480 | |
|
479 | 481 | def heads(self, start=None, stop=None): |
|
480 | 482 | if start is None and stop is None: |
|
481 | 483 | if not len(self): |
|
482 | 484 | return [nullid] |
|
483 | 485 | |
|
484 | 486 | startrev = self.rev(start) if start is not None else nullrev |
|
485 | 487 | stoprevs = {self.rev(n) for n in stop or []} |
|
486 | 488 | |
|
487 | 489 | revs = dagop.headrevssubset( |
|
488 | 490 | self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs |
|
489 | 491 | ) |
|
490 | 492 | |
|
491 | 493 | return [self.node(rev) for rev in revs] |
|
492 | 494 | |
|
493 | 495 | def children(self, node): |
|
494 | 496 | rev = self.rev(node) |
|
495 | 497 | |
|
496 | 498 | res = self._db.execute( |
|
497 | 499 | 'SELECT' |
|
498 | 500 | ' node ' |
|
499 | 501 | ' FROM filedata ' |
|
500 | 502 | ' WHERE path=? AND (p1rev=? OR p2rev=?) ' |
|
501 | 503 | ' ORDER BY revnum ASC', |
|
502 | 504 | (self._path, rev, rev), |
|
503 | 505 | ) |
|
504 | 506 | |
|
505 | 507 | return [row[0] for row in res] |
|
506 | 508 | |
|
507 | 509 | # End of ifileindex interface. |
|
508 | 510 | |
|
509 | 511 | # Start of ifiledata interface. |
|
510 | 512 | |
|
511 | 513 | def size(self, rev): |
|
512 | 514 | if rev == nullrev: |
|
513 | 515 | return 0 |
|
514 | 516 | |
|
515 | 517 | if rev not in self._revtonode: |
|
516 | 518 | raise IndexError(rev) |
|
517 | 519 | |
|
518 | 520 | node = self._revtonode[rev] |
|
519 | 521 | |
|
520 | 522 | if self.renamed(node): |
|
521 | 523 | return len(self.read(node)) |
|
522 | 524 | |
|
523 | 525 | return len(self.revision(node)) |
|
524 | 526 | |
|
525 | 527 | def revision(self, node, raw=False, _verifyhash=True): |
|
526 | 528 | if node in (nullid, nullrev): |
|
527 | 529 | return b'' |
|
528 | 530 | |
|
529 | 531 | if isinstance(node, int): |
|
530 | 532 | node = self.node(node) |
|
531 | 533 | |
|
532 | 534 | if node not in self._nodetorev: |
|
533 | 535 | raise error.LookupError(node, self._path, _(b'no node')) |
|
534 | 536 | |
|
535 | 537 | if node in self._revisioncache: |
|
536 | 538 | return self._revisioncache[node] |
|
537 | 539 | |
|
538 | 540 | # Because we have a fulltext revision cache, we are able to |
|
539 | 541 | # short-circuit delta chain traversal and decompression as soon as |
|
540 | 542 | # we encounter a revision in the cache. |
|
541 | 543 | |
|
542 | 544 | stoprids = {self._revisions[n].rid: n for n in self._revisioncache} |
|
543 | 545 | |
|
544 | 546 | if not stoprids: |
|
545 | 547 | stoprids[-1] = None |
|
546 | 548 | |
|
547 | 549 | fulltext = resolvedeltachain( |
|
548 | 550 | self._db, |
|
549 | 551 | self._pathid, |
|
550 | 552 | node, |
|
551 | 553 | self._revisioncache, |
|
552 | 554 | stoprids, |
|
553 | 555 | zstddctx=self._dctx, |
|
554 | 556 | ) |
|
555 | 557 | |
|
556 | 558 | # Don't verify hashes if parent nodes were rewritten, as the hash |
|
557 | 559 | # wouldn't verify. |
|
558 | 560 | if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2): |
|
559 | 561 | _verifyhash = False |
|
560 | 562 | |
|
561 | 563 | if _verifyhash: |
|
562 | 564 | self._checkhash(fulltext, node) |
|
563 | 565 | self._revisioncache[node] = fulltext |
|
564 | 566 | |
|
565 | 567 | return fulltext |
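
Put differently: the fulltext cache is keyed by node, while the SQL walk operates on fileindex row ids, so cached nodes are translated into a rid -> node map before the query runs. A sketch with hypothetical values:

    # Hypothetical values, only to illustrate the stoprids handoff.
    revisioncache = {b'node-A': b'full text of A'}  # node -> cached fulltext
    ridbynode = {b'node-A': 42}                     # from the in-memory index mirror

    stoprids = {ridbynode[n]: n for n in revisioncache}
    if not stoprids:
        stoprids[-1] = None  # sentinel; -1 never matches a real row id

    # resolvedeltachain() excludes these row ids from the recursive query and,
    # when the walk ends on one of them, uses revisioncache[stoprids[rid]] as the
    # base text instead of popping a full snapshot off the delta list.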
|
566 | 568 | |
|
567 | 569 | def rawdata(self, *args, **kwargs): |
|
568 | 570 | return self.revision(*args, **kwargs) |
|
569 | 571 | |
|
570 | 572 | def read(self, node): |
|
571 | 573 | return storageutil.filtermetadata(self.revision(node)) |
|
572 | 574 | |
|
573 | 575 | def renamed(self, node): |
|
574 | 576 | return storageutil.filerevisioncopied(self, node) |
|
575 | 577 | |
|
576 | 578 | def cmp(self, node, fulltext): |
|
577 | 579 | return not storageutil.filedataequivalent(self, node, fulltext) |
|
578 | 580 | |
|
579 | 581 | def emitrevisions( |
|
580 | 582 | self, |
|
581 | 583 | nodes, |
|
582 | 584 | nodesorder=None, |
|
583 | 585 | revisiondata=False, |
|
584 | 586 | assumehaveparentrevisions=False, |
|
585 | 587 | deltamode=repository.CG_DELTAMODE_STD, |
|
586 | 588 | ): |
|
587 | 589 | if nodesorder not in (b'nodes', b'storage', b'linear', None): |
|
588 | 590 | raise error.ProgrammingError( |
|
589 | 591 | b'unhandled value for nodesorder: %s' % nodesorder |
|
590 | 592 | ) |
|
591 | 593 | |
|
592 | 594 | nodes = [n for n in nodes if n != nullid] |
|
593 | 595 | |
|
594 | 596 | if not nodes: |
|
595 | 597 | return |
|
596 | 598 | |
|
597 | 599 | # TODO perform in a single query. |
|
598 | 600 | res = self._db.execute( |
|
599 | 601 | 'SELECT revnum, deltaid FROM fileindex ' |
|
600 | 602 | 'WHERE pathid=? ' |
|
601 | 603 | ' AND node in (%s)' % (','.join(['?'] * len(nodes))), |
|
602 | 604 | tuple([self._pathid] + nodes), |
|
603 | 605 | ) |
|
604 | 606 | |
|
605 | 607 | deltabases = {} |
|
606 | 608 | |
|
607 | 609 | for rev, deltaid in res: |
|
608 | 610 | res = self._db.execute( |
|
609 | 611 | 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?', |
|
610 | 612 | (self._pathid, deltaid), |
|
611 | 613 | ) |
|
612 | 614 | deltabases[rev] = res.fetchone()[0] |
|
613 | 615 | |
|
614 | 616 | # TODO define revdifffn so we can use delta from storage. |
|
615 | 617 | for delta in storageutil.emitrevisions( |
|
616 | 618 | self, |
|
617 | 619 | nodes, |
|
618 | 620 | nodesorder, |
|
619 | 621 | sqliterevisiondelta, |
|
620 | 622 | deltaparentfn=deltabases.__getitem__, |
|
621 | 623 | revisiondata=revisiondata, |
|
622 | 624 | assumehaveparentrevisions=assumehaveparentrevisions, |
|
623 | 625 | deltamode=deltamode, |
|
624 | 626 | ): |
|
625 | 627 | |
|
626 | 628 | yield delta |
|
627 | 629 | |
|
628 | 630 | # End of ifiledata interface. |
|
629 | 631 | |
|
630 | 632 | # Start of ifilemutation interface. |
|
631 | 633 | |
|
632 | 634 | def add(self, filedata, meta, transaction, linkrev, p1, p2): |
|
633 | 635 | if meta or filedata.startswith(b'\x01\n'): |
|
634 | 636 | filedata = storageutil.packmeta(meta, filedata) |
|
635 | 637 | |
|
636 | 638 | return self.addrevision(filedata, transaction, linkrev, p1, p2) |
|
637 | 639 | |
|
638 | 640 | def addrevision( |
|
639 | 641 | self, |
|
640 | 642 | revisiondata, |
|
641 | 643 | transaction, |
|
642 | 644 | linkrev, |
|
643 | 645 | p1, |
|
644 | 646 | p2, |
|
645 | 647 | node=None, |
|
646 | 648 | flags=0, |
|
647 | 649 | cachedelta=None, |
|
648 | 650 | ): |
|
649 | 651 | if flags: |
|
650 | 652 | raise SQLiteStoreError(_(b'flags not supported on revisions')) |
|
651 | 653 | |
|
652 | 654 | validatehash = node is not None |
|
653 | 655 | node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2) |
|
654 | 656 | |
|
655 | 657 | if validatehash: |
|
656 | 658 | self._checkhash(revisiondata, node, p1, p2) |
|
657 | 659 | |
|
658 | 660 | if node in self._nodetorev: |
|
659 | 661 | return node |
|
660 | 662 | |
|
661 | 663 | node = self._addrawrevision( |
|
662 | 664 | node, revisiondata, transaction, linkrev, p1, p2 |
|
663 | 665 | ) |
|
664 | 666 | |
|
665 | 667 | self._revisioncache[node] = revisiondata |
|
666 | 668 | return node |
|
667 | 669 | |
|
668 | 670 | def addgroup( |
|
669 | 671 | self, |
|
670 | 672 | deltas, |
|
671 | 673 | linkmapper, |
|
672 | 674 | transaction, |
|
673 | 675 | addrevisioncb=None, |
|
674 | 676 | maybemissingparents=False, |
|
675 | 677 | ): |
|
676 | 678 | nodes = [] |
|
677 | 679 | |
|
678 | 680 | for node, p1, p2, linknode, deltabase, delta, wireflags in deltas: |
|
679 | 681 | storeflags = 0 |
|
680 | 682 | |
|
681 | 683 | if wireflags & repository.REVISION_FLAG_CENSORED: |
|
682 | 684 | storeflags |= FLAG_CENSORED |
|
683 | 685 | |
|
684 | 686 | if wireflags & ~repository.REVISION_FLAG_CENSORED: |
|
685 | 687 | raise SQLiteStoreError(b'unhandled revision flag') |
|
686 | 688 | |
|
687 | 689 | if maybemissingparents: |
|
688 | 690 | if p1 != nullid and not self.hasnode(p1): |
|
689 | 691 | p1 = nullid |
|
690 | 692 | storeflags |= FLAG_MISSING_P1 |
|
691 | 693 | |
|
692 | 694 | if p2 != nullid and not self.hasnode(p2): |
|
693 | 695 | p2 = nullid |
|
694 | 696 | storeflags |= FLAG_MISSING_P2 |
|
695 | 697 | |
|
696 | 698 | baserev = self.rev(deltabase) |
|
697 | 699 | |
|
698 | 700 | # If base is censored, delta must be full replacement in a single |
|
699 | 701 | # patch operation. |
|
700 | 702 | if baserev != nullrev and self.iscensored(baserev): |
|
701 | 703 | hlen = struct.calcsize(b'>lll') |
|
702 | 704 | oldlen = len(self.rawdata(deltabase, _verifyhash=False)) |
|
703 | 705 | newlen = len(delta) - hlen |
|
704 | 706 | |
|
705 | 707 | if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): |
|
706 | 708 | raise error.CensoredBaseError(self._path, deltabase) |
|
707 | 709 | |
|
708 | 710 | if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored( |
|
709 | 711 | delta, baserev, lambda x: len(self.rawdata(x)) |
|
710 | 712 | ): |
|
711 | 713 | storeflags |= FLAG_CENSORED |
|
712 | 714 | |
|
713 | 715 | linkrev = linkmapper(linknode) |
|
714 | 716 | |
|
715 | 717 | nodes.append(node) |
|
716 | 718 | |
|
717 | 719 | if node in self._revisions: |
|
718 | 720 | # Possibly reset parents to make them proper. |
|
719 | 721 | entry = self._revisions[node] |
|
720 | 722 | |
|
721 | 723 | if entry.flags & FLAG_MISSING_P1 and p1 != nullid: |
|
722 | 724 | entry.p1node = p1 |
|
723 | 725 | entry.p1rev = self._nodetorev[p1] |
|
724 | 726 | entry.flags &= ~FLAG_MISSING_P1 |
|
725 | 727 | |
|
726 | 728 | self._db.execute( |
|
727 | 729 | 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?', |
|
728 | 730 | (self._nodetorev[p1], entry.flags, entry.rid), |
|
729 | 731 | ) |
|
730 | 732 | |
|
731 | 733 | if entry.flags & FLAG_MISSING_P2 and p2 != nullid: |
|
732 | 734 | entry.p2node = p2 |
|
733 | 735 | entry.p2rev = self._nodetorev[p2] |
|
734 | 736 | entry.flags &= ~FLAG_MISSING_P2 |
|
735 | 737 | |
|
736 | 738 | self._db.execute( |
|
737 | 739 | 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?', |
|
738 | 740 | (self._nodetorev[p1], entry.flags, entry.rid), |
|
739 | 741 | ) |
|
740 | 742 | |
|
741 | 743 | continue |
|
742 | 744 | |
|
743 | 745 | if deltabase == nullid: |
|
744 | 746 | text = mdiff.patch(b'', delta) |
|
745 | 747 | storedelta = None |
|
746 | 748 | else: |
|
747 | 749 | text = None |
|
748 | 750 | storedelta = (deltabase, delta) |
|
749 | 751 | |
|
750 | 752 | self._addrawrevision( |
|
751 | 753 | node, |
|
752 | 754 | text, |
|
753 | 755 | transaction, |
|
754 | 756 | linkrev, |
|
755 | 757 | p1, |
|
756 | 758 | p2, |
|
757 | 759 | storedelta=storedelta, |
|
758 | 760 | flags=storeflags, |
|
759 | 761 | ) |
|
760 | 762 | |
|
761 | 763 | if addrevisioncb: |
|
762 | 764 | addrevisioncb(self, node) |
|
763 | 765 | |
|
764 | 766 | return nodes |
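
The censored-base check above compares the first struct.calcsize(b'>lll') bytes of the incoming delta against a full-replacement header. A sketch of that arithmetic with hypothetical lengths, assuming the bsdiff-style hunk layout of start, end and replacement length packed as three big-endian 32-bit integers:

    import struct

    hlen = struct.calcsize(b'>lll')   # 12 bytes: start, end, length of new data
    assert hlen == 12

    oldlen = 20                       # hypothetical length of the censored base text
    newbody = b'replacement full text'
    fullreplacement = struct.pack(b'>lll', 0, oldlen, len(newbody)) + newbody

    # Such a delta rewrites the whole base [0, oldlen) in one hunk, which is the
    # only shape accepted on top of a censored revision.
    newlen = len(fullreplacement) - hlen
    assert fullreplacement[:hlen] == struct.pack(b'>lll', 0, oldlen, newlen)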
|
765 | 767 | |
|
766 | 768 | def censorrevision(self, tr, censornode, tombstone=b''): |
|
767 | 769 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') |
|
768 | 770 | |
|
769 | 771 | # This restriction is cargo culted from revlogs and makes no sense for |
|
770 | 772 | # SQLite, since columns can be resized at will. |
|
771 | 773 | if len(tombstone) > len(self.rawdata(censornode)): |
|
772 | 774 | raise error.Abort( |
|
773 | 775 | _(b'censor tombstone must be no longer than censored data') |
|
774 | 776 | ) |
|
775 | 777 | |
|
776 | 778 | # We need to replace the censored revision's data with the tombstone. |
|
777 | 779 | # But replacing that data will have implications for delta chains that |
|
778 | 780 | # reference it. |
|
779 | 781 | # |
|
780 | 782 | # While "better," more complex strategies are possible, we do something |
|
781 | 783 | # simple: we find delta chain children of the censored revision and we |
|
782 | 784 | # replace those incremental deltas with fulltexts of their corresponding |
|
783 | 785 | # revision. Then we delete the now-unreferenced delta and original |
|
784 | 786 | # revision and insert a replacement. |
|
785 | 787 | |
|
786 | 788 | # Find the delta to be censored. |
|
787 | 789 | censoreddeltaid = self._db.execute( |
|
788 | 790 | 'SELECT deltaid FROM fileindex WHERE id=?', |
|
789 | 791 | (self._revisions[censornode].rid,), |
|
790 | 792 | ).fetchone()[0] |
|
791 | 793 | |
|
792 | 794 | # Find all its delta chain children. |
|
793 | 795 | # TODO once we support storing deltas for !files, we'll need to look |
|
794 | 796 | # for those delta chains too. |
|
795 | 797 | rows = list( |
|
796 | 798 | self._db.execute( |
|
797 | 799 | 'SELECT id, pathid, node FROM fileindex ' |
|
798 | 800 | 'WHERE deltabaseid=? OR deltaid=?', |
|
799 | 801 | (censoreddeltaid, censoreddeltaid), |
|
800 | 802 | ) |
|
801 | 803 | ) |
|
802 | 804 | |
|
803 | 805 | for row in rows: |
|
804 | 806 | rid, pathid, node = row |
|
805 | 807 | |
|
806 | 808 | fulltext = resolvedeltachain( |
|
807 | 809 | self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx |
|
808 | 810 | ) |
|
809 | 811 | |
|
810 | deltahash = hashlib.sha1(fulltext).digest() |

812 | deltahash = hashutil.sha1(fulltext).digest() | |
|
811 | 813 | |
|
812 | 814 | if self._compengine == b'zstd': |
|
813 | 815 | deltablob = self._cctx.compress(fulltext) |
|
814 | 816 | compression = COMPRESSION_ZSTD |
|
815 | 817 | elif self._compengine == b'zlib': |
|
816 | 818 | deltablob = zlib.compress(fulltext) |
|
817 | 819 | compression = COMPRESSION_ZLIB |
|
818 | 820 | elif self._compengine == b'none': |
|
819 | 821 | deltablob = fulltext |
|
820 | 822 | compression = COMPRESSION_NONE |
|
821 | 823 | else: |
|
822 | 824 | raise error.ProgrammingError( |
|
823 | 825 | b'unhandled compression engine: %s' % self._compengine |
|
824 | 826 | ) |
|
825 | 827 | |
|
826 | 828 | if len(deltablob) >= len(fulltext): |
|
827 | 829 | deltablob = fulltext |
|
828 | 830 | compression = COMPRESSION_NONE |
|
829 | 831 | |
|
830 | 832 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) |
|
831 | 833 | |
|
832 | 834 | self._db.execute( |
|
833 | 835 | 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL ' |
|
834 | 836 | 'WHERE id=?', |
|
835 | 837 | (deltaid, rid), |
|
836 | 838 | ) |
|
837 | 839 | |
|
838 | 840 | # Now create the tombstone delta and replace the delta on the censored |
|
839 | 841 | # node. |
|
840 | deltahash = hashlib.sha1(tombstone).digest() |

842 | deltahash = hashutil.sha1(tombstone).digest() | |
|
841 | 843 | tombstonedeltaid = insertdelta( |
|
842 | 844 | self._db, COMPRESSION_NONE, deltahash, tombstone |
|
843 | 845 | ) |
|
844 | 846 | |
|
845 | 847 | flags = self._revisions[censornode].flags |
|
846 | 848 | flags |= FLAG_CENSORED |
|
847 | 849 | |
|
848 | 850 | self._db.execute( |
|
849 | 851 | 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL ' |
|
850 | 852 | 'WHERE pathid=? AND node=?', |
|
851 | 853 | (flags, tombstonedeltaid, self._pathid, censornode), |
|
852 | 854 | ) |
|
853 | 855 | |
|
854 | 856 | self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,)) |
|
855 | 857 | |
|
856 | 858 | self._refreshindex() |
|
857 | 859 | self._revisioncache.clear() |
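
The tombstone written here is an ordinary metadata-framed fulltext. A rough illustration of the envelope (an illustrative re-implementation, not Mercurial's storageutil.packmeta):

    def packmeta_sketch(meta, text):
        # '\x01\n' delimits a header of 'key: value' lines from the file data.
        header = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
        return b'\x01\n' + header + b'\x01\n' + text

    print(packmeta_sketch({b'censored': b'removed by admin'}, b''))
    # b'\x01\ncensored: removed by admin\n\x01\n'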
|
858 | 860 | |
|
859 | 861 | def getstrippoint(self, minlink): |
|
860 | 862 | return storageutil.resolvestripinfo( |
|
861 | 863 | minlink, |
|
862 | 864 | len(self) - 1, |
|
863 | 865 | [self.rev(n) for n in self.heads()], |
|
864 | 866 | self.linkrev, |
|
865 | 867 | self.parentrevs, |
|
866 | 868 | ) |
|
867 | 869 | |
|
868 | 870 | def strip(self, minlink, transaction): |
|
869 | 871 | if not len(self): |
|
870 | 872 | return |
|
871 | 873 | |
|
872 | 874 | rev, _ignored = self.getstrippoint(minlink) |
|
873 | 875 | |
|
874 | 876 | if rev == len(self): |
|
875 | 877 | return |
|
876 | 878 | |
|
877 | 879 | for rev in self.revs(rev): |
|
878 | 880 | self._db.execute( |
|
879 | 881 | 'DELETE FROM fileindex WHERE pathid=? AND node=?', |
|
880 | 882 | (self._pathid, self.node(rev)), |
|
881 | 883 | ) |
|
882 | 884 | |
|
883 | 885 | # TODO how should we garbage collect data in delta table? |
|
884 | 886 | |
|
885 | 887 | self._refreshindex() |
|
886 | 888 | |
|
887 | 889 | # End of ifilemutation interface. |
|
888 | 890 | |
|
889 | 891 | # Start of ifilestorage interface. |
|
890 | 892 | |
|
891 | 893 | def files(self): |
|
892 | 894 | return [] |
|
893 | 895 | |
|
894 | 896 | def storageinfo( |
|
895 | 897 | self, |
|
896 | 898 | exclusivefiles=False, |
|
897 | 899 | sharedfiles=False, |
|
898 | 900 | revisionscount=False, |
|
899 | 901 | trackedsize=False, |
|
900 | 902 | storedsize=False, |
|
901 | 903 | ): |
|
902 | 904 | d = {} |
|
903 | 905 | |
|
904 | 906 | if exclusivefiles: |
|
905 | 907 | d[b'exclusivefiles'] = [] |
|
906 | 908 | |
|
907 | 909 | if sharedfiles: |
|
908 | 910 | # TODO list sqlite file(s) here. |
|
909 | 911 | d[b'sharedfiles'] = [] |
|
910 | 912 | |
|
911 | 913 | if revisionscount: |
|
912 | 914 | d[b'revisionscount'] = len(self) |
|
913 | 915 | |
|
914 | 916 | if trackedsize: |
|
915 | 917 | d[b'trackedsize'] = sum( |
|
916 | 918 | len(self.revision(node)) for node in self._nodetorev |
|
917 | 919 | ) |
|
918 | 920 | |
|
919 | 921 | if storedsize: |
|
920 | 922 | # TODO implement this? |
|
921 | 923 | d[b'storedsize'] = None |
|
922 | 924 | |
|
923 | 925 | return d |
|
924 | 926 | |
|
925 | 927 | def verifyintegrity(self, state): |
|
926 | 928 | state[b'skipread'] = set() |
|
927 | 929 | |
|
928 | 930 | for rev in self: |
|
929 | 931 | node = self.node(rev) |
|
930 | 932 | |
|
931 | 933 | try: |
|
932 | 934 | self.revision(node) |
|
933 | 935 | except Exception as e: |
|
934 | 936 | yield sqliteproblem( |
|
935 | 937 | error=_(b'unpacking %s: %s') % (short(node), e), node=node |
|
936 | 938 | ) |
|
937 | 939 | |
|
938 | 940 | state[b'skipread'].add(node) |
|
939 | 941 | |
|
940 | 942 | # End of ifilestorage interface. |
|
941 | 943 | |
|
942 | 944 | def _checkhash(self, fulltext, node, p1=None, p2=None): |
|
943 | 945 | if p1 is None and p2 is None: |
|
944 | 946 | p1, p2 = self.parents(node) |
|
945 | 947 | |
|
946 | 948 | if node == storageutil.hashrevisionsha1(fulltext, p1, p2): |
|
947 | 949 | return |
|
948 | 950 | |
|
949 | 951 | try: |
|
950 | 952 | del self._revisioncache[node] |
|
951 | 953 | except KeyError: |
|
952 | 954 | pass |
|
953 | 955 | |
|
954 | 956 | if storageutil.iscensoredtext(fulltext): |
|
955 | 957 | raise error.CensoredNodeError(self._path, node, fulltext) |
|
956 | 958 | |
|
957 | 959 | raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path) |
|
958 | 960 | |
|
959 | 961 | def _addrawrevision( |
|
960 | 962 | self, |
|
961 | 963 | node, |
|
962 | 964 | revisiondata, |
|
963 | 965 | transaction, |
|
964 | 966 | linkrev, |
|
965 | 967 | p1, |
|
966 | 968 | p2, |
|
967 | 969 | storedelta=None, |
|
968 | 970 | flags=0, |
|
969 | 971 | ): |
|
970 | 972 | if self._pathid is None: |
|
971 | 973 | res = self._db.execute( |
|
972 | 974 | 'INSERT INTO filepath (path) VALUES (?)', (self._path,) |
|
973 | 975 | ) |
|
974 | 976 | self._pathid = res.lastrowid |
|
975 | 977 | |
|
976 | 978 | # For simplicity, always store a delta against p1. |
|
977 | 979 | # TODO we need a lot more logic here to make behavior reasonable. |
|
978 | 980 | |
|
979 | 981 | if storedelta: |
|
980 | 982 | deltabase, delta = storedelta |
|
981 | 983 | |
|
982 | 984 | if isinstance(deltabase, int): |
|
983 | 985 | deltabase = self.node(deltabase) |
|
984 | 986 | |
|
985 | 987 | else: |
|
986 | 988 | assert revisiondata is not None |
|
987 | 989 | deltabase = p1 |
|
988 | 990 | |
|
989 | 991 | if deltabase == nullid: |
|
990 | 992 | delta = revisiondata |
|
991 | 993 | else: |
|
992 | 994 | delta = mdiff.textdiff( |
|
993 | 995 | self.revision(self.rev(deltabase)), revisiondata |
|
994 | 996 | ) |
|
995 | 997 | |
|
996 | 998 | # File index stores a pointer to its delta and the parent delta. |
|
997 | 999 | # The parent delta is stored via a pointer to the fileindex PK. |
|
998 | 1000 | if deltabase == nullid: |
|
999 | 1001 | baseid = None |
|
1000 | 1002 | else: |
|
1001 | 1003 | baseid = self._revisions[deltabase].rid |
|
1002 | 1004 | |
|
1003 | 1005 | # Deltas are stored with a hash of their content. This allows |
|
1004 | 1006 | # us to de-duplicate. The table is configured to ignore conflicts |
|
1005 | 1007 | # and it is faster to just insert and silently noop than to look |
|
1006 | 1008 | # first. |
|
1007 | deltahash = hashlib.sha1(delta).digest() |

1009 | deltahash = hashutil.sha1(delta).digest() | |
|
1008 | 1010 | |
|
1009 | 1011 | if self._compengine == b'zstd': |
|
1010 | 1012 | deltablob = self._cctx.compress(delta) |
|
1011 | 1013 | compression = COMPRESSION_ZSTD |
|
1012 | 1014 | elif self._compengine == b'zlib': |
|
1013 | 1015 | deltablob = zlib.compress(delta) |
|
1014 | 1016 | compression = COMPRESSION_ZLIB |
|
1015 | 1017 | elif self._compengine == b'none': |
|
1016 | 1018 | deltablob = delta |
|
1017 | 1019 | compression = COMPRESSION_NONE |
|
1018 | 1020 | else: |
|
1019 | 1021 | raise error.ProgrammingError( |
|
1020 | 1022 | b'unhandled compression engine: %s' % self._compengine |
|
1021 | 1023 | ) |
|
1022 | 1024 | |
|
1023 | 1025 | # Don't store compressed data if it isn't practical. |
|
1024 | 1026 | if len(deltablob) >= len(delta): |
|
1025 | 1027 | deltablob = delta |
|
1026 | 1028 | compression = COMPRESSION_NONE |
|
1027 | 1029 | |
|
1028 | 1030 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) |
|
1029 | 1031 | |
|
1030 | 1032 | rev = len(self) |
|
1031 | 1033 | |
|
1032 | 1034 | if p1 == nullid: |
|
1033 | 1035 | p1rev = nullrev |
|
1034 | 1036 | else: |
|
1035 | 1037 | p1rev = self._nodetorev[p1] |
|
1036 | 1038 | |
|
1037 | 1039 | if p2 == nullid: |
|
1038 | 1040 | p2rev = nullrev |
|
1039 | 1041 | else: |
|
1040 | 1042 | p2rev = self._nodetorev[p2] |
|
1041 | 1043 | |
|
1042 | 1044 | rid = self._db.execute( |
|
1043 | 1045 | 'INSERT INTO fileindex (' |
|
1044 | 1046 | ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, ' |
|
1045 | 1047 | ' deltaid, deltabaseid) ' |
|
1046 | 1048 | ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', |
|
1047 | 1049 | ( |
|
1048 | 1050 | self._pathid, |
|
1049 | 1051 | rev, |
|
1050 | 1052 | node, |
|
1051 | 1053 | p1rev, |
|
1052 | 1054 | p2rev, |
|
1053 | 1055 | linkrev, |
|
1054 | 1056 | flags, |
|
1055 | 1057 | deltaid, |
|
1056 | 1058 | baseid, |
|
1057 | 1059 | ), |
|
1058 | 1060 | ).lastrowid |
|
1059 | 1061 | |
|
1060 | 1062 | entry = revisionentry( |
|
1061 | 1063 | rid=rid, |
|
1062 | 1064 | rev=rev, |
|
1063 | 1065 | node=node, |
|
1064 | 1066 | p1rev=p1rev, |
|
1065 | 1067 | p2rev=p2rev, |
|
1066 | 1068 | p1node=p1, |
|
1067 | 1069 | p2node=p2, |
|
1068 | 1070 | linkrev=linkrev, |
|
1069 | 1071 | flags=flags, |
|
1070 | 1072 | ) |
|
1071 | 1073 | |
|
1072 | 1074 | self._nodetorev[node] = rev |
|
1073 | 1075 | self._revtonode[rev] = node |
|
1074 | 1076 | self._revisions[node] = entry |
|
1075 | 1077 | |
|
1076 | 1078 | return node |
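
The compress-only-if-it-helps guard above also appears in censorrevision(). As a standalone sketch of the decision, using zlib only and the COMPRESSION_* constants defined at the top of the file:

    import zlib

    def chooseblob(delta):
        # Store the compressed form only when it is actually smaller.
        blob = zlib.compress(delta)
        if len(blob) >= len(delta):
            return delta, COMPRESSION_NONE
        return blob, COMPRESSION_ZLIB

    print(chooseblob(b'abc')[1])         # tiny input: zlib overhead loses, stored raw
    print(chooseblob(b'abc' * 1000)[1])  # repetitive input: compression wins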
|
1077 | 1079 | |
|
1078 | 1080 | |
|
1079 | 1081 | class sqliterepository(localrepo.localrepository): |
|
1080 | 1082 | def cancopy(self): |
|
1081 | 1083 | return False |
|
1082 | 1084 | |
|
1083 | 1085 | def transaction(self, *args, **kwargs): |
|
1084 | 1086 | current = self.currenttransaction() |
|
1085 | 1087 | |
|
1086 | 1088 | tr = super(sqliterepository, self).transaction(*args, **kwargs) |
|
1087 | 1089 | |
|
1088 | 1090 | if current: |
|
1089 | 1091 | return tr |
|
1090 | 1092 | |
|
1091 | 1093 | self._dbconn.execute('BEGIN TRANSACTION') |
|
1092 | 1094 | |
|
1093 | 1095 | def committransaction(_): |
|
1094 | 1096 | self._dbconn.commit() |
|
1095 | 1097 | |
|
1096 | 1098 | tr.addfinalize(b'sqlitestore', committransaction) |
|
1097 | 1099 | |
|
1098 | 1100 | return tr |
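
The wrapper defers the SQLite commit until the Mercurial transaction finalizes, so revlog and SQLite writes land together. A bare-bones sketch of the pattern, with a plain callback list standing in for the transaction object:

    import sqlite3

    db = sqlite3.connect(':memory:')
    db.execute('CREATE TABLE t (v INTEGER)')
    db.commit()

    finalizers = []                          # stand-in for tr.addfinalize(...)

    db.execute('INSERT INTO t VALUES (1)')   # write happens inside an open transaction
    finalizers.append(db.commit)             # ...but is only committed at finalize time

    for cb in finalizers:                    # what closing the transaction eventually does
        cb()

    assert db.execute('SELECT count(*) FROM t').fetchone()[0] == 1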
|
1099 | 1101 | |
|
1100 | 1102 | @property |
|
1101 | 1103 | def _dbconn(self): |
|
1102 | 1104 | # SQLite connections can only be used on the thread that created |
|
1103 | 1105 | # them. In most cases, this "just works." However, hgweb uses |
|
1104 | 1106 | # multiple threads. |
|
1105 | 1107 | tid = threading.current_thread().ident |
|
1106 | 1108 | |
|
1107 | 1109 | if self._db: |
|
1108 | 1110 | if self._db[0] == tid: |
|
1109 | 1111 | return self._db[1] |
|
1110 | 1112 | |
|
1111 | 1113 | db = makedb(self.svfs.join(b'db.sqlite')) |
|
1112 | 1114 | self._db = (tid, db) |
|
1113 | 1115 | |
|
1114 | 1116 | return db |
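
A generic illustration of that thread-affinity workaround (hypothetical helper, not the extension's API): cache one connection per thread and open a new one when a different thread asks.

    import sqlite3
    import threading

    _connections = {}  # thread ident -> sqlite3.Connection

    def getconn(path):
        tid = threading.current_thread().ident
        if tid not in _connections:
            # sqlite3 connections must be used on the thread that created them.
            _connections[tid] = sqlite3.connect(path)
        return _connections[tid]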
|
1115 | 1117 | |
|
1116 | 1118 | |
|
1117 | 1119 | def makedb(path): |
|
1118 | 1120 | """Construct a database handle for a database at path.""" |
|
1119 | 1121 | |
|
1120 | 1122 | db = sqlite3.connect(encoding.strfromlocal(path)) |
|
1121 | 1123 | db.text_factory = bytes |
|
1122 | 1124 | |
|
1123 | 1125 | res = db.execute('PRAGMA user_version').fetchone()[0] |
|
1124 | 1126 | |
|
1125 | 1127 | # New database. |
|
1126 | 1128 | if res == 0: |
|
1127 | 1129 | for statement in CREATE_SCHEMA: |
|
1128 | 1130 | db.execute(statement) |
|
1129 | 1131 | |
|
1130 | 1132 | db.commit() |
|
1131 | 1133 | |
|
1132 | 1134 | elif res == CURRENT_SCHEMA_VERSION: |
|
1133 | 1135 | pass |
|
1134 | 1136 | |
|
1135 | 1137 | else: |
|
1136 | 1138 | raise error.Abort(_(b'sqlite database has unrecognized version')) |
|
1137 | 1139 | |
|
1138 | 1140 | db.execute('PRAGMA journal_mode=WAL') |
|
1139 | 1141 | |
|
1140 | 1142 | return db |
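
A quick sketch of the version handshake makedb() performs, run against a throwaway database:

    import sqlite3

    db = sqlite3.connect(':memory:')

    # A brand new database reports user_version 0, which triggers schema creation.
    assert db.execute('PRAGMA user_version').fetchone()[0] == 0

    db.execute('PRAGMA user_version=1')  # what the last CREATE_SCHEMA statement records
    assert db.execute('PRAGMA user_version').fetchone()[0] == 1

    # On a real on-disk database, WAL lets readers proceed alongside a single writer.
    db.execute('PRAGMA journal_mode=WAL')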
|
1141 | 1143 | |
|
1142 | 1144 | |
|
1143 | 1145 | def featuresetup(ui, supported): |
|
1144 | 1146 | supported.add(REQUIREMENT) |
|
1145 | 1147 | |
|
1146 | 1148 | if zstd: |
|
1147 | 1149 | supported.add(REQUIREMENT_ZSTD) |
|
1148 | 1150 | |
|
1149 | 1151 | supported.add(REQUIREMENT_ZLIB) |
|
1150 | 1152 | supported.add(REQUIREMENT_NONE) |
|
1151 | 1153 | supported.add(REQUIREMENT_SHALLOW_FILES) |
|
1152 | 1154 | supported.add(repository.NARROW_REQUIREMENT) |
|
1153 | 1155 | |
|
1154 | 1156 | |
|
1155 | 1157 | def newreporequirements(orig, ui, createopts): |
|
1156 | 1158 | if createopts[b'backend'] != b'sqlite': |
|
1157 | 1159 | return orig(ui, createopts) |
|
1158 | 1160 | |
|
1159 | 1161 | # This restriction can be lifted once we have more confidence. |
|
1160 | 1162 | if b'sharedrepo' in createopts: |
|
1161 | 1163 | raise error.Abort( |
|
1162 | 1164 | _(b'shared repositories not supported with SQLite store') |
|
1163 | 1165 | ) |
|
1164 | 1166 | |
|
1165 | 1167 | # This filtering is out of an abundance of caution: we want to ensure |
|
1166 | 1168 | # we honor creation options and we do that by annotating exactly the |
|
1167 | 1169 | # creation options we recognize. |
|
1168 | 1170 | known = { |
|
1169 | 1171 | b'narrowfiles', |
|
1170 | 1172 | b'backend', |
|
1171 | 1173 | b'shallowfilestore', |
|
1172 | 1174 | } |
|
1173 | 1175 | |
|
1174 | 1176 | unsupported = set(createopts) - known |
|
1175 | 1177 | if unsupported: |
|
1176 | 1178 | raise error.Abort( |
|
1177 | 1179 | _(b'SQLite store does not support repo creation option: %s') |
|
1178 | 1180 | % b', '.join(sorted(unsupported)) |
|
1179 | 1181 | ) |
|
1180 | 1182 | |
|
1181 | 1183 | # Since we're a hybrid store that still relies on revlogs, we fall back |
|
1182 | 1184 | # to using the revlogv1 backend's storage requirements then adding our |
|
1183 | 1185 | # own requirement. |
|
1184 | 1186 | createopts[b'backend'] = b'revlogv1' |
|
1185 | 1187 | requirements = orig(ui, createopts) |
|
1186 | 1188 | requirements.add(REQUIREMENT) |
|
1187 | 1189 | |
|
1188 | 1190 | compression = ui.config(b'storage', b'sqlite.compression') |
|
1189 | 1191 | |
|
1190 | 1192 | if compression == b'zstd' and not zstd: |
|
1191 | 1193 | raise error.Abort( |
|
1192 | 1194 | _( |
|
1193 | 1195 | b'storage.sqlite.compression set to "zstd" but ' |
|
1194 | 1196 | b'zstandard compression not available to this ' |
|
1195 | 1197 | b'Mercurial install' |
|
1196 | 1198 | ) |
|
1197 | 1199 | ) |
|
1198 | 1200 | |
|
1199 | 1201 | if compression == b'zstd': |
|
1200 | 1202 | requirements.add(REQUIREMENT_ZSTD) |
|
1201 | 1203 | elif compression == b'zlib': |
|
1202 | 1204 | requirements.add(REQUIREMENT_ZLIB) |
|
1203 | 1205 | elif compression == b'none': |
|
1204 | 1206 | requirements.add(REQUIREMENT_NONE) |
|
1205 | 1207 | else: |
|
1206 | 1208 | raise error.Abort( |
|
1207 | 1209 | _( |
|
1208 | 1210 | b'unknown compression engine defined in ' |
|
1209 | 1211 | b'storage.sqlite.compression: %s' |
|
1210 | 1212 | ) |
|
1211 | 1213 | % compression |
|
1212 | 1214 | ) |
|
1213 | 1215 | |
|
1214 | 1216 | if createopts.get(b'shallowfilestore'): |
|
1215 | 1217 | requirements.add(REQUIREMENT_SHALLOW_FILES) |
|
1216 | 1218 | |
|
1217 | 1219 | return requirements |
|
1218 | 1220 | |
|
1219 | 1221 | |
|
1220 | 1222 | @interfaceutil.implementer(repository.ilocalrepositoryfilestorage) |
|
1221 | 1223 | class sqlitefilestorage(object): |
|
1222 | 1224 | """Repository file storage backed by SQLite.""" |
|
1223 | 1225 | |
|
1224 | 1226 | def file(self, path): |
|
1225 | 1227 | if path[0] == b'/': |
|
1226 | 1228 | path = path[1:] |
|
1227 | 1229 | |
|
1228 | 1230 | if REQUIREMENT_ZSTD in self.requirements: |
|
1229 | 1231 | compression = b'zstd' |
|
1230 | 1232 | elif REQUIREMENT_ZLIB in self.requirements: |
|
1231 | 1233 | compression = b'zlib' |
|
1232 | 1234 | elif REQUIREMENT_NONE in self.requirements: |
|
1233 | 1235 | compression = b'none' |
|
1234 | 1236 | else: |
|
1235 | 1237 | raise error.Abort( |
|
1236 | 1238 | _( |
|
1237 | 1239 | b'unable to determine what compression engine ' |
|
1238 | 1240 | b'to use for SQLite storage' |
|
1239 | 1241 | ) |
|
1240 | 1242 | ) |
|
1241 | 1243 | |
|
1242 | 1244 | return sqlitefilestore(self._dbconn, path, compression) |
|
1243 | 1245 | |
|
1244 | 1246 | |
|
1245 | 1247 | def makefilestorage(orig, requirements, features, **kwargs): |
|
1246 | 1248 | """Produce a type conforming to ``ilocalrepositoryfilestorage``.""" |
|
1247 | 1249 | if REQUIREMENT in requirements: |
|
1248 | 1250 | if REQUIREMENT_SHALLOW_FILES in requirements: |
|
1249 | 1251 | features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE) |
|
1250 | 1252 | |
|
1251 | 1253 | return sqlitefilestorage |
|
1252 | 1254 | else: |
|
1253 | 1255 | return orig(requirements=requirements, features=features, **kwargs) |
|
1254 | 1256 | |
|
1255 | 1257 | |
|
1256 | 1258 | def makemain(orig, ui, requirements, **kwargs): |
|
1257 | 1259 | if REQUIREMENT in requirements: |
|
1258 | 1260 | if REQUIREMENT_ZSTD in requirements and not zstd: |
|
1259 | 1261 | raise error.Abort( |
|
1260 | 1262 | _( |
|
1261 | 1263 | b'repository uses zstandard compression, which ' |
|
1262 | 1264 | b'is not available to this Mercurial install' |
|
1263 | 1265 | ) |
|
1264 | 1266 | ) |
|
1265 | 1267 | |
|
1266 | 1268 | return sqliterepository |
|
1267 | 1269 | |
|
1268 | 1270 | return orig(requirements=requirements, **kwargs) |
|
1269 | 1271 | |
|
1270 | 1272 | |
|
1271 | 1273 | def verifierinit(orig, self, *args, **kwargs): |
|
1272 | 1274 | orig(self, *args, **kwargs) |
|
1273 | 1275 | |
|
1274 | 1276 | # We don't care that files in the store don't align with what is |
|
1275 | 1277 | # advertised. So suppress these warnings. |
|
1276 | 1278 | self.warnorphanstorefiles = False |
|
1277 | 1279 | |
|
1278 | 1280 | |
|
1279 | 1281 | def extsetup(ui): |
|
1280 | 1282 | localrepo.featuresetupfuncs.add(featuresetup) |
|
1281 | 1283 | extensions.wrapfunction( |
|
1282 | 1284 | localrepo, b'newreporequirements', newreporequirements |
|
1283 | 1285 | ) |
|
1284 | 1286 | extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage) |
|
1285 | 1287 | extensions.wrapfunction(localrepo, b'makemain', makemain) |
|
1286 | 1288 | extensions.wrapfunction(verify.verifier, b'__init__', verifierinit) |
|
1287 | 1289 | |
|
1288 | 1290 | |
|
1289 | 1291 | def reposetup(ui, repo): |
|
1290 | 1292 | if isinstance(repo, sqliterepository): |
|
1291 | 1293 | repo._db = None |
|
1292 | 1294 | |
|
1293 | 1295 | # TODO check for bundlerepository? |