hgext: replace references to hashlib.sha1 with hashutil.sha1...
Augie Fackler
r44519:2d49482d default
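The change is mechanical: each touched module drops its top-level "import hashlib" and builds SHA-1 hashers through mercurial.utils.hashutil instead, as the two hunks below show. A minimal sketch of the pattern outside any particular hunk (the data value is illustrative, and the remark about swapping implementations reflects the intent of this series rather than anything visible in this diff):

    # before (pattern removed by this commit)
    import hashlib
    data = b'example payload'
    digest = hashlib.sha1(data).digest()

    # after (pattern introduced by this commit): hashutil.sha1 behaves like
    # hashlib.sha1 here, but gives Mercurial one place to substitute an
    # alternative SHA-1 implementation later
    from mercurial.utils import hashutil
    digest = hashutil.sha1(data).digest()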
@@ -1,856 +1,858 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 import hashlib
13 12 import os
14 13
15 14 from mercurial.i18n import _
16 15 from mercurial.pycompat import (
17 16 getattr,
18 17 open,
19 18 setattr,
20 19 )
21 20 from mercurial import (
22 21 error,
23 22 linelog as linelogmod,
24 23 lock as lockmod,
25 24 mdiff,
26 25 node,
27 26 pycompat,
28 27 scmutil,
29 28 util,
30 29 )
31 from mercurial.utils import stringutil
30 from mercurial.utils import (
31 hashutil,
32 stringutil,
33 )
32 34
33 35 from . import (
34 36 error as faerror,
35 37 revmap as revmapmod,
36 38 )
37 39
38 40 # given path, get filelog, cached
39 41 @util.lrucachefunc
40 42 def _getflog(repo, path):
41 43 return repo.file(path)
42 44
43 45
44 46 # extracted from mercurial.context.basefilectx.annotate
45 47 def _parents(f, follow=True):
46 48 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
47 49 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
48 50 # from the topmost introrev (= srcrev) down to p.linkrev() if it
49 51 # isn't an ancestor of the srcrev.
50 52 f._changeid
51 53 pl = f.parents()
52 54
53 55 # Don't return renamed parents if we aren't following.
54 56 if not follow:
55 57 pl = [p for p in pl if p.path() == f.path()]
56 58
57 59 # renamed filectx won't have a filelog yet, so set it
58 60 # from the cache to save time
59 61 for p in pl:
60 62 if not '_filelog' in p.__dict__:
61 63 p._filelog = _getflog(f._repo, p.path())
62 64
63 65 return pl
64 66
65 67
66 68 # extracted from mercurial.context.basefilectx.annotate. slightly modified
67 69 # so it takes a fctx instead of a pair of text and fctx.
68 70 def _decorate(fctx):
69 71 text = fctx.data()
70 72 linecount = text.count(b'\n')
71 73 if text and not text.endswith(b'\n'):
72 74 linecount += 1
73 75 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
74 76
75 77
76 78 # extracted from mercurial.context.basefilectx.annotate. slightly modified
77 79 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
78 80 # calculating diff here.
79 81 def _pair(parent, child, blocks):
80 82 for (a1, a2, b1, b2), t in blocks:
81 83 # Changed blocks ('!') or blocks made only of blank lines ('~')
82 84 # belong to the child.
83 85 if t == b'=':
84 86 child[0][b1:b2] = parent[0][a1:a2]
85 87 return child
86 88
87 89
88 90 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
89 91 # could be reused
90 92 _revsingle = util.lrucachefunc(scmutil.revsingle)
91 93
92 94
93 95 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
94 96 """(repo, str, str) -> fctx
95 97
96 98 get the filectx object from repo, rev, path, in an efficient way.
97 99
98 100 if resolverev is True, "rev" is a revision specified by the revset
99 101 language, otherwise "rev" is a nodeid, or a revision number that can
100 102 be consumed by repo.__getitem__.
101 103
102 104 if adjustctx is not None, the returned fctx will point to a changeset
103 105 that introduces the change (last modified the file). if adjustctx
104 106 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
105 107 faster for big repos but is incorrect for some cases.
106 108 """
107 109 if resolverev and not isinstance(rev, int) and rev is not None:
108 110 ctx = _revsingle(repo, rev)
109 111 else:
110 112 ctx = repo[rev]
111 113
112 114 # If we don't need to adjust the linkrev, create the filectx using the
113 115 # changectx instead of using ctx[path]. This means it already has the
114 116 # changectx information, so blame -u will be able to look directly at the
115 117 # commitctx object instead of having to resolve it by going through the
116 118 # manifest. In a lazy-manifest world this can prevent us from downloading a
117 119 # lot of data.
118 120 if adjustctx is None:
119 121 # ctx.rev() is None means it's the working copy, which is a special
120 122 # case.
121 123 if ctx.rev() is None:
122 124 fctx = ctx[path]
123 125 else:
124 126 fctx = repo.filectx(path, changeid=ctx.rev())
125 127 else:
126 128 fctx = ctx[path]
127 129 if adjustctx == b'linkrev':
128 130 introrev = fctx.linkrev()
129 131 else:
130 132 introrev = fctx.introrev()
131 133 if introrev != ctx.rev():
132 134 fctx._changeid = introrev
133 135 fctx._changectx = repo[introrev]
134 136 return fctx
135 137
136 138
137 139 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
138 140 def encodedir(path):
139 141 return (
140 142 path.replace(b'.hg/', b'.hg.hg/')
141 143 .replace(b'.l/', b'.l.hg/')
142 144 .replace(b'.m/', b'.m.hg/')
143 145 .replace(b'.lock/', b'.lock.hg/')
144 146 )
145 147
146 148
147 149 def hashdiffopts(diffopts):
148 150 diffoptstr = stringutil.pprint(
149 151 sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults)
150 152 )
151 return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
153 return node.hex(hashutil.sha1(diffoptstr).digest())[:6]
152 154
153 155
154 156 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
155 157
156 158
157 159 class annotateopts(object):
158 160 """like mercurial.mdiff.diffopts, but is for annotate
159 161
160 162 followrename: follow renames, like "hg annotate -f"
161 163 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
162 164 """
163 165
164 166 defaults = {
165 167 b'diffopts': None,
166 168 b'followrename': True,
167 169 b'followmerge': True,
168 170 }
169 171
170 172 def __init__(self, **opts):
171 173 opts = pycompat.byteskwargs(opts)
172 174 for k, v in pycompat.iteritems(self.defaults):
173 175 setattr(self, k, opts.get(k, v))
174 176
175 177 @util.propertycache
176 178 def shortstr(self):
177 179 """represent opts in a short string, suitable for a directory name"""
178 180 result = b''
179 181 if not self.followrename:
180 182 result += b'r0'
181 183 if not self.followmerge:
182 184 result += b'm0'
183 185 if self.diffopts is not None:
184 186 assert isinstance(self.diffopts, mdiff.diffopts)
185 187 diffopthash = hashdiffopts(self.diffopts)
186 188 if diffopthash != _defaultdiffopthash:
187 189 result += b'i' + diffopthash
188 190 return result or b'default'
189 191
190 192
191 193 defaultopts = annotateopts()
192 194
193 195
194 196 class _annotatecontext(object):
195 197 """do not use this class directly as it does not use lock to protect
196 198 writes. use "with annotatecontext(...)" instead.
197 199 """
198 200
199 201 def __init__(self, repo, path, linelogpath, revmappath, opts):
200 202 self.repo = repo
201 203 self.ui = repo.ui
202 204 self.path = path
203 205 self.opts = opts
204 206 self.linelogpath = linelogpath
205 207 self.revmappath = revmappath
206 208 self._linelog = None
207 209 self._revmap = None
208 210 self._node2path = {} # {str: str}
209 211
210 212 @property
211 213 def linelog(self):
212 214 if self._linelog is None:
213 215 if os.path.exists(self.linelogpath):
214 216 with open(self.linelogpath, b'rb') as f:
215 217 try:
216 218 self._linelog = linelogmod.linelog.fromdata(f.read())
217 219 except linelogmod.LineLogError:
218 220 self._linelog = linelogmod.linelog()
219 221 else:
220 222 self._linelog = linelogmod.linelog()
221 223 return self._linelog
222 224
223 225 @property
224 226 def revmap(self):
225 227 if self._revmap is None:
226 228 self._revmap = revmapmod.revmap(self.revmappath)
227 229 return self._revmap
228 230
229 231 def close(self):
230 232 if self._revmap is not None:
231 233 self._revmap.flush()
232 234 self._revmap = None
233 235 if self._linelog is not None:
234 236 with open(self.linelogpath, b'wb') as f:
235 237 f.write(self._linelog.encode())
236 238 self._linelog = None
237 239
238 240 __del__ = close
239 241
240 242 def rebuild(self):
241 243 """delete linelog and revmap, useful for rebuilding"""
242 244 self.close()
243 245 self._node2path.clear()
244 246 _unlinkpaths([self.revmappath, self.linelogpath])
245 247
246 248 @property
247 249 def lastnode(self):
248 250 """return last node in revmap, or None if revmap is empty"""
249 251 if self._revmap is None:
250 252 # fast path, read revmap without loading its full content
251 253 return revmapmod.getlastnode(self.revmappath)
252 254 else:
253 255 return self._revmap.rev2hsh(self._revmap.maxrev)
254 256
255 257 def isuptodate(self, master, strict=True):
256 258 """return True if the revmap / linelog is up-to-date, or the file
257 259 does not exist in the master revision. False otherwise.
258 260
259 261 it tries to be fast and could return false negatives, because of the
260 262 use of linkrev instead of introrev.
261 263
262 264 useful for both server and client to decide whether to update
263 265 fastannotate cache or not.
264 266
265 267 if strict is True, even if fctx exists in the revmap, but is not the
266 268 last node, isuptodate will return False. it's good for performance - no
267 269 expensive check was done.
268 270
269 271 if strict is False, if fctx exists in the revmap, this function may
270 272 return True. this is useful for the client to skip downloading the
271 273 cache if the client's master is behind the server's.
272 274 """
273 275 lastnode = self.lastnode
274 276 try:
275 277 f = self._resolvefctx(master, resolverev=True)
276 278 # choose linkrev instead of introrev as the check is meant to be
277 279 # *fast*.
278 280 linknode = self.repo.changelog.node(f.linkrev())
279 281 if not strict and lastnode and linknode != lastnode:
280 282 # check if f.node() is in the revmap. note: this loads the
281 283 # revmap and can be slow.
282 284 return self.revmap.hsh2rev(linknode) is not None
283 285 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
284 286 # false negatives are acceptable in this case.
285 287 return linknode == lastnode
286 288 except LookupError:
287 289 # master does not have the file, or the revmap is ahead
288 290 return True
289 291
290 292 def annotate(self, rev, master=None, showpath=False, showlines=False):
291 293 """incrementally update the cache so it includes revisions in the main
292 294 branch till 'master'. and run annotate on 'rev', which may or may not be
293 295 included in the main branch.
294 296
295 297 if master is None, do not update linelog.
296 298
297 299 the first value returned is the annotate result, it is [(node, linenum)]
298 300 by default. [(node, linenum, path)] if showpath is True.
299 301
300 302 if showlines is True, a second value will be returned, it is a list of
301 303 corresponding line contents.
302 304 """
303 305
304 306 # the fast path test requires commit hash, convert rev number to hash,
305 307 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
306 308 # command could give us a revision number even if the user passes a
307 309 # commit hash.
308 310 if isinstance(rev, int):
309 311 rev = node.hex(self.repo.changelog.node(rev))
310 312
311 313 # fast path: if rev is in the main branch already
312 314 directly, revfctx = self.canannotatedirectly(rev)
313 315 if directly:
314 316 if self.ui.debugflag:
315 317 self.ui.debug(
316 318 b'fastannotate: %s: using fast path '
317 319 b'(resolved fctx: %s)\n'
318 320 % (
319 321 self.path,
320 322 stringutil.pprint(util.safehasattr(revfctx, b'node')),
321 323 )
322 324 )
323 325 return self.annotatedirectly(revfctx, showpath, showlines)
324 326
325 327 # resolve master
326 328 masterfctx = None
327 329 if master:
328 330 try:
329 331 masterfctx = self._resolvefctx(
330 332 master, resolverev=True, adjustctx=True
331 333 )
332 334 except LookupError: # master does not have the file
333 335 pass
334 336 else:
335 337 if masterfctx in self.revmap: # no need to update linelog
336 338 masterfctx = None
337 339
338 340 # ... - @ <- rev (can be an arbitrary changeset,
339 341 # / not necessarily a descendant
340 342 # master -> o of master)
341 343 # |
342 344 # a merge -> o 'o': new changesets in the main branch
343 345 # |\ '#': revisions in the main branch that
344 346 # o * exist in linelog / revmap
345 347 # | . '*': changesets in side branches, or
346 348 # last master -> # . descendants of master
347 349 # | .
348 350 # # * joint: '#', and is a parent of a '*'
349 351 # |/
350 352 # a joint -> # ^^^^ --- side branches
351 353 # |
352 354 # ^ --- main branch (in linelog)
353 355
354 356 # these DFSes are similar to the traditional annotate algorithm.
355 357 # we cannot really reuse the code for perf reason.
356 358
357 359 # 1st DFS calculates merges, joint points, and needed.
358 360 # "needed" is a simple reference counting dict to free items in
359 361 # "hist", reducing its memory usage otherwise could be huge.
360 362 initvisit = [revfctx]
361 363 if masterfctx:
362 364 if masterfctx.rev() is None:
363 365 raise error.Abort(
364 366 _(b'cannot update linelog to wdir()'),
365 367 hint=_(b'set fastannotate.mainbranch'),
366 368 )
367 369 initvisit.append(masterfctx)
368 370 visit = initvisit[:]
369 371 pcache = {}
370 372 needed = {revfctx: 1}
371 373 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
372 374 while visit:
373 375 f = visit.pop()
374 376 if f in pcache or f in hist:
375 377 continue
376 378 if f in self.revmap: # in the old main branch, it's a joint
377 379 llrev = self.revmap.hsh2rev(f.node())
378 380 self.linelog.annotate(llrev)
379 381 result = self.linelog.annotateresult
380 382 hist[f] = (result, f.data())
381 383 continue
382 384 pl = self._parentfunc(f)
383 385 pcache[f] = pl
384 386 for p in pl:
385 387 needed[p] = needed.get(p, 0) + 1
386 388 if p not in pcache:
387 389 visit.append(p)
388 390
389 391 # 2nd (simple) DFS calculates new changesets in the main branch
390 392 # ('o' nodes in the above graph), so we know when to update linelog.
391 393 newmainbranch = set()
392 394 f = masterfctx
393 395 while f and f not in self.revmap:
394 396 newmainbranch.add(f)
395 397 pl = pcache[f]
396 398 if pl:
397 399 f = pl[0]
398 400 else:
399 401 f = None
400 402 break
401 403
402 404 # f, if present, is the position where the last build stopped at, and
403 405 # should be the "master" last time. check to see if we can continue
404 406 # building the linelog incrementally. (we cannot if diverged)
405 407 if masterfctx is not None:
406 408 self._checklastmasterhead(f)
407 409
408 410 if self.ui.debugflag:
409 411 if newmainbranch:
410 412 self.ui.debug(
411 413 b'fastannotate: %s: %d new changesets in the main'
412 414 b' branch\n' % (self.path, len(newmainbranch))
413 415 )
414 416 elif not hist: # no joints, no updates
415 417 self.ui.debug(
416 418 b'fastannotate: %s: linelog cannot help in '
417 419 b'annotating this revision\n' % self.path
418 420 )
419 421
420 422 # prepare annotateresult so we can update linelog incrementally
421 423 self.linelog.annotate(self.linelog.maxrev)
422 424
423 425 # 3rd DFS does the actual annotate
424 426 visit = initvisit[:]
425 427 progress = self.ui.makeprogress(
426 428 b'building cache', total=len(newmainbranch)
427 429 )
428 430 while visit:
429 431 f = visit[-1]
430 432 if f in hist:
431 433 visit.pop()
432 434 continue
433 435
434 436 ready = True
435 437 pl = pcache[f]
436 438 for p in pl:
437 439 if p not in hist:
438 440 ready = False
439 441 visit.append(p)
440 442 if not ready:
441 443 continue
442 444
443 445 visit.pop()
444 446 blocks = None # mdiff blocks, used for appending linelog
445 447 ismainbranch = f in newmainbranch
446 448 # curr is the same as the traditional annotate algorithm,
447 449 # if we only care about linear history (do not follow merge),
448 450 # then curr is not actually used.
449 451 assert f not in hist
450 452 curr = _decorate(f)
451 453 for i, p in enumerate(pl):
452 454 bs = list(self._diffblocks(hist[p][1], curr[1]))
453 455 if i == 0 and ismainbranch:
454 456 blocks = bs
455 457 curr = _pair(hist[p], curr, bs)
456 458 if needed[p] == 1:
457 459 del hist[p]
458 460 del needed[p]
459 461 else:
460 462 needed[p] -= 1
461 463
462 464 hist[f] = curr
463 465 del pcache[f]
464 466
465 467 if ismainbranch: # need to write to linelog
466 468 progress.increment()
467 469 bannotated = None
468 470 if len(pl) == 2 and self.opts.followmerge: # merge
469 471 bannotated = curr[0]
470 472 if blocks is None: # no parents, add an empty one
471 473 blocks = list(self._diffblocks(b'', curr[1]))
472 474 self._appendrev(f, blocks, bannotated)
473 475 elif showpath: # not append linelog, but we need to record path
474 476 self._node2path[f.node()] = f.path()
475 477
476 478 progress.complete()
477 479
478 480 result = [
479 481 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
480 482 for fr, l in hist[revfctx][0]
481 483 ] # [(node, linenumber)]
482 484 return self._refineannotateresult(result, revfctx, showpath, showlines)
483 485
484 486 def canannotatedirectly(self, rev):
485 487 """(str) -> bool, fctx or node.
486 488 return (True, f) if we can annotate without updating the linelog, pass
487 489 f to annotatedirectly.
488 490 return (False, f) if we need extra calculation. f is the fctx resolved
489 491 from rev.
490 492 """
491 493 result = True
492 494 f = None
493 495 if not isinstance(rev, int) and rev is not None:
494 496 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
495 497 if hsh is not None and (hsh, self.path) in self.revmap:
496 498 f = hsh
497 499 if f is None:
498 500 adjustctx = b'linkrev' if self._perfhack else True
499 501 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
500 502 result = f in self.revmap
501 503 if not result and self._perfhack:
502 504 # redo the resolution without perfhack - as we are going to
503 505 # do write operations, we need a correct fctx.
504 506 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
505 507 return result, f
506 508
507 509 def annotatealllines(self, rev, showpath=False, showlines=False):
508 510 """(rev : str) -> [(node : str, linenum : int, path : str)]
509 511
510 512 the result has the same format as annotate, but includes all (including
511 513 deleted) lines up to rev. call this after calling annotate(rev, ...) for
512 514 better performance and accuracy.
513 515 """
514 516 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
515 517
516 518 # find a chain from rev to anything in the mainbranch
517 519 if revfctx not in self.revmap:
518 520 chain = [revfctx]
519 521 a = b''
520 522 while True:
521 523 f = chain[-1]
522 524 pl = self._parentfunc(f)
523 525 if not pl:
524 526 break
525 527 if pl[0] in self.revmap:
526 528 a = pl[0].data()
527 529 break
528 530 chain.append(pl[0])
529 531
530 532 # both self.linelog and self.revmap is backed by filesystem. now
531 533 # we want to modify them but do not want to write changes back to
532 534 # files. so we create in-memory objects and copy them. it's like
533 535 # a "fork".
534 536 linelog = linelogmod.linelog()
535 537 linelog.copyfrom(self.linelog)
536 538 linelog.annotate(linelog.maxrev)
537 539 revmap = revmapmod.revmap()
538 540 revmap.copyfrom(self.revmap)
539 541
540 542 for f in reversed(chain):
541 543 b = f.data()
542 544 blocks = list(self._diffblocks(a, b))
543 545 self._doappendrev(linelog, revmap, f, blocks)
544 546 a = b
545 547 else:
546 548 # fastpath: use existing linelog, revmap as we don't write to them
547 549 linelog = self.linelog
548 550 revmap = self.revmap
549 551
550 552 lines = linelog.getalllines()
551 553 hsh = revfctx.node()
552 554 llrev = revmap.hsh2rev(hsh)
553 555 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
554 556 # cannot use _refineannotateresult since we need custom logic for
555 557 # resolving line contents
556 558 if showpath:
557 559 result = self._addpathtoresult(result, revmap)
558 560 if showlines:
559 561 linecontents = self._resolvelines(result, revmap, linelog)
560 562 result = (result, linecontents)
561 563 return result
562 564
563 565 def _resolvelines(self, annotateresult, revmap, linelog):
564 566 """(annotateresult) -> [line]. designed for annotatealllines.
565 567 this is probably the most inefficient code in the whole fastannotate
566 568 directory. but we have made a decision that the linelog does not
567 569 store line contents. so getting them requires random accesses to
568 570 the revlog data, since they can be many, it can be very slow.
569 571 """
570 572 # [llrev]
571 573 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
572 574 result = [None] * len(annotateresult)
573 575 # {(rev, linenum): [lineindex]}
574 576 key2idxs = collections.defaultdict(list)
575 577 for i in pycompat.xrange(len(result)):
576 578 key2idxs[(revs[i], annotateresult[i][1])].append(i)
577 579 while key2idxs:
578 580 # find an unresolved line and its linelog rev to annotate
579 581 hsh = None
580 582 try:
581 583 for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
582 584 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
583 585 continue
584 586 hsh = annotateresult[idxs[0]][0]
585 587 break
586 588 except StopIteration: # no more unresolved lines
587 589 return result
588 590 if hsh is None:
589 591 # the remaining key2idxs are not in main branch, resolving them
590 592 # using the hard way...
591 593 revlines = {}
592 594 for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
593 595 if rev not in revlines:
594 596 hsh = annotateresult[idxs[0]][0]
595 597 if self.ui.debugflag:
596 598 self.ui.debug(
597 599 b'fastannotate: reading %s line #%d '
598 600 b'to resolve lines %r\n'
599 601 % (node.short(hsh), linenum, idxs)
600 602 )
601 603 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
602 604 lines = mdiff.splitnewlines(fctx.data())
603 605 revlines[rev] = lines
604 606 for idx in idxs:
605 607 result[idx] = revlines[rev][linenum]
606 608 assert all(x is not None for x in result)
607 609 return result
608 610
609 611 # run the annotate and the lines should match to the file content
610 612 self.ui.debug(
611 613 b'fastannotate: annotate %s to resolve lines\n'
612 614 % node.short(hsh)
613 615 )
614 616 linelog.annotate(rev)
615 617 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
616 618 annotated = linelog.annotateresult
617 619 lines = mdiff.splitnewlines(fctx.data())
618 620 if len(lines) != len(annotated):
619 621 raise faerror.CorruptedFileError(b'unexpected annotated lines')
620 622 # resolve lines from the annotate result
621 623 for i, line in enumerate(lines):
622 624 k = annotated[i]
623 625 if k in key2idxs:
624 626 for idx in key2idxs[k]:
625 627 result[idx] = line
626 628 del key2idxs[k]
627 629 return result
628 630
629 631 def annotatedirectly(self, f, showpath, showlines):
630 632 """like annotate, but when we know that f is in linelog.
631 633 f can be either a 20-char str (node) or a fctx. this is for perf - in
632 634 the best case, the user provides a node and we don't need to read the
633 635 filelog or construct any filecontext.
634 636 """
635 637 if isinstance(f, bytes):
636 638 hsh = f
637 639 else:
638 640 hsh = f.node()
639 641 llrev = self.revmap.hsh2rev(hsh)
640 642 if not llrev:
641 643 raise faerror.CorruptedFileError(
642 644 b'%s is not in revmap' % node.hex(hsh)
643 645 )
644 646 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
645 647 raise faerror.CorruptedFileError(
646 648 b'%s is not in revmap mainbranch' % node.hex(hsh)
647 649 )
648 650 self.linelog.annotate(llrev)
649 651 result = [
650 652 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
651 653 ]
652 654 return self._refineannotateresult(result, f, showpath, showlines)
653 655
654 656 def _refineannotateresult(self, result, f, showpath, showlines):
655 657 """add the missing path or line contents, they can be expensive.
656 658 f could be either node or fctx.
657 659 """
658 660 if showpath:
659 661 result = self._addpathtoresult(result)
660 662 if showlines:
661 663 if isinstance(f, bytes): # f: node or fctx
662 664 llrev = self.revmap.hsh2rev(f)
663 665 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
664 666 else:
665 667 fctx = f
666 668 lines = mdiff.splitnewlines(fctx.data())
667 669 if len(lines) != len(result): # linelog is probably corrupted
668 670 raise faerror.CorruptedFileError()
669 671 result = (result, lines)
670 672 return result
671 673
672 674 def _appendrev(self, fctx, blocks, bannotated=None):
673 675 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
674 676
675 677 def _diffblocks(self, a, b):
676 678 return mdiff.allblocks(a, b, self.opts.diffopts)
677 679
678 680 @staticmethod
679 681 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
680 682 """append a revision to linelog and revmap"""
681 683
682 684 def getllrev(f):
683 685 """(fctx) -> int"""
684 686 # f should not be a linelog revision
685 687 if isinstance(f, int):
686 688 raise error.ProgrammingError(b'f should not be an int')
687 689 # f is a fctx, allocate linelog rev on demand
688 690 hsh = f.node()
689 691 rev = revmap.hsh2rev(hsh)
690 692 if rev is None:
691 693 rev = revmap.append(hsh, sidebranch=True, path=f.path())
692 694 return rev
693 695
694 696 # append sidebranch revisions to revmap
695 697 siderevs = []
696 698 siderevmap = {} # node: int
697 699 if bannotated is not None:
698 700 for (a1, a2, b1, b2), op in blocks:
699 701 if op != b'=':
700 702 # f could be either a linelog rev, or a fctx.
701 703 siderevs += [
702 704 f
703 705 for f, l in bannotated[b1:b2]
704 706 if not isinstance(f, int)
705 707 ]
706 708 siderevs = set(siderevs)
707 709 if fctx in siderevs: # mainnode must be appended separately
708 710 siderevs.remove(fctx)
709 711 for f in siderevs:
710 712 siderevmap[f] = getllrev(f)
711 713
712 714 # the changeset in the main branch, could be a merge
713 715 llrev = revmap.append(fctx.node(), path=fctx.path())
714 716 siderevmap[fctx] = llrev
715 717
716 718 for (a1, a2, b1, b2), op in reversed(blocks):
717 719 if op == b'=':
718 720 continue
719 721 if bannotated is None:
720 722 linelog.replacelines(llrev, a1, a2, b1, b2)
721 723 else:
722 724 blines = [
723 725 ((r if isinstance(r, int) else siderevmap[r]), l)
724 726 for r, l in bannotated[b1:b2]
725 727 ]
726 728 linelog.replacelines_vec(llrev, a1, a2, blines)
727 729
728 730 def _addpathtoresult(self, annotateresult, revmap=None):
729 731 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
730 732 if revmap is None:
731 733 revmap = self.revmap
732 734
733 735 def _getpath(nodeid):
734 736 path = self._node2path.get(nodeid)
735 737 if path is None:
736 738 path = revmap.rev2path(revmap.hsh2rev(nodeid))
737 739 self._node2path[nodeid] = path
738 740 return path
739 741
740 742 return [(n, l, _getpath(n)) for n, l in annotateresult]
741 743
742 744 def _checklastmasterhead(self, fctx):
743 745 """check if fctx is the master's head last time, raise if not"""
744 746 if fctx is None:
745 747 llrev = 0
746 748 else:
747 749 llrev = self.revmap.hsh2rev(fctx.node())
748 750 if not llrev:
749 751 raise faerror.CannotReuseError()
750 752 if self.linelog.maxrev != llrev:
751 753 raise faerror.CannotReuseError()
752 754
753 755 @util.propertycache
754 756 def _parentfunc(self):
755 757 """-> (fctx) -> [fctx]"""
756 758 followrename = self.opts.followrename
757 759 followmerge = self.opts.followmerge
758 760
759 761 def parents(f):
760 762 pl = _parents(f, follow=followrename)
761 763 if not followmerge:
762 764 pl = pl[:1]
763 765 return pl
764 766
765 767 return parents
766 768
767 769 @util.propertycache
768 770 def _perfhack(self):
769 771 return self.ui.configbool(b'fastannotate', b'perfhack')
770 772
771 773 def _resolvefctx(self, rev, path=None, **kwds):
772 774 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
773 775
774 776
775 777 def _unlinkpaths(paths):
776 778 """silent, best-effort unlink"""
777 779 for path in paths:
778 780 try:
779 781 util.unlink(path)
780 782 except OSError:
781 783 pass
782 784
783 785
784 786 class pathhelper(object):
785 787 """helper for getting paths for lockfile, linelog and revmap"""
786 788
787 789 def __init__(self, repo, path, opts=defaultopts):
788 790 # different options use different directories
789 791 self._vfspath = os.path.join(
790 792 b'fastannotate', opts.shortstr, encodedir(path)
791 793 )
792 794 self._repo = repo
793 795
794 796 @property
795 797 def dirname(self):
796 798 return os.path.dirname(self._repo.vfs.join(self._vfspath))
797 799
798 800 @property
799 801 def linelogpath(self):
800 802 return self._repo.vfs.join(self._vfspath + b'.l')
801 803
802 804 def lock(self):
803 805 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
804 806
805 807 @property
806 808 def revmappath(self):
807 809 return self._repo.vfs.join(self._vfspath + b'.m')
808 810
809 811
810 812 @contextlib.contextmanager
811 813 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
812 814 """context needed to perform (fast) annotate on a file
813 815
814 816 an annotatecontext of a single file consists of two structures: the
815 817 linelog and the revmap. this function takes care of locking. only 1
816 818 process is allowed to write that file's linelog and revmap at a time.
817 819
818 820 when something goes wrong, this function will assume the linelog and the
819 821 revmap are in a bad state, and remove them from disk.
820 822
821 823 use this function in the following way:
822 824
823 825 with annotatecontext(...) as actx:
824 826 actx. ....
825 827 """
826 828 helper = pathhelper(repo, path, opts)
827 829 util.makedirs(helper.dirname)
828 830 revmappath = helper.revmappath
829 831 linelogpath = helper.linelogpath
830 832 actx = None
831 833 try:
832 834 with helper.lock():
833 835 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
834 836 if rebuild:
835 837 actx.rebuild()
836 838 yield actx
837 839 except Exception:
838 840 if actx is not None:
839 841 actx.rebuild()
840 842 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
841 843 raise
842 844 finally:
843 845 if actx is not None:
844 846 actx.close()
845 847
846 848
847 849 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
848 850 """like annotatecontext but get the context from a fctx. convenient when
849 851 used in fctx.annotate
850 852 """
851 853 repo = fctx._repo
852 854 path = fctx._path
853 855 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
854 856 follow = True
855 857 aopts = annotateopts(diffopts=diffopts, followrename=follow)
856 858 return annotatecontext(repo, path, aopts, rebuild)
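In this first file the only functional hunk is in hashdiffopts(), which fingerprints a diffopts object; annotateopts.shortstr appends that six-character digest to the cache directory name only when the options differ from mdiff.defaultopts. A small usage sketch repeating the computation from the hunk above (it assumes Mercurial is importable; the printed value simply depends on the default diff options):

    from mercurial import mdiff, node
    from mercurial.utils import hashutil, stringutil

    # same computation as hashdiffopts() above, applied to the default options;
    # annotateopts.shortstr compares per-request hashes against this value
    diffoptstr = stringutil.pprint(
        sorted((k, getattr(mdiff.defaultopts, k)) for k in mdiff.diffopts.defaults)
    )
    print(node.hex(hashutil.sha1(diffoptstr).digest())[:6])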
@@ -1,988 +1,990 b''
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 22 will disable itself if any of those are active.
23 23
24 24 The following configuration options exist:
25 25
26 26 ::
27 27
28 28 [fsmonitor]
29 29 mode = {off, on, paranoid}
30 30
31 31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 34 and ensure that the results are consistent.
35 35
36 36 ::
37 37
38 38 [fsmonitor]
39 39 timeout = (float)
40 40
41 41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 42 to return results. Defaults to `2.0`.
43 43
44 44 ::
45 45
46 46 [fsmonitor]
47 47 blacklistusers = (list of userids)
48 48
49 49 A list of usernames for which fsmonitor will disable itself altogether.
50 50
51 51 ::
52 52
53 53 [fsmonitor]
54 54 walk_on_invalidate = (boolean)
55 55
56 56 Whether or not to walk the whole repo ourselves when our cached state has been
57 57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 58 have been changed. Walking the repo in that case can result in competing for
59 59 I/O with Watchman. For large repos it is recommended to set this value to
60 60 false. You may wish to set this to true if you have a very fast filesystem
61 61 that can outpace the IPC overhead of getting the result data for the full repo
62 62 from Watchman. Defaults to false.
63 63
64 64 ::
65 65
66 66 [fsmonitor]
67 67 warn_when_unused = (boolean)
68 68
69 69 Whether to print a warning during certain operations when fsmonitor would be
70 70 beneficial to performance but isn't enabled.
71 71
72 72 ::
73 73
74 74 [fsmonitor]
75 75 warn_update_file_count = (integer)
76 76
77 77 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
78 78 be printed during working directory updates if this many files will be
79 79 created.
80 80 '''
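For reference, the options documented in the docstring above combine into a single hgrc section. The values here are only illustrative, echoing the defaults and suggestions from the text rather than a recommended configuration:

    [fsmonitor]
    mode = on
    timeout = 2.0
    walk_on_invalidate = false
    warn_when_unused = true
    warn_update_file_count = 50000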
81 81
82 82 # Platforms Supported
83 83 # ===================
84 84 #
85 85 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
86 86 # even under severe loads.
87 87 #
88 88 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
89 89 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
90 90 # user testing under normal loads.
91 91 #
92 92 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
93 93 # very little testing has been done.
94 94 #
95 95 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
96 96 #
97 97 # Known Issues
98 98 # ============
99 99 #
100 100 # * fsmonitor will disable itself if any of the following extensions are
101 101 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
102 102 # * fsmonitor will produce incorrect results if nested repos that are not
103 103 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
104 104 #
105 105 # The issues related to nested repos and subrepos are probably not fundamental
106 106 # ones. Patches to fix them are welcome.
107 107
108 108 from __future__ import absolute_import
109 109
110 110 import codecs
111 import hashlib
112 111 import os
113 112 import stat
114 113 import sys
115 114 import tempfile
116 115 import weakref
117 116
118 117 from mercurial.i18n import _
119 118 from mercurial.node import hex
120 119 from mercurial.pycompat import open
121 120 from mercurial import (
122 121 context,
123 122 encoding,
124 123 error,
125 124 extensions,
126 125 localrepo,
127 126 merge,
128 127 pathutil,
129 128 pycompat,
130 129 registrar,
131 130 scmutil,
132 131 util,
133 132 )
134 133 from mercurial import match as matchmod
135 from mercurial.utils import stringutil
134 from mercurial.utils import (
135 hashutil,
136 stringutil,
137 )
136 138
137 139 from . import (
138 140 pywatchman,
139 141 state,
140 142 watchmanclient,
141 143 )
142 144
143 145 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
144 146 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
145 147 # be specifying the version(s) of Mercurial they are tested with, or
146 148 # leave the attribute unspecified.
147 149 testedwith = b'ships-with-hg-core'
148 150
149 151 configtable = {}
150 152 configitem = registrar.configitem(configtable)
151 153
152 154 configitem(
153 155 b'fsmonitor', b'mode', default=b'on',
154 156 )
155 157 configitem(
156 158 b'fsmonitor', b'walk_on_invalidate', default=False,
157 159 )
158 160 configitem(
159 161 b'fsmonitor', b'timeout', default=b'2',
160 162 )
161 163 configitem(
162 164 b'fsmonitor', b'blacklistusers', default=list,
163 165 )
164 166 configitem(
165 167 b'fsmonitor', b'watchman_exe', default=b'watchman',
166 168 )
167 169 configitem(
168 170 b'fsmonitor', b'verbose', default=True, experimental=True,
169 171 )
170 172 configitem(
171 173 b'experimental', b'fsmonitor.transaction_notify', default=False,
172 174 )
173 175
174 176 # This extension is incompatible with the following blacklisted extensions
175 177 # and will disable itself when encountering one of these:
176 178 _blacklist = [b'largefiles', b'eol']
177 179
178 180
179 181 def debuginstall(ui, fm):
180 182 fm.write(
181 183 b"fsmonitor-watchman",
182 184 _(b"fsmonitor checking for watchman binary... (%s)\n"),
183 185 ui.configpath(b"fsmonitor", b"watchman_exe"),
184 186 )
185 187 root = tempfile.mkdtemp()
186 188 c = watchmanclient.client(ui, root)
187 189 err = None
188 190 try:
189 191 v = c.command(b"version")
190 192 fm.write(
191 193 b"fsmonitor-watchman-version",
192 194 _(b" watchman binary version %s\n"),
193 195 pycompat.bytestr(v["version"]),
194 196 )
195 197 except watchmanclient.Unavailable as e:
196 198 err = stringutil.forcebytestr(e)
197 199 fm.condwrite(
198 200 err,
199 201 b"fsmonitor-watchman-error",
200 202 _(b" watchman binary missing or broken: %s\n"),
201 203 err,
202 204 )
203 205 return 1 if err else 0
204 206
205 207
206 208 def _handleunavailable(ui, state, ex):
207 209 """Exception handler for Watchman interaction exceptions"""
208 210 if isinstance(ex, watchmanclient.Unavailable):
209 211 # experimental config: fsmonitor.verbose
210 212 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
211 213 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
212 214 ui.warn(stringutil.forcebytestr(ex) + b'\n')
213 215 if ex.invalidate:
214 216 state.invalidate()
215 217 # experimental config: fsmonitor.verbose
216 218 if ui.configbool(b'fsmonitor', b'verbose'):
217 219 ui.log(
218 220 b'fsmonitor',
219 221 b'Watchman unavailable: %s\n',
220 222 stringutil.forcebytestr(ex.msg),
221 223 )
222 224 else:
223 225 ui.log(
224 226 b'fsmonitor',
225 227 b'Watchman exception: %s\n',
226 228 stringutil.forcebytestr(ex),
227 229 )
228 230
229 231
230 232 def _hashignore(ignore):
231 233 """Calculate hash for ignore patterns and filenames
232 234
233 235 If this information changes between Mercurial invocations, we can't
234 236 rely on Watchman information anymore and have to re-scan the working
235 237 copy.
236 238
237 239 """
238 sha1 = hashlib.sha1()
240 sha1 = hashutil.sha1()
239 241 sha1.update(pycompat.byterepr(ignore))
240 242 return pycompat.sysbytes(sha1.hexdigest())
241 243
242 244
243 245 _watchmanencoding = pywatchman.encoding.get_local_encoding()
244 246 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
245 247 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
246 248
247 249
248 250 def _watchmantofsencoding(path):
249 251 """Fix path to match watchman and local filesystem encoding
250 252
251 253 watchman's paths encoding can differ from filesystem encoding. For example,
252 254 on Windows, it's always utf-8.
253 255 """
254 256 try:
255 257 decoded = path.decode(_watchmanencoding)
256 258 except UnicodeDecodeError as e:
257 259 raise error.Abort(
258 260 stringutil.forcebytestr(e), hint=b'watchman encoding error'
259 261 )
260 262
261 263 try:
262 264 encoded = decoded.encode(_fsencoding, 'strict')
263 265 except UnicodeEncodeError as e:
264 266 raise error.Abort(stringutil.forcebytestr(e))
265 267
266 268 return encoded
267 269
268 270
269 271 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
270 272 '''Replacement for dirstate.walk, hooking into Watchman.
271 273
272 274 Whenever full is False, ignored is False, and the Watchman client is
273 275 available, use Watchman combined with saved state to possibly return only a
274 276 subset of files.'''
275 277
276 278 def bail(reason):
277 279 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
278 280 return orig(match, subrepos, unknown, ignored, full=True)
279 281
280 282 if full:
281 283 return bail(b'full rewalk requested')
282 284 if ignored:
283 285 return bail(b'listing ignored files')
284 286 if not self._watchmanclient.available():
285 287 return bail(b'client unavailable')
286 288 state = self._fsmonitorstate
287 289 clock, ignorehash, notefiles = state.get()
288 290 if not clock:
289 291 if state.walk_on_invalidate:
290 292 return bail(b'no clock')
291 293 # Initial NULL clock value, see
292 294 # https://facebook.github.io/watchman/docs/clockspec.html
293 295 clock = b'c:0:0'
294 296 notefiles = []
295 297
296 298 ignore = self._ignore
297 299 dirignore = self._dirignore
298 300 if unknown:
299 301 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
300 302 # ignore list changed -- can't rely on Watchman state any more
301 303 if state.walk_on_invalidate:
302 304 return bail(b'ignore rules changed')
303 305 notefiles = []
304 306 clock = b'c:0:0'
305 307 else:
306 308 # always ignore
307 309 ignore = util.always
308 310 dirignore = util.always
309 311
310 312 matchfn = match.matchfn
311 313 matchalways = match.always()
312 314 dmap = self._map
313 315 if util.safehasattr(dmap, b'_map'):
314 316 # for better performance, directly access the inner dirstate map if the
315 317 # standard dirstate implementation is in use.
316 318 dmap = dmap._map
317 319 nonnormalset = self._map.nonnormalset
318 320
319 321 copymap = self._map.copymap
320 322 getkind = stat.S_IFMT
321 323 dirkind = stat.S_IFDIR
322 324 regkind = stat.S_IFREG
323 325 lnkkind = stat.S_IFLNK
324 326 join = self._join
325 327 normcase = util.normcase
326 328 fresh_instance = False
327 329
328 330 exact = skipstep3 = False
329 331 if match.isexact(): # match.exact
330 332 exact = True
331 333 dirignore = util.always # skip step 2
332 334 elif match.prefix(): # match.match, no patterns
333 335 skipstep3 = True
334 336
335 337 if not exact and self._checkcase:
336 338 # note that even though we could receive directory entries, we're only
337 339 # interested in checking if a file with the same name exists. So only
338 340 # normalize files if possible.
339 341 normalize = self._normalizefile
340 342 skipstep3 = False
341 343 else:
342 344 normalize = None
343 345
344 346 # step 1: find all explicit files
345 347 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
346 348
347 349 skipstep3 = skipstep3 and not (work or dirsnotfound)
348 350 work = [d for d in work if not dirignore(d[0])]
349 351
350 352 if not work and (exact or skipstep3):
351 353 for s in subrepos:
352 354 del results[s]
353 355 del results[b'.hg']
354 356 return results
355 357
356 358 # step 2: query Watchman
357 359 try:
358 360 # Use the user-configured timeout for the query.
359 361 # Add a little slack over the top of the user query to allow for
360 362 # overheads while transferring the data
361 363 self._watchmanclient.settimeout(state.timeout + 0.1)
362 364 result = self._watchmanclient.command(
363 365 b'query',
364 366 {
365 367 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
366 368 b'since': clock,
367 369 b'expression': [
368 370 b'not',
369 371 [
370 372 b'anyof',
371 373 [b'dirname', b'.hg'],
372 374 [b'name', b'.hg', b'wholename'],
373 375 ],
374 376 ],
375 377 b'sync_timeout': int(state.timeout * 1000),
376 378 b'empty_on_fresh_instance': state.walk_on_invalidate,
377 379 },
378 380 )
379 381 except Exception as ex:
380 382 _handleunavailable(self._ui, state, ex)
381 383 self._watchmanclient.clearconnection()
382 384 return bail(b'exception during run')
383 385 else:
384 386 # We need to propagate the last observed clock up so that we
385 387 # can use it for our next query
386 388 state.setlastclock(pycompat.sysbytes(result[b'clock']))
387 389 if result[b'is_fresh_instance']:
388 390 if state.walk_on_invalidate:
389 391 state.invalidate()
390 392 return bail(b'fresh instance')
391 393 fresh_instance = True
392 394 # Ignore any prior notable files from the state info
393 395 notefiles = []
394 396
395 397 # for file paths which require normalization and we encounter a case
396 398 # collision, we store our own foldmap
397 399 if normalize:
398 400 foldmap = dict((normcase(k), k) for k in results)
399 401
400 402 switch_slashes = pycompat.ossep == b'\\'
401 403 # The order of the results is, strictly speaking, undefined.
402 404 # For case changes on a case insensitive filesystem we may receive
403 405 # two entries, one with exists=True and another with exists=False.
404 406 # The exists=True entries in the same response should be interpreted
405 407 # as being happens-after the exists=False entries due to the way that
406 408 # Watchman tracks files. We use this property to reconcile deletes
407 409 # for name case changes.
408 410 for entry in result[b'files']:
409 411 fname = entry[b'name']
410 412
411 413 # Watchman always gives us a str. Normalize to bytes on Python 3
412 414 # using Watchman's encoding, if needed.
413 415 if not isinstance(fname, bytes):
414 416 fname = fname.encode(_watchmanencoding)
415 417
416 418 if _fixencoding:
417 419 fname = _watchmantofsencoding(fname)
418 420
419 421 if switch_slashes:
420 422 fname = fname.replace(b'\\', b'/')
421 423 if normalize:
422 424 normed = normcase(fname)
423 425 fname = normalize(fname, True, True)
424 426 foldmap[normed] = fname
425 427 fmode = entry[b'mode']
426 428 fexists = entry[b'exists']
427 429 kind = getkind(fmode)
428 430
429 431 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
430 432 return bail(b'nested-repo-detected')
431 433
432 434 if not fexists:
433 435 # if marked as deleted and we don't already have a change
434 436 # record, mark it as deleted. If we already have an entry
435 437 # for fname then it was either part of walkexplicit or was
436 438 # an earlier result that was a case change
437 439 if (
438 440 fname not in results
439 441 and fname in dmap
440 442 and (matchalways or matchfn(fname))
441 443 ):
442 444 results[fname] = None
443 445 elif kind == dirkind:
444 446 if fname in dmap and (matchalways or matchfn(fname)):
445 447 results[fname] = None
446 448 elif kind == regkind or kind == lnkkind:
447 449 if fname in dmap:
448 450 if matchalways or matchfn(fname):
449 451 results[fname] = entry
450 452 elif (matchalways or matchfn(fname)) and not ignore(fname):
451 453 results[fname] = entry
452 454 elif fname in dmap and (matchalways or matchfn(fname)):
453 455 results[fname] = None
454 456
455 457 # step 3: query notable files we don't already know about
456 458 # XXX try not to iterate over the entire dmap
457 459 if normalize:
458 460 # any notable files that have changed case will already be handled
459 461 # above, so just check membership in the foldmap
460 462 notefiles = set(
461 463 (
462 464 normalize(f, True, True)
463 465 for f in notefiles
464 466 if normcase(f) not in foldmap
465 467 )
466 468 )
467 469 visit = set(
468 470 (
469 471 f
470 472 for f in notefiles
471 473 if (
472 474 f not in results and matchfn(f) and (f in dmap or not ignore(f))
473 475 )
474 476 )
475 477 )
476 478
477 479 if not fresh_instance:
478 480 if matchalways:
479 481 visit.update(f for f in nonnormalset if f not in results)
480 482 visit.update(f for f in copymap if f not in results)
481 483 else:
482 484 visit.update(
483 485 f for f in nonnormalset if f not in results and matchfn(f)
484 486 )
485 487 visit.update(f for f in copymap if f not in results and matchfn(f))
486 488 else:
487 489 if matchalways:
488 490 visit.update(
489 491 f for f, st in pycompat.iteritems(dmap) if f not in results
490 492 )
491 493 visit.update(f for f in copymap if f not in results)
492 494 else:
493 495 visit.update(
494 496 f
495 497 for f, st in pycompat.iteritems(dmap)
496 498 if f not in results and matchfn(f)
497 499 )
498 500 visit.update(f for f in copymap if f not in results and matchfn(f))
499 501
500 502 audit = pathutil.pathauditor(self._root, cached=True).check
501 503 auditpass = [f for f in visit if audit(f)]
502 504 auditpass.sort()
503 505 auditfail = visit.difference(auditpass)
504 506 for f in auditfail:
505 507 results[f] = None
506 508
507 509 nf = iter(auditpass)
508 510 for st in util.statfiles([join(f) for f in auditpass]):
509 511 f = next(nf)
510 512 if st or f in dmap:
511 513 results[f] = st
512 514
513 515 for s in subrepos:
514 516 del results[s]
515 517 del results[b'.hg']
516 518 return results
517 519
518 520
519 521 def overridestatus(
520 522 orig,
521 523 self,
522 524 node1=b'.',
523 525 node2=None,
524 526 match=None,
525 527 ignored=False,
526 528 clean=False,
527 529 unknown=False,
528 530 listsubrepos=False,
529 531 ):
530 532 listignored = ignored
531 533 listclean = clean
532 534 listunknown = unknown
533 535
534 536 def _cmpsets(l1, l2):
535 537 try:
536 538 if b'FSMONITOR_LOG_FILE' in encoding.environ:
537 539 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
538 540 f = open(fn, b'wb')
539 541 else:
540 542 fn = b'fsmonitorfail.log'
541 543 f = self.vfs.open(fn, b'wb')
542 544 except (IOError, OSError):
543 545 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
544 546 return
545 547
546 548 try:
547 549 for i, (s1, s2) in enumerate(zip(l1, l2)):
548 550 if set(s1) != set(s2):
549 551 f.write(b'sets at position %d are unequal\n' % i)
550 552 f.write(b'watchman returned: %s\n' % s1)
551 553 f.write(b'stat returned: %s\n' % s2)
552 554 finally:
553 555 f.close()
554 556
555 557 if isinstance(node1, context.changectx):
556 558 ctx1 = node1
557 559 else:
558 560 ctx1 = self[node1]
559 561 if isinstance(node2, context.changectx):
560 562 ctx2 = node2
561 563 else:
562 564 ctx2 = self[node2]
563 565
564 566 working = ctx2.rev() is None
565 567 parentworking = working and ctx1 == self[b'.']
566 568 match = match or matchmod.always()
567 569
568 570 # Maybe we can use this opportunity to update Watchman's state.
569 571 # Mercurial uses workingcommitctx and/or memctx to represent the part of
570 572 # the workingctx that is to be committed. So don't update the state in
571 573 # that case.
572 574 # HG_PENDING is set in the environment when the dirstate is being updated
573 575 # in the middle of a transaction; we must not update our state in that
574 576 # case, or we risk forgetting about changes in the working copy.
575 577 updatestate = (
576 578 parentworking
577 579 and match.always()
578 580 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
579 581 and b'HG_PENDING' not in encoding.environ
580 582 )
581 583
582 584 try:
583 585 if self._fsmonitorstate.walk_on_invalidate:
584 586 # Use a short timeout to query the current clock. If that
585 587 # takes too long then we assume that the service will be slow
586 588 # to answer our query.
587 589 # walk_on_invalidate indicates that we prefer to walk the
588 590 # tree ourselves because we can ignore portions that Watchman
589 591 # cannot and we tend to be faster in the warmer buffer cache
590 592 # cases.
591 593 self._watchmanclient.settimeout(0.1)
592 594 else:
593 595 # Give Watchman more time to potentially complete its walk
594 596 # and return the initial clock. In this mode we assume that
595 597 # the filesystem will be slower than parsing a potentially
596 598 # very large Watchman result set.
597 599 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
598 600 startclock = self._watchmanclient.getcurrentclock()
599 601 except Exception as ex:
600 602 self._watchmanclient.clearconnection()
601 603 _handleunavailable(self.ui, self._fsmonitorstate, ex)
602 604 # boo, Watchman failed. bail
603 605 return orig(
604 606 node1,
605 607 node2,
606 608 match,
607 609 listignored,
608 610 listclean,
609 611 listunknown,
610 612 listsubrepos,
611 613 )
612 614
613 615 if updatestate:
614 616 # We need info about unknown files. This may make things slower the
615 617 # first time, but whatever.
616 618 stateunknown = True
617 619 else:
618 620 stateunknown = listunknown
619 621
620 622 if updatestate:
621 623 ps = poststatus(startclock)
622 624 self.addpostdsstatus(ps)
623 625
624 626 r = orig(
625 627 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
626 628 )
627 629 modified, added, removed, deleted, unknown, ignored, clean = r
628 630
629 631 if not listunknown:
630 632 unknown = []
631 633
632 634 # don't do paranoid checks if we're not going to query Watchman anyway
633 635 full = listclean or match.traversedir is not None
634 636 if self._fsmonitorstate.mode == b'paranoid' and not full:
635 637 # run status again and fall back to the old walk this time
636 638 self.dirstate._fsmonitordisable = True
637 639
638 640 # shut the UI up
639 641 quiet = self.ui.quiet
640 642 self.ui.quiet = True
641 643 fout, ferr = self.ui.fout, self.ui.ferr
642 644 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
643 645
644 646 try:
645 647 rv2 = orig(
646 648 node1,
647 649 node2,
648 650 match,
649 651 listignored,
650 652 listclean,
651 653 listunknown,
652 654 listsubrepos,
653 655 )
654 656 finally:
655 657 self.dirstate._fsmonitordisable = False
656 658 self.ui.quiet = quiet
657 659 self.ui.fout, self.ui.ferr = fout, ferr
658 660
659 661 # clean isn't tested since it's set to True above
660 662 with self.wlock():
661 663 _cmpsets(
662 664 [modified, added, removed, deleted, unknown, ignored, clean],
663 665 rv2,
664 666 )
665 667 modified, added, removed, deleted, unknown, ignored, clean = rv2
666 668
667 669 return scmutil.status(
668 670 modified, added, removed, deleted, unknown, ignored, clean
669 671 )
670 672
671 673
672 674 class poststatus(object):
673 675 def __init__(self, startclock):
674 676 self._startclock = startclock
675 677
676 678 def __call__(self, wctx, status):
677 679 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
678 680 hashignore = _hashignore(wctx.repo().dirstate._ignore)
679 681 notefiles = (
680 682 status.modified
681 683 + status.added
682 684 + status.removed
683 685 + status.deleted
684 686 + status.unknown
685 687 )
686 688 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
687 689
688 690
689 691 def makedirstate(repo, dirstate):
690 692 class fsmonitordirstate(dirstate.__class__):
691 693 def _fsmonitorinit(self, repo):
692 694 # _fsmonitordisable is used in paranoid mode
693 695 self._fsmonitordisable = False
694 696 self._fsmonitorstate = repo._fsmonitorstate
695 697 self._watchmanclient = repo._watchmanclient
696 698 self._repo = weakref.proxy(repo)
697 699
698 700 def walk(self, *args, **kwargs):
699 701 orig = super(fsmonitordirstate, self).walk
700 702 if self._fsmonitordisable:
701 703 return orig(*args, **kwargs)
702 704 return overridewalk(orig, self, *args, **kwargs)
703 705
704 706 def rebuild(self, *args, **kwargs):
705 707 self._fsmonitorstate.invalidate()
706 708 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
707 709
708 710 def invalidate(self, *args, **kwargs):
709 711 self._fsmonitorstate.invalidate()
710 712 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
711 713
712 714 dirstate.__class__ = fsmonitordirstate
713 715 dirstate._fsmonitorinit(repo)
714 716
715 717
716 718 def wrapdirstate(orig, self):
717 719 ds = orig(self)
718 720 # only override the dirstate when Watchman is available for the repo
719 721 if util.safehasattr(self, b'_fsmonitorstate'):
720 722 makedirstate(self, ds)
721 723 return ds
722 724
723 725
724 726 def extsetup(ui):
725 727 extensions.wrapfilecache(
726 728 localrepo.localrepository, b'dirstate', wrapdirstate
727 729 )
728 730 if pycompat.isdarwin:
729 731 # An assist for avoiding the dangling-symlink fsevents bug
730 732 extensions.wrapfunction(os, b'symlink', wrapsymlink)
731 733
732 734 extensions.wrapfunction(merge, b'update', wrapupdate)
733 735
734 736
735 737 def wrapsymlink(orig, source, link_name):
736 738 ''' if we create a dangling symlink, also touch the parent dir
737 739 to encourage fsevents notifications to work more correctly '''
738 740 try:
739 741 return orig(source, link_name)
740 742 finally:
741 743 try:
742 744 os.utime(os.path.dirname(link_name), None)
743 745 except OSError:
744 746 pass
745 747
746 748
747 749 class state_update(object):
748 750 ''' This context manager is responsible for dispatching the state-enter
749 751 and state-leave signals to the watchman service. The enter and leave
750 752 methods can be invoked manually (for scenarios where context manager
751 753 semantics are not possible). If parameters oldnode and newnode are None,
 752 754     they will be populated based on the current working copy in enter and
 753 755     leave, respectively. Similarly, if the distance is None, it will be
754 756 calculated based on the oldnode and newnode in the leave method.'''
755 757
756 758 def __init__(
757 759 self,
758 760 repo,
759 761 name,
760 762 oldnode=None,
761 763 newnode=None,
762 764 distance=None,
763 765 partial=False,
764 766 ):
765 767 self.repo = repo.unfiltered()
766 768 self.name = name
767 769 self.oldnode = oldnode
768 770 self.newnode = newnode
769 771 self.distance = distance
770 772 self.partial = partial
771 773 self._lock = None
772 774 self.need_leave = False
773 775
774 776 def __enter__(self):
775 777 self.enter()
776 778
777 779 def enter(self):
778 780 # Make sure we have a wlock prior to sending notifications to watchman.
779 781 # We don't want to race with other actors. In the update case,
780 782 # merge.update is going to take the wlock almost immediately. We are
781 783 # effectively extending the lock around several short sanity checks.
782 784 if self.oldnode is None:
783 785 self.oldnode = self.repo[b'.'].node()
784 786
785 787 if self.repo.currentwlock() is None:
786 788 if util.safehasattr(self.repo, b'wlocknostateupdate'):
787 789 self._lock = self.repo.wlocknostateupdate()
788 790 else:
789 791 self._lock = self.repo.wlock()
790 792 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
791 793 return self
792 794
793 795 def __exit__(self, type_, value, tb):
794 796 abort = True if type_ else False
795 797 self.exit(abort=abort)
796 798
797 799 def exit(self, abort=False):
798 800 try:
799 801 if self.need_leave:
800 802 status = b'failed' if abort else b'ok'
801 803 if self.newnode is None:
802 804 self.newnode = self.repo[b'.'].node()
803 805 if self.distance is None:
804 806 self.distance = calcdistance(
805 807 self.repo, self.oldnode, self.newnode
806 808 )
807 809 self._state(b'state-leave', hex(self.newnode), status=status)
808 810 finally:
809 811 self.need_leave = False
810 812 if self._lock:
811 813 self._lock.release()
812 814
813 815 def _state(self, cmd, commithash, status=b'ok'):
814 816 if not util.safehasattr(self.repo, b'_watchmanclient'):
815 817 return False
816 818 try:
817 819 self.repo._watchmanclient.command(
818 820 cmd,
819 821 {
820 822 b'name': self.name,
821 823 b'metadata': {
822 824 # the target revision
823 825 b'rev': commithash,
824 826 # approximate number of commits between current and target
825 827 b'distance': self.distance if self.distance else 0,
826 828 # success/failure (only really meaningful for state-leave)
827 829 b'status': status,
828 830 # whether the working copy parent is changing
829 831 b'partial': self.partial,
830 832 },
831 833 },
832 834 )
833 835 return True
834 836 except Exception as e:
835 837 # Swallow any errors; fire and forget
836 838 self.repo.ui.log(
837 839 b'watchman', b'Exception %s while running %s\n', e, cmd
838 840 )
839 841 return False
840 842
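# A minimal usage sketch for the state_update context manager above; the
# helper function and operation name below are illustrative assumptions, not
# part of this extension.
def _example_state_bracket(repo, newnode):
    # Watchman subscribers see state-enter before the working copy changes
    # and state-leave (with an ok/failed status) once the block finishes.
    with state_update(repo, name=b"hg.example-op", newnode=newnode):
        pass  # mutate the working copy here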
841 843
842 844 # Estimate the distance between two nodes
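# For example, with oldnode at rev 10, newnode at rev 14, and their common
# ancestor at rev 8, the estimate is |10 - 8| + |14 - 8| = 8 commits (the
# revision numbers are illustrative).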
843 845 def calcdistance(repo, oldnode, newnode):
844 846 anc = repo.changelog.ancestor(oldnode, newnode)
845 847 ancrev = repo[anc].rev()
846 848 distance = abs(repo[oldnode].rev() - ancrev) + abs(
847 849 repo[newnode].rev() - ancrev
848 850 )
849 851 return distance
850 852
851 853
852 854 # Bracket working copy updates with calls to the watchman state-enter
853 855 # and state-leave commands. This allows clients to perform more intelligent
854 856 # settling during bulk file change scenarios
855 857 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
856 858 def wrapupdate(
857 859 orig,
858 860 repo,
859 861 node,
860 862 branchmerge,
861 863 force,
862 864 ancestor=None,
863 865 mergeancestor=False,
864 866 labels=None,
865 867 matcher=None,
866 868 **kwargs
867 869 ):
868 870
869 871 distance = 0
870 872 partial = True
871 873 oldnode = repo[b'.'].node()
872 874 newnode = repo[node].node()
873 875 if matcher is None or matcher.always():
874 876 partial = False
875 877 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
876 878
877 879 with state_update(
878 880 repo,
879 881 name=b"hg.update",
880 882 oldnode=oldnode,
881 883 newnode=newnode,
882 884 distance=distance,
883 885 partial=partial,
884 886 ):
885 887 return orig(
886 888 repo,
887 889 node,
888 890 branchmerge,
889 891 force,
890 892 ancestor,
891 893 mergeancestor,
892 894 labels,
893 895 matcher,
894 896 **kwargs
895 897 )
896 898
897 899
898 900 def repo_has_depth_one_nested_repo(repo):
899 901 for f in repo.wvfs.listdir():
900 902 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
901 903 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
902 904 repo.ui.debug(msg % f)
903 905 return True
904 906 return False
905 907
906 908
907 909 def reposetup(ui, repo):
908 910 # We don't work with largefiles or inotify
909 911 exts = extensions.enabled()
910 912 for ext in _blacklist:
911 913 if ext in exts:
912 914 ui.warn(
913 915 _(
914 916 b'The fsmonitor extension is incompatible with the %s '
915 917 b'extension and has been disabled.\n'
916 918 )
917 919 % ext
918 920 )
919 921 return
920 922
921 923 if repo.local():
922 924 # We don't work with subrepos either.
923 925 #
 924 926         # Checking repo[None].substate can cause a dirstate parse, which
 925 927         # is too slow. Instead, look for the .hgsub/.hgsubstate files directly.
926 928 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
927 929 return
928 930
929 931 if repo_has_depth_one_nested_repo(repo):
930 932 return
931 933
932 934 fsmonitorstate = state.state(repo)
933 935 if fsmonitorstate.mode == b'off':
934 936 return
935 937
936 938 try:
937 939 client = watchmanclient.client(repo.ui, repo.root)
938 940 except Exception as ex:
939 941 _handleunavailable(ui, fsmonitorstate, ex)
940 942 return
941 943
942 944 repo._fsmonitorstate = fsmonitorstate
943 945 repo._watchmanclient = client
944 946
945 947 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
946 948 if cached:
947 949 # at this point since fsmonitorstate wasn't present,
948 950 # repo.dirstate is not a fsmonitordirstate
949 951 makedirstate(repo, dirstate)
950 952
951 953 class fsmonitorrepo(repo.__class__):
952 954 def status(self, *args, **kwargs):
953 955 orig = super(fsmonitorrepo, self).status
954 956 return overridestatus(orig, self, *args, **kwargs)
955 957
956 958 def wlocknostateupdate(self, *args, **kwargs):
957 959 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
958 960
959 961 def wlock(self, *args, **kwargs):
960 962 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
961 963 if not ui.configbool(
962 964 b"experimental", b"fsmonitor.transaction_notify"
963 965 ):
964 966 return l
965 967 if l.held != 1:
966 968 return l
967 969 origrelease = l.releasefn
968 970
969 971 def staterelease():
970 972 if origrelease:
971 973 origrelease()
972 974 if l.stateupdate:
973 975 l.stateupdate.exit()
974 976 l.stateupdate = None
975 977
976 978 try:
977 979 l.stateupdate = None
978 980 l.stateupdate = state_update(self, name=b"hg.transaction")
979 981 l.stateupdate.enter()
980 982 l.releasefn = staterelease
981 983 except Exception as e:
982 984 # Swallow any errors; fire and forget
983 985 self.ui.log(
984 986 b'watchman', b'Exception in state update %s\n', e
985 987 )
986 988 return l
987 989
988 990 repo.__class__ = fsmonitorrepo
@@ -1,184 +1,186 b''
1 1 # This software may be used and distributed according to the terms of the
2 2 # GNU General Public License version 2 or any later version.
3 3
4 4 # based on bundleheads extension by Gregory Szorc <gps@mozilla.com>
5 5
6 6 from __future__ import absolute_import
7 7
8 8 import abc
9 import hashlib
10 9 import os
11 10 import subprocess
12 11 import tempfile
13 12
14 13 from mercurial.pycompat import open
15 14 from mercurial import (
16 15 node,
17 16 pycompat,
18 17 )
19 from mercurial.utils import procutil
18 from mercurial.utils import (
19 hashutil,
20 procutil,
21 )
20 22
21 23 NamedTemporaryFile = tempfile.NamedTemporaryFile
22 24
23 25
24 26 class BundleWriteException(Exception):
25 27 pass
26 28
27 29
28 30 class BundleReadException(Exception):
29 31 pass
30 32
31 33
32 34 class abstractbundlestore(object): # pytype: disable=ignored-metaclass
33 35 """Defines the interface for bundle stores.
34 36
35 37 A bundle store is an entity that stores raw bundle data. It is a simple
36 38 key-value store. However, the keys are chosen by the store. The keys can
37 39 be any Python object understood by the corresponding bundle index (see
38 40 ``abstractbundleindex`` below).
39 41 """
40 42
41 43 __metaclass__ = abc.ABCMeta
42 44
43 45 @abc.abstractmethod
44 46 def write(self, data):
45 47 """Write bundle data to the store.
46 48
47 49 This function receives the raw data to be written as a str.
48 50 Throws BundleWriteException
49 51 The key of the written data MUST be returned.
50 52 """
51 53
52 54 @abc.abstractmethod
53 55 def read(self, key):
54 56 """Obtain bundle data for a key.
55 57
56 58 Returns None if the bundle isn't known.
57 59 Throws BundleReadException
58 60 The returned object should be a file object supporting read()
59 61 and close().
60 62 """
61 63
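# A minimal in-memory sketch of the interface above, shown only to illustrate
# the write()/read() contract; it is not a store shipped with this extension.
class _memorybundlestore(abstractbundlestore):
    def __init__(self):
        self._bundles = {}

    def write(self, data):
        # the store chooses the key, as the interface requires
        key = node.hex(hashutil.sha1(data).digest())
        self._bundles[key] = data
        return key

    def read(self, key):
        data = self._bundles.get(key)
        if data is None:
            return None
        # a file-like object supporting read() and close()
        return pycompat.bytesio(data)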
62 64
63 65 class filebundlestore(object):
 64 66     """Bundle store in the filesystem.
 65 67
 66 68     Meant for storing bundles somewhere on disk or on a network filesystem.
67 69 """
68 70
69 71 def __init__(self, ui, repo):
70 72 self.ui = ui
71 73 self.repo = repo
72 74 self.storepath = ui.configpath(b'scratchbranch', b'storepath')
73 75 if not self.storepath:
74 76 self.storepath = self.repo.vfs.join(
75 77 b"scratchbranches", b"filebundlestore"
76 78 )
77 79 if not os.path.exists(self.storepath):
78 80 os.makedirs(self.storepath)
79 81
80 82 def _dirpath(self, hashvalue):
81 83 """First two bytes of the hash are the name of the upper
82 84 level directory, next two bytes are the name of the
83 85 next level directory"""
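        # e.g. a hex key starting with b'1234abcd' ends up under
        # <storepath>/12/34/ (the key value is illustrative)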
84 86 return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4])
85 87
86 88 def _filepath(self, filename):
87 89 return os.path.join(self._dirpath(filename), filename)
88 90
89 91 def write(self, data):
90 filename = node.hex(hashlib.sha1(data).digest())
92 filename = node.hex(hashutil.sha1(data).digest())
91 93 dirpath = self._dirpath(filename)
92 94
93 95 if not os.path.exists(dirpath):
94 96 os.makedirs(dirpath)
95 97
96 98 with open(self._filepath(filename), b'wb') as f:
97 99 f.write(data)
98 100
99 101 return filename
100 102
101 103 def read(self, key):
102 104 try:
103 105 with open(self._filepath(key), b'rb') as f:
104 106 return f.read()
105 107 except IOError:
106 108 return None
107 109
108 110
109 111 class externalbundlestore(abstractbundlestore):
110 112 def __init__(self, put_binary, put_args, get_binary, get_args):
111 113 """
112 114 `put_binary` - path to binary file which uploads bundle to external
113 115 storage and prints key to stdout
114 116 `put_args` - format string with additional args to `put_binary`
115 117 {filename} replacement field can be used.
116 118 `get_binary` - path to binary file which accepts filename and key
117 119 (in that order), downloads bundle from store and saves it to file
118 120 `get_args` - format string with additional args to `get_binary`.
119 121 {filename} and {handle} replacement field can be used.
120 122 """
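        # For example (hypothetical values), put_binary='upload-bundle' with
        # put_args=['--file', '{filename}'] makes write() run
        # "upload-bundle --file <tempfile>" and treat its single line of
        # stdout as the storage key; get_binary/get_args work the same way,
        # with both {filename} and {handle} available for substitution.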
121 123
122 124 self.put_args = put_args
123 125 self.get_args = get_args
124 126 self.put_binary = put_binary
125 127 self.get_binary = get_binary
126 128
127 129 def _call_binary(self, args):
128 130 p = subprocess.Popen(
129 131 pycompat.rapply(procutil.tonativestr, args),
130 132 stdout=subprocess.PIPE,
131 133 stderr=subprocess.PIPE,
132 134 close_fds=True,
133 135 )
134 136 stdout, stderr = p.communicate()
135 137 returncode = p.returncode
136 138 return returncode, stdout, stderr
137 139
138 140 def write(self, data):
 139 141         # Won't work on Windows because you can't open the file a second
 140 142         # time without closing it
141 143 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
142 144 # with pycompat.namedtempfile()
143 145 with NamedTemporaryFile() as temp:
144 146 temp.write(data)
145 147 temp.flush()
146 148 temp.seek(0)
147 149 formatted_args = [
148 150 arg.format(filename=temp.name) for arg in self.put_args
149 151 ]
150 152 returncode, stdout, stderr = self._call_binary(
151 153 [self.put_binary] + formatted_args
152 154 )
153 155
154 156 if returncode != 0:
155 157 raise BundleWriteException(
156 158 b'Failed to upload to external store: %s' % stderr
157 159 )
158 160 stdout_lines = stdout.splitlines()
159 161 if len(stdout_lines) == 1:
160 162 return stdout_lines[0]
161 163 else:
162 164 raise BundleWriteException(
163 165 b'Bad output from %s: %s' % (self.put_binary, stdout)
164 166 )
165 167
166 168 def read(self, handle):
 167 169         # Won't work on Windows because you can't open the file a second
 168 170         # time without closing it
169 171 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
170 172 # with pycompat.namedtempfile()
171 173 with NamedTemporaryFile() as temp:
172 174 formatted_args = [
173 175 arg.format(filename=temp.name, handle=handle)
174 176 for arg in self.get_args
175 177 ]
176 178 returncode, stdout, stderr = self._call_binary(
177 179 [self.get_binary] + formatted_args
178 180 )
179 181
180 182 if returncode != 0:
181 183 raise BundleReadException(
182 184 b'Failed to download from external store: %s' % stderr
183 185 )
184 186 return temp.read()
@@ -1,669 +1,669 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 import hashlib
14 13 import os
15 14 import shutil
16 15
17 16 from mercurial.i18n import _
18 17
19 18 from mercurial import (
20 19 cmdutil,
21 20 context,
22 21 error,
23 22 exthelper,
24 23 hg,
25 24 lock,
26 25 match as matchmod,
27 26 node,
28 27 pycompat,
29 28 scmutil,
30 29 util,
31 30 )
31 from mercurial.utils import hashutil
32 32
33 33 from ..convert import (
34 34 convcmd,
35 35 filemap,
36 36 )
37 37
38 38 from . import lfutil, storefactory
39 39
40 40 release = lock.release
41 41
42 42 # -- Commands ----------------------------------------------------------
43 43
44 44 eh = exthelper.exthelper()
45 45
46 46
47 47 @eh.command(
48 48 b'lfconvert',
49 49 [
50 50 (
51 51 b's',
52 52 b'size',
53 53 b'',
54 54 _(b'minimum size (MB) for files to be converted as largefiles'),
55 55 b'SIZE',
56 56 ),
57 57 (
58 58 b'',
59 59 b'to-normal',
60 60 False,
61 61 _(b'convert from a largefiles repo to a normal repo'),
62 62 ),
63 63 ],
64 64 _(b'hg lfconvert SOURCE DEST [FILE ...]'),
65 65 norepo=True,
66 66 inferrepo=True,
67 67 )
68 68 def lfconvert(ui, src, dest, *pats, **opts):
69 69 '''convert a normal repository to a largefiles repository
70 70
71 71 Convert repository SOURCE to a new repository DEST, identical to
72 72 SOURCE except that certain files will be converted as largefiles:
73 73 specifically, any file that matches any PATTERN *or* whose size is
74 74 above the minimum size threshold is converted as a largefile. The
75 75 size used to determine whether or not to track a file as a
76 76 largefile is the size of the first version of the file. The
77 77 minimum size can be specified either with --size or in
78 78 configuration as ``largefiles.size``.
79 79
80 80 After running this command you will need to make sure that
81 81 largefiles is enabled anywhere you intend to push the new
82 82 repository.
83 83
84 84 Use --to-normal to convert largefiles back to normal files; after
85 85 this, the DEST repository can be used without largefiles at all.'''
86 86
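    # Typical invocations (illustrative):
    #   hg lfconvert --size 10 oldrepo newrepo        # size threshold only
    #   hg lfconvert oldrepo newrepo '*.iso' '*.zip'  # patterns plus configured size
    #   hg lfconvert --to-normal bigrepo plainrepo    # convert back to normal files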
87 87 opts = pycompat.byteskwargs(opts)
88 88 if opts[b'to_normal']:
89 89 tolfile = False
90 90 else:
91 91 tolfile = True
92 92 size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None)
93 93
94 94 if not hg.islocal(src):
95 95 raise error.Abort(_(b'%s is not a local Mercurial repo') % src)
96 96 if not hg.islocal(dest):
97 97 raise error.Abort(_(b'%s is not a local Mercurial repo') % dest)
98 98
99 99 rsrc = hg.repository(ui, src)
100 100 ui.status(_(b'initializing destination %s\n') % dest)
101 101 rdst = hg.repository(ui, dest, create=True)
102 102
103 103 success = False
104 104 dstwlock = dstlock = None
105 105 try:
106 106 # Get a list of all changesets in the source. The easy way to do this
107 107 # is to simply walk the changelog, using changelog.nodesbetween().
108 108 # Take a look at mercurial/revlog.py:639 for more details.
109 109 # Use a generator instead of a list to decrease memory usage
110 110 ctxs = (
111 111 rsrc[ctx]
112 112 for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]
113 113 )
114 114 revmap = {node.nullid: node.nullid}
115 115 if tolfile:
 116 116             # Lock the destination to prevent modification while we convert into it.
117 117 # Don't need to lock src because we are just reading from its
118 118 # history which can't change.
119 119 dstwlock = rdst.wlock()
120 120 dstlock = rdst.lock()
121 121
122 122 lfiles = set()
123 123 normalfiles = set()
124 124 if not pats:
125 125 pats = ui.configlist(lfutil.longname, b'patterns')
126 126 if pats:
127 127 matcher = matchmod.match(rsrc.root, b'', list(pats))
128 128 else:
129 129 matcher = None
130 130
131 131 lfiletohash = {}
132 132 with ui.makeprogress(
133 133 _(b'converting revisions'),
134 134 unit=_(b'revisions'),
135 135 total=rsrc[b'tip'].rev(),
136 136 ) as progress:
137 137 for ctx in ctxs:
138 138 progress.update(ctx.rev())
139 139 _lfconvert_addchangeset(
140 140 rsrc,
141 141 rdst,
142 142 ctx,
143 143 revmap,
144 144 lfiles,
145 145 normalfiles,
146 146 matcher,
147 147 size,
148 148 lfiletohash,
149 149 )
150 150
151 151 if rdst.wvfs.exists(lfutil.shortname):
152 152 rdst.wvfs.rmtree(lfutil.shortname)
153 153
154 154 for f in lfiletohash.keys():
155 155 if rdst.wvfs.isfile(f):
156 156 rdst.wvfs.unlink(f)
157 157 try:
158 158 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
159 159 except OSError:
160 160 pass
161 161
162 162 # If there were any files converted to largefiles, add largefiles
163 163 # to the destination repository's requirements.
164 164 if lfiles:
165 165 rdst.requirements.add(b'largefiles')
166 166 rdst._writerequirements()
167 167 else:
168 168
169 169 class lfsource(filemap.filemap_source):
170 170 def __init__(self, ui, source):
171 171 super(lfsource, self).__init__(ui, source, None)
172 172 self.filemapper.rename[lfutil.shortname] = b'.'
173 173
174 174 def getfile(self, name, rev):
175 175 realname, realrev = rev
176 176 f = super(lfsource, self).getfile(name, rev)
177 177
178 178 if (
179 179 not realname.startswith(lfutil.shortnameslash)
180 180 or f[0] is None
181 181 ):
182 182 return f
183 183
184 184 # Substitute in the largefile data for the hash
185 185 hash = f[0].strip()
186 186 path = lfutil.findfile(rsrc, hash)
187 187
188 188 if path is None:
189 189 raise error.Abort(
190 190 _(b"missing largefile for '%s' in %s")
191 191 % (realname, realrev)
192 192 )
193 193 return util.readfile(path), f[1]
194 194
195 195 class converter(convcmd.converter):
196 196 def __init__(self, ui, source, dest, revmapfile, opts):
197 197 src = lfsource(ui, source)
198 198
199 199 super(converter, self).__init__(
200 200 ui, src, dest, revmapfile, opts
201 201 )
202 202
203 203 found, missing = downloadlfiles(ui, rsrc)
204 204 if missing != 0:
205 205 raise error.Abort(_(b"all largefiles must be present locally"))
206 206
207 207 orig = convcmd.converter
208 208 convcmd.converter = converter
209 209
210 210 try:
211 211 convcmd.convert(
212 212 ui, src, dest, source_type=b'hg', dest_type=b'hg'
213 213 )
214 214 finally:
215 215 convcmd.converter = orig
216 216 success = True
217 217 finally:
218 218 if tolfile:
219 219 rdst.dirstate.clear()
220 220 release(dstlock, dstwlock)
221 221 if not success:
222 222 # we failed, remove the new directory
223 223 shutil.rmtree(rdst.root)
224 224
225 225
226 226 def _lfconvert_addchangeset(
227 227 rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash
228 228 ):
229 229 # Convert src parents to dst parents
230 230 parents = _convertparents(ctx, revmap)
231 231
232 232 # Generate list of changed files
233 233 files = _getchangedfiles(ctx, parents)
234 234
235 235 dstfiles = []
236 236 for f in files:
237 237 if f not in lfiles and f not in normalfiles:
238 238 islfile = _islfile(f, ctx, matcher, size)
239 239 # If this file was renamed or copied then copy
240 240 # the largefile-ness of its predecessor
241 241 if f in ctx.manifest():
242 242 fctx = ctx.filectx(f)
243 243 renamed = fctx.copysource()
244 244 if renamed is None:
245 245 # the code below assumes renamed to be a boolean or a list
246 246 # and won't quite work with the value None
247 247 renamed = False
248 248 renamedlfile = renamed and renamed in lfiles
249 249 islfile |= renamedlfile
250 250 if b'l' in fctx.flags():
251 251 if renamedlfile:
252 252 raise error.Abort(
253 253 _(b'renamed/copied largefile %s becomes symlink')
254 254 % f
255 255 )
256 256 islfile = False
257 257 if islfile:
258 258 lfiles.add(f)
259 259 else:
260 260 normalfiles.add(f)
261 261
262 262 if f in lfiles:
263 263 fstandin = lfutil.standin(f)
264 264 dstfiles.append(fstandin)
265 265 # largefile in manifest if it has not been removed/renamed
266 266 if f in ctx.manifest():
267 267 fctx = ctx.filectx(f)
268 268 if b'l' in fctx.flags():
269 269 renamed = fctx.copysource()
270 270 if renamed and renamed in lfiles:
271 271 raise error.Abort(
272 272 _(b'largefile %s becomes symlink') % f
273 273 )
274 274
275 275 # largefile was modified, update standins
276 m = hashlib.sha1(b'')
276 m = hashutil.sha1(b'')
277 277 m.update(ctx[f].data())
278 278 hash = node.hex(m.digest())
279 279 if f not in lfiletohash or lfiletohash[f] != hash:
280 280 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
281 281 executable = b'x' in ctx[f].flags()
282 282 lfutil.writestandin(rdst, fstandin, hash, executable)
283 283 lfiletohash[f] = hash
284 284 else:
285 285 # normal file
286 286 dstfiles.append(f)
287 287
288 288 def getfilectx(repo, memctx, f):
289 289 srcfname = lfutil.splitstandin(f)
290 290 if srcfname is not None:
291 291 # if the file isn't in the manifest then it was removed
292 292 # or renamed, return None to indicate this
293 293 try:
294 294 fctx = ctx.filectx(srcfname)
295 295 except error.LookupError:
296 296 return None
297 297 renamed = fctx.copysource()
298 298 if renamed:
299 299 # standin is always a largefile because largefile-ness
300 300 # doesn't change after rename or copy
301 301 renamed = lfutil.standin(renamed)
302 302
303 303 return context.memfilectx(
304 304 repo,
305 305 memctx,
306 306 f,
307 307 lfiletohash[srcfname] + b'\n',
308 308 b'l' in fctx.flags(),
309 309 b'x' in fctx.flags(),
310 310 renamed,
311 311 )
312 312 else:
313 313 return _getnormalcontext(repo, ctx, f, revmap)
314 314
315 315 # Commit
316 316 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
317 317
318 318
319 319 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
320 320 mctx = context.memctx(
321 321 rdst,
322 322 parents,
323 323 ctx.description(),
324 324 dstfiles,
325 325 getfilectx,
326 326 ctx.user(),
327 327 ctx.date(),
328 328 ctx.extra(),
329 329 )
330 330 ret = rdst.commitctx(mctx)
331 331 lfutil.copyalltostore(rdst, ret)
332 332 rdst.setparents(ret)
333 333 revmap[ctx.node()] = rdst.changelog.tip()
334 334
335 335
336 336 # Generate list of changed files
337 337 def _getchangedfiles(ctx, parents):
338 338 files = set(ctx.files())
339 339 if node.nullid not in parents:
340 340 mc = ctx.manifest()
341 341 for pctx in ctx.parents():
342 342 for fn in pctx.manifest().diff(mc):
343 343 files.add(fn)
344 344 return files
345 345
346 346
347 347 # Convert src parents to dst parents
348 348 def _convertparents(ctx, revmap):
349 349 parents = []
350 350 for p in ctx.parents():
351 351 parents.append(revmap[p.node()])
352 352 while len(parents) < 2:
353 353 parents.append(node.nullid)
354 354 return parents
355 355
356 356
357 357 # Get memfilectx for a normal file
358 358 def _getnormalcontext(repo, ctx, f, revmap):
359 359 try:
360 360 fctx = ctx.filectx(f)
361 361 except error.LookupError:
362 362 return None
363 363 renamed = fctx.copysource()
364 364
365 365 data = fctx.data()
366 366 if f == b'.hgtags':
367 367 data = _converttags(repo.ui, revmap, data)
368 368 return context.memfilectx(
369 369 repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed
370 370 )
371 371
372 372
373 373 # Remap tag data using a revision map
374 374 def _converttags(ui, revmap, data):
375 375 newdata = []
376 376 for line in data.splitlines():
377 377 try:
378 378 id, name = line.split(b' ', 1)
379 379 except ValueError:
380 380 ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line)
381 381 continue
382 382 try:
383 383 newid = node.bin(id)
384 384 except TypeError:
385 385 ui.warn(_(b'skipping incorrectly formatted id %s\n') % id)
386 386 continue
387 387 try:
388 388 newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name))
389 389 except KeyError:
390 390 ui.warn(_(b'no mapping for id %s\n') % id)
391 391 continue
392 392 return b''.join(newdata)
393 393
394 394
395 395 def _islfile(file, ctx, matcher, size):
396 396 '''Return true if file should be considered a largefile, i.e.
397 397 matcher matches it or it is larger than size.'''
398 398 # never store special .hg* files as largefiles
399 399 if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs':
400 400 return False
401 401 if matcher and matcher(file):
402 402 return True
403 403 try:
404 404 return ctx.filectx(file).size() >= size * 1024 * 1024
405 405 except error.LookupError:
406 406 return False
407 407
408 408
409 409 def uploadlfiles(ui, rsrc, rdst, files):
410 410 '''upload largefiles to the central store'''
411 411
412 412 if not files:
413 413 return
414 414
415 415 store = storefactory.openstore(rsrc, rdst, put=True)
416 416
417 417 at = 0
418 418 ui.debug(b"sending statlfile command for %d largefiles\n" % len(files))
419 419 retval = store.exists(files)
420 420 files = [h for h in files if not retval[h]]
421 421 ui.debug(b"%d largefiles need to be uploaded\n" % len(files))
422 422
423 423 with ui.makeprogress(
424 424 _(b'uploading largefiles'), unit=_(b'files'), total=len(files)
425 425 ) as progress:
426 426 for hash in files:
427 427 progress.update(at)
428 428 source = lfutil.findfile(rsrc, hash)
429 429 if not source:
430 430 raise error.Abort(
431 431 _(
432 432 b'largefile %s missing from store'
433 433 b' (needs to be uploaded)'
434 434 )
435 435 % hash
436 436 )
437 437 # XXX check for errors here
438 438 store.put(source, hash)
439 439 at += 1
440 440
441 441
442 442 def verifylfiles(ui, repo, all=False, contents=False):
443 443 '''Verify that every largefile revision in the current changeset
444 444 exists in the central store. With --contents, also verify that
 445 445     the contents of each local largefile revision are correct (SHA-1 hash
446 446 matches the revision ID). With --all, check every changeset in
447 447 this repository.'''
448 448 if all:
449 449 revs = repo.revs(b'all()')
450 450 else:
451 451 revs = [b'.']
452 452
453 453 store = storefactory.openstore(repo)
454 454 return store.verify(revs, contents=contents)
455 455
456 456
457 457 def cachelfiles(ui, repo, node, filelist=None):
458 458 '''cachelfiles ensures that all largefiles needed by the specified revision
459 459 are present in the repository's largefile cache.
460 460
461 461 returns a tuple (cached, missing). cached is the list of files downloaded
462 462 by this operation; missing is the list of files that were needed but could
463 463 not be found.'''
464 464 lfiles = lfutil.listlfiles(repo, node)
465 465 if filelist:
466 466 lfiles = set(lfiles) & set(filelist)
467 467 toget = []
468 468
469 469 ctx = repo[node]
470 470 for lfile in lfiles:
471 471 try:
472 472 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
473 473 except IOError as err:
474 474 if err.errno == errno.ENOENT:
475 475 continue # node must be None and standin wasn't found in wctx
476 476 raise
477 477 if not lfutil.findfile(repo, expectedhash):
478 478 toget.append((lfile, expectedhash))
479 479
480 480 if toget:
481 481 store = storefactory.openstore(repo)
482 482 ret = store.get(toget)
483 483 return ret
484 484
485 485 return ([], [])
486 486
487 487
488 488 def downloadlfiles(ui, repo, rev=None):
489 489 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
490 490
491 491 def prepare(ctx, fns):
492 492 pass
493 493
494 494 totalsuccess = 0
495 495 totalmissing = 0
496 496 if rev != []: # walkchangerevs on empty list would return all revs
497 497 for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare):
498 498 success, missing = cachelfiles(ui, repo, ctx.node())
499 499 totalsuccess += len(success)
500 500 totalmissing += len(missing)
501 501 ui.status(_(b"%d additional largefiles cached\n") % totalsuccess)
502 502 if totalmissing > 0:
503 503 ui.status(_(b"%d largefiles failed to download\n") % totalmissing)
504 504 return totalsuccess, totalmissing
505 505
506 506
507 507 def updatelfiles(
508 508 ui, repo, filelist=None, printmessage=None, normallookup=False
509 509 ):
510 510 '''Update largefiles according to standins in the working directory
511 511
 512 512     If ``printmessage`` is not ``None``, it forces status messages to be
 513 513     printed (for a true value) or suppressed (for a false value).
514 514 '''
515 515 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
516 516 with repo.wlock():
517 517 lfdirstate = lfutil.openlfdirstate(ui, repo)
518 518 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
519 519
520 520 if filelist is not None:
521 521 filelist = set(filelist)
522 522 lfiles = [f for f in lfiles if f in filelist]
523 523
524 524 update = {}
525 525 dropped = set()
526 526 updated, removed = 0, 0
527 527 wvfs = repo.wvfs
528 528 wctx = repo[None]
529 529 for lfile in lfiles:
530 530 lfileorig = os.path.relpath(
531 531 scmutil.backuppath(ui, repo, lfile), start=repo.root
532 532 )
533 533 standin = lfutil.standin(lfile)
534 534 standinorig = os.path.relpath(
535 535 scmutil.backuppath(ui, repo, standin), start=repo.root
536 536 )
537 537 if wvfs.exists(standin):
538 538 if wvfs.exists(standinorig) and wvfs.exists(lfile):
539 539 shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig))
540 540 wvfs.unlinkpath(standinorig)
541 541 expecthash = lfutil.readasstandin(wctx[standin])
542 542 if expecthash != b'':
543 543 if lfile not in wctx: # not switched to normal file
544 544 if repo.dirstate[standin] != b'?':
545 545 wvfs.unlinkpath(lfile, ignoremissing=True)
546 546 else:
547 547 dropped.add(lfile)
548 548
549 549 # use normallookup() to allocate an entry in largefiles
550 550 # dirstate to prevent lfilesrepo.status() from reporting
551 551 # missing files as removed.
552 552 lfdirstate.normallookup(lfile)
553 553 update[lfile] = expecthash
554 554 else:
555 555 # Remove lfiles for which the standin is deleted, unless the
556 556 # lfile is added to the repository again. This happens when a
557 557 # largefile is converted back to a normal file: the standin
558 558 # disappears, but a new (normal) file appears as the lfile.
559 559 if (
560 560 wvfs.exists(lfile)
561 561 and repo.dirstate.normalize(lfile) not in wctx
562 562 ):
563 563 wvfs.unlinkpath(lfile)
564 564 removed += 1
565 565
 566 566         # largefile processing might be slow and get interrupted - be prepared
567 567 lfdirstate.write()
568 568
569 569 if lfiles:
570 570 lfiles = [f for f in lfiles if f not in dropped]
571 571
572 572 for f in dropped:
573 573 repo.wvfs.unlinkpath(lfutil.standin(f))
574 574
575 575 # This needs to happen for dropped files, otherwise they stay in
576 576 # the M state.
577 577 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
578 578
579 579 statuswriter(_(b'getting changed largefiles\n'))
580 580 cachelfiles(ui, repo, None, lfiles)
581 581
582 582 for lfile in lfiles:
583 583 update1 = 0
584 584
585 585 expecthash = update.get(lfile)
586 586 if expecthash:
587 587 if not lfutil.copyfromcache(repo, expecthash, lfile):
588 588 # failed ... but already removed and set to normallookup
589 589 continue
590 590 # Synchronize largefile dirstate to the last modified
591 591 # time of the file
592 592 lfdirstate.normal(lfile)
593 593 update1 = 1
594 594
595 595 # copy the exec mode of largefile standin from the repository's
596 596 # dirstate to its state in the lfdirstate.
597 597 standin = lfutil.standin(lfile)
598 598 if wvfs.exists(standin):
 599 599                 # exec is decided by the user's permissions using mask 0o100
600 600 standinexec = wvfs.stat(standin).st_mode & 0o100
601 601 st = wvfs.stat(lfile)
602 602 mode = st.st_mode
603 603 if standinexec != mode & 0o100:
604 604 # first remove all X bits, then shift all R bits to X
605 605 mode &= ~0o111
606 606 if standinexec:
607 607 mode |= (mode >> 2) & 0o111 & ~util.umask
608 608 wvfs.chmod(lfile, mode)
609 609 update1 = 1
610 610
611 611 updated += update1
612 612
613 613 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
614 614
615 615 lfdirstate.write()
616 616 if lfiles:
617 617 statuswriter(
618 618 _(b'%d largefiles updated, %d removed\n') % (updated, removed)
619 619 )
620 620
621 621
622 622 @eh.command(
623 623 b'lfpull',
624 624 [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))]
625 625 + cmdutil.remoteopts,
626 626 _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'),
627 627 )
628 628 def lfpull(ui, repo, source=b"default", **opts):
629 629 """pull largefiles for the specified revisions from the specified source
630 630
631 631 Pull largefiles that are referenced from local changesets but missing
632 632 locally, pulling from a remote repository to the local cache.
633 633
634 634 If SOURCE is omitted, the 'default' path will be used.
635 635 See :hg:`help urls` for more information.
636 636
637 637 .. container:: verbose
638 638
639 639 Some examples:
640 640
641 641 - pull largefiles for all branch heads::
642 642
643 643 hg lfpull -r "head() and not closed()"
644 644
645 645 - pull largefiles on the default branch::
646 646
647 647 hg lfpull -r "branch(default)"
648 648 """
649 649 repo.lfpullsource = source
650 650
651 651 revs = opts.get('rev', [])
652 652 if not revs:
653 653 raise error.Abort(_(b'no revisions specified'))
654 654 revs = scmutil.revrange(repo, revs)
655 655
656 656 numcached = 0
657 657 for rev in revs:
658 658 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
659 659 (cached, missing) = cachelfiles(ui, repo, rev)
660 660 numcached += len(cached)
661 661 ui.status(_(b"%d largefiles cached\n") % numcached)
662 662
663 663
664 664 @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE'))
665 665 def debuglfput(ui, repo, filepath, **kwargs):
666 666 hash = lfutil.hashfile(filepath)
667 667 storefactory.openstore(repo).put(filepath, hash)
668 668 ui.write(b'%s\n' % hash)
669 669 return 0
@@ -1,760 +1,760 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import contextlib
13 13 import copy
14 import hashlib
15 14 import os
16 15 import stat
17 16
18 17 from mercurial.i18n import _
19 18 from mercurial.node import hex
20 19 from mercurial.pycompat import open
21 20
22 21 from mercurial import (
23 22 dirstate,
24 23 encoding,
25 24 error,
26 25 httpconnection,
27 26 match as matchmod,
28 27 node,
29 28 pycompat,
30 29 scmutil,
31 30 sparse,
32 31 util,
33 32 vfs as vfsmod,
34 33 )
34 from mercurial.utils import hashutil
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 '''Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space.'''
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
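    # With the default name b'largefiles', the branches below resolve to e.g.
    #   Windows: %LOCALAPPDATA%\largefiles (or %APPDATA%\largefiles)
    #   macOS:   $HOME/Library/Caches/largefiles
    #   POSIX:   $XDG_CACHE_HOME/largefiles, else $HOME/.cache/largefiles
    # unless [largefiles] usercache overrides the location.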
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95 if pycompat.iswindows:
96 96 appdata = encoding.environ.get(
97 97 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
98 98 )
99 99 if appdata:
100 100 return os.path.join(appdata, name)
101 101 elif pycompat.isdarwin:
102 102 home = encoding.environ.get(b'HOME')
103 103 if home:
104 104 return os.path.join(home, b'Library', b'Caches', name)
105 105 elif pycompat.isposix:
106 106 path = encoding.environ.get(b'XDG_CACHE_HOME')
107 107 if path:
108 108 return os.path.join(path, name)
109 109 home = encoding.environ.get(b'HOME')
110 110 if home:
111 111 return os.path.join(home, b'.cache', name)
112 112 else:
113 113 raise error.Abort(
114 114 _(b'unknown operating system: %s\n') % pycompat.osname
115 115 )
116 116 raise error.Abort(_(b'unknown %s usercache location') % name)
117 117
118 118
119 119 def inusercache(ui, hash):
120 120 path = usercachepath(ui, hash)
121 121 return os.path.exists(path)
122 122
123 123
124 124 def findfile(repo, hash):
125 125 '''Return store path of the largefile with the specified hash.
126 126 As a side effect, the file might be linked from user cache.
127 127 Return None if the file can't be found locally.'''
128 128 path, exists = findstorepath(repo, hash)
129 129 if exists:
130 130 repo.ui.note(_(b'found %s in store\n') % hash)
131 131 return path
132 132 elif inusercache(repo.ui, hash):
133 133 repo.ui.note(_(b'found %s in system cache\n') % hash)
134 134 path = storepath(repo, hash)
135 135 link(usercachepath(repo.ui, hash), path)
136 136 return path
137 137 return None
138 138
139 139
140 140 class largefilesdirstate(dirstate.dirstate):
141 141 def __getitem__(self, key):
142 142 return super(largefilesdirstate, self).__getitem__(unixpath(key))
143 143
144 144 def normal(self, f):
145 145 return super(largefilesdirstate, self).normal(unixpath(f))
146 146
147 147 def remove(self, f):
148 148 return super(largefilesdirstate, self).remove(unixpath(f))
149 149
150 150 def add(self, f):
151 151 return super(largefilesdirstate, self).add(unixpath(f))
152 152
153 153 def drop(self, f):
154 154 return super(largefilesdirstate, self).drop(unixpath(f))
155 155
156 156 def forget(self, f):
157 157 return super(largefilesdirstate, self).forget(unixpath(f))
158 158
159 159 def normallookup(self, f):
160 160 return super(largefilesdirstate, self).normallookup(unixpath(f))
161 161
162 162 def _ignore(self, f):
163 163 return False
164 164
165 165 def write(self, tr=False):
166 166 # (1) disable PENDING mode always
167 167 # (lfdirstate isn't yet managed as a part of the transaction)
168 168 # (2) avoid develwarn 'use dirstate.write with ....'
169 169 super(largefilesdirstate, self).write(None)
170 170
171 171
172 172 def openlfdirstate(ui, repo, create=True):
173 173 '''
174 174 Return a dirstate object that tracks largefiles: i.e. its root is
175 175 the repo root, but it is saved in .hg/largefiles/dirstate.
176 176 '''
177 177 vfs = repo.vfs
178 178 lfstoredir = longname
179 179 opener = vfsmod.vfs(vfs.join(lfstoredir))
180 180 lfdirstate = largefilesdirstate(
181 181 opener,
182 182 ui,
183 183 repo.root,
184 184 repo.dirstate._validate,
185 185 lambda: sparse.matcher(repo),
186 186 )
187 187
188 188 # If the largefiles dirstate does not exist, populate and create
189 189 # it. This ensures that we create it on the first meaningful
190 190 # largefiles operation in a new clone.
191 191 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
192 192 matcher = getstandinmatcher(repo)
193 193 standins = repo.dirstate.walk(
194 194 matcher, subrepos=[], unknown=False, ignored=False
195 195 )
196 196
197 197 if len(standins) > 0:
198 198 vfs.makedirs(lfstoredir)
199 199
200 200 for standin in standins:
201 201 lfile = splitstandin(standin)
202 202 lfdirstate.normallookup(lfile)
203 203 return lfdirstate
204 204
205 205
206 206 def lfdirstatestatus(lfdirstate, repo):
207 207 pctx = repo[b'.']
208 208 match = matchmod.always()
209 209 unsure, s = lfdirstate.status(
210 210 match, subrepos=[], ignored=False, clean=False, unknown=False
211 211 )
212 212 modified, clean = s.modified, s.clean
213 213 for lfile in unsure:
214 214 try:
215 215 fctx = pctx[standin(lfile)]
216 216 except LookupError:
217 217 fctx = None
218 218 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
219 219 modified.append(lfile)
220 220 else:
221 221 clean.append(lfile)
222 222 lfdirstate.normal(lfile)
223 223 return s
224 224
225 225
226 226 def listlfiles(repo, rev=None, matcher=None):
227 227 '''return a list of largefiles in the working copy or the
228 228 specified changeset'''
229 229
230 230 if matcher is None:
231 231 matcher = getstandinmatcher(repo)
232 232
233 233 # ignore unknown files in working directory
234 234 return [
235 235 splitstandin(f)
236 236 for f in repo[rev].walk(matcher)
237 237 if rev is not None or repo.dirstate[f] != b'?'
238 238 ]
239 239
240 240
241 241 def instore(repo, hash, forcelocal=False):
242 242 '''Return true if a largefile with the given hash exists in the store'''
243 243 return os.path.exists(storepath(repo, hash, forcelocal))
244 244
245 245
246 246 def storepath(repo, hash, forcelocal=False):
247 247 '''Return the correct location in the repository largefiles store for a
248 248 file with the given hash.'''
249 249 if not forcelocal and repo.shared():
250 250 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
251 251 return repo.vfs.join(longname, hash)
252 252
253 253
254 254 def findstorepath(repo, hash):
255 255 '''Search through the local store path(s) to find the file for the given
256 256 hash. If the file is not found, its path in the primary store is returned.
257 257 The return value is a tuple of (path, exists(path)).
258 258 '''
259 259 # For shared repos, the primary store is in the share source. But for
260 260 # backward compatibility, force a lookup in the local store if it wasn't
261 261 # found in the share source.
262 262 path = storepath(repo, hash, False)
263 263
264 264 if instore(repo, hash):
265 265 return (path, True)
266 266 elif repo.shared() and instore(repo, hash, True):
267 267 return storepath(repo, hash, True), True
268 268
269 269 return (path, False)
270 270
271 271
272 272 def copyfromcache(repo, hash, filename):
273 273 '''Copy the specified largefile from the repo or system cache to
274 274 filename in the repository. Return true on success or false if the
 275 275     file was not found in either cache (which should not happen:
276 276 this is meant to be called only after ensuring that the needed
277 277 largefile exists in the cache).'''
278 278 wvfs = repo.wvfs
279 279 path = findfile(repo, hash)
280 280 if path is None:
281 281 return False
282 282 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
283 283 # The write may fail before the file is fully written, but we
284 284 # don't use atomic writes in the working copy.
285 285 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
286 286 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
287 287 if gothash != hash:
288 288 repo.ui.warn(
289 289 _(b'%s: data corruption in %s with hash %s\n')
290 290 % (filename, path, gothash)
291 291 )
292 292 wvfs.unlink(filename)
293 293 return False
294 294 return True
295 295
296 296
297 297 def copytostore(repo, ctx, file, fstandin):
298 298 wvfs = repo.wvfs
299 299 hash = readasstandin(ctx[fstandin])
300 300 if instore(repo, hash):
301 301 return
302 302 if wvfs.exists(file):
303 303 copytostoreabsolute(repo, wvfs.join(file), hash)
304 304 else:
305 305 repo.ui.warn(
306 306 _(b"%s: largefile %s not available from local store\n")
307 307 % (file, hash)
308 308 )
309 309
310 310
311 311 def copyalltostore(repo, node):
312 312 '''Copy all largefiles in a given revision to the store'''
313 313
314 314 ctx = repo[node]
315 315 for filename in ctx.files():
316 316 realfile = splitstandin(filename)
317 317 if realfile is not None and filename in ctx.manifest():
318 318 copytostore(repo, ctx, realfile, filename)
319 319
320 320
321 321 def copytostoreabsolute(repo, file, hash):
322 322 if inusercache(repo.ui, hash):
323 323 link(usercachepath(repo.ui, hash), storepath(repo, hash))
324 324 else:
325 325 util.makedirs(os.path.dirname(storepath(repo, hash)))
326 326 with open(file, b'rb') as srcf:
327 327 with util.atomictempfile(
328 328 storepath(repo, hash), createmode=repo.store.createmode
329 329 ) as dstf:
330 330 for chunk in util.filechunkiter(srcf):
331 331 dstf.write(chunk)
332 332 linktousercache(repo, hash)
333 333
334 334
335 335 def linktousercache(repo, hash):
336 336 '''Link / copy the largefile with the specified hash from the store
337 337 to the cache.'''
338 338 path = usercachepath(repo.ui, hash)
339 339 link(storepath(repo, hash), path)
340 340
341 341
342 342 def getstandinmatcher(repo, rmatcher=None):
343 343 '''Return a match object that applies rmatcher to the standin directory'''
344 344 wvfs = repo.wvfs
345 345 standindir = shortname
346 346
347 347 # no warnings about missing files or directories
348 348 badfn = lambda f, msg: None
349 349
350 350 if rmatcher and not rmatcher.always():
351 351 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
352 352 if not pats:
353 353 pats = [wvfs.join(standindir)]
354 354 match = scmutil.match(repo[None], pats, badfn=badfn)
355 355 else:
356 356 # no patterns: relative to repo root
357 357 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
358 358 return match
359 359
360 360
361 361 def composestandinmatcher(repo, rmatcher):
362 362 '''Return a matcher that accepts standins corresponding to the
363 363 files accepted by rmatcher. Pass the list of files in the matcher
364 364 as the paths specified by the user.'''
365 365 smatcher = getstandinmatcher(repo, rmatcher)
366 366 isstandin = smatcher.matchfn
367 367
368 368 def composedmatchfn(f):
369 369 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
370 370
371 371 smatcher.matchfn = composedmatchfn
372 372
373 373 return smatcher
374 374
375 375
376 376 def standin(filename):
377 377 '''Return the repo-relative path to the standin for the specified big
378 378 file.'''
379 379 # Notes:
380 380 # 1) Some callers want an absolute path, but for instance addlargefiles
381 381 # needs it repo-relative so it can be passed to repo[None].add(). So
382 382 # leave it up to the caller to use repo.wjoin() to get an absolute path.
383 383 # 2) Join with '/' because that's what dirstate always uses, even on
384 384 # Windows. Change existing separator to '/' first in case we are
385 385 # passed filenames from an external source (like the command line).
386 386 return shortnameslash + util.pconvert(filename)
387 387
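# For example, standin(b'sub/big.bin') yields b'.hglf/sub/big.bin';
# splitstandin() below performs the inverse mapping and returns None for
# paths outside .hglf/.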
388 388
389 389 def isstandin(filename):
390 390 '''Return true if filename is a big file standin. filename must be
391 391 in Mercurial's internal form (slash-separated).'''
392 392 return filename.startswith(shortnameslash)
393 393
394 394
395 395 def splitstandin(filename):
396 396 # Split on / because that's what dirstate always uses, even on Windows.
397 397 # Change local separator to / first just in case we are passed filenames
398 398 # from an external source (like the command line).
399 399 bits = util.pconvert(filename).split(b'/', 1)
400 400 if len(bits) == 2 and bits[0] == shortname:
401 401 return bits[1]
402 402 else:
403 403 return None
404 404
405 405
406 406 def updatestandin(repo, lfile, standin):
407 407 """Re-calculate hash value of lfile and write it into standin
408 408
409 409 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
410 410 """
411 411 file = repo.wjoin(lfile)
412 412 if repo.wvfs.exists(lfile):
413 413 hash = hashfile(file)
414 414 executable = getexecutable(file)
415 415 writestandin(repo, standin, hash, executable)
416 416 else:
417 417 raise error.Abort(_(b'%s: file not found!') % lfile)
418 418
419 419
420 420 def readasstandin(fctx):
421 421 '''read hex hash from given filectx of standin file
422 422
 423 423     This encapsulates how "standin" data is stored into the storage layer.'''
424 424 return fctx.data().strip()
425 425
426 426
427 427 def writestandin(repo, standin, hash, executable):
428 428 '''write hash to <repo.root>/<standin>'''
429 429 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
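# The standin is thus a small tracked file whose entire content is the
# 40-character hex SHA-1 of the largefile followed by a newline;
# readasstandin() above strips that newline off again.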
430 430
431 431
432 432 def copyandhash(instream, outfile):
433 433 '''Read bytes from instream (iterable) and write them to outfile,
434 434 computing the SHA-1 hash of the data along the way. Return the hash.'''
435 hasher = hashlib.sha1(b'')
435 hasher = hashutil.sha1(b'')
436 436 for data in instream:
437 437 hasher.update(data)
438 438 outfile.write(data)
439 439 return hex(hasher.digest())
440 440
441 441
442 442 def hashfile(file):
443 443 if not os.path.exists(file):
444 444 return b''
445 445 with open(file, b'rb') as fd:
446 446 return hexsha1(fd)
447 447
448 448
449 449 def getexecutable(filename):
450 450 mode = os.stat(filename).st_mode
451 451 return (
452 452 (mode & stat.S_IXUSR)
453 453 and (mode & stat.S_IXGRP)
454 454 and (mode & stat.S_IXOTH)
455 455 )
456 456
457 457
458 458 def urljoin(first, second, *arg):
459 459 def join(left, right):
460 460 if not left.endswith(b'/'):
461 461 left += b'/'
462 462 if right.startswith(b'/'):
463 463 right = right[1:]
464 464 return left + right
465 465
466 466 url = join(first, second)
467 467 for a in arg:
468 468 url = join(url, a)
469 469 return url
470 470
471 471
472 472 def hexsha1(fileobj):
473 473 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
 474 474     object"""
475 h = hashlib.sha1()
475 h = hashutil.sha1()
476 476 for chunk in util.filechunkiter(fileobj):
477 477 h.update(chunk)
478 478 return hex(h.digest())
479 479
480 480
481 481 def httpsendfile(ui, filename):
482 482 return httpconnection.httpsendfile(ui, filename, b'rb')
483 483
484 484
485 485 def unixpath(path):
486 486 '''Return a version of path normalized for use with the lfdirstate.'''
487 487 return util.pconvert(os.path.normpath(path))
488 488
489 489
490 490 def islfilesrepo(repo):
491 491 '''Return true if the repo is a largefile repo.'''
492 492 if b'largefiles' in repo.requirements and any(
493 493 shortnameslash in f[0] for f in repo.store.datafiles()
494 494 ):
495 495 return True
496 496
497 497 return any(openlfdirstate(repo.ui, repo, False))
498 498
499 499
500 500 class storeprotonotcapable(Exception):
501 501 def __init__(self, storetypes):
502 502 self.storetypes = storetypes
503 503
504 504
505 505 def getstandinsstate(repo):
506 506 standins = []
507 507 matcher = getstandinmatcher(repo)
508 508 wctx = repo[None]
509 509 for standin in repo.dirstate.walk(
510 510 matcher, subrepos=[], unknown=False, ignored=False
511 511 ):
512 512 lfile = splitstandin(standin)
513 513 try:
514 514 hash = readasstandin(wctx[standin])
515 515 except IOError:
516 516 hash = None
517 517 standins.append((lfile, hash))
518 518 return standins
519 519
520 520
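# The mapping below mirrors a standin's dirstate state into the largefiles
# dirstate: 'n' (normal), 'm' (merged), 'r' (removed), 'a' (added) and '?'
# (untracked); an 'n' entry that cannot be trusted to be clean falls back to
# normallookup().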
521 521 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
522 522 lfstandin = standin(lfile)
523 523 if lfstandin in repo.dirstate:
524 524 stat = repo.dirstate._map[lfstandin]
525 525 state, mtime = stat[0], stat[3]
526 526 else:
527 527 state, mtime = b'?', -1
528 528 if state == b'n':
529 529 if normallookup or mtime < 0 or not repo.wvfs.exists(lfile):
530 530 # state 'n' doesn't ensure 'clean' in this case
531 531 lfdirstate.normallookup(lfile)
532 532 else:
533 533 lfdirstate.normal(lfile)
534 534 elif state == b'm':
535 535 lfdirstate.normallookup(lfile)
536 536 elif state == b'r':
537 537 lfdirstate.remove(lfile)
538 538 elif state == b'a':
539 539 lfdirstate.add(lfile)
540 540 elif state == b'?':
541 541 lfdirstate.drop(lfile)
542 542
543 543
544 544 def markcommitted(orig, ctx, node):
545 545 repo = ctx.repo()
546 546
547 547 orig(node)
548 548
549 549 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
550 550 # because files coming from the 2nd parent are omitted in the latter.
551 551 #
552 552 # The former should be used to get targets of "synclfdirstate",
553 553 # because such files:
554 554 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
555 555 # - have to be marked as "n" after commit, but
556 556 # - aren't listed in "repo[node].files()"
557 557
558 558 lfdirstate = openlfdirstate(repo.ui, repo)
559 559 for f in ctx.files():
560 560 lfile = splitstandin(f)
561 561 if lfile is not None:
562 562 synclfdirstate(repo, lfdirstate, lfile, False)
563 563 lfdirstate.write()
564 564
565 565 # As part of committing, copy all of the largefiles into the cache.
566 566 #
 567 567     # Using "node" instead of "ctx" implies an additional "repo[node]"
 568 568     # lookup in copyalltostore(), but lets us omit the redundant check for
 569 569     # files coming from the 2nd parent, which should already exist in the
 570 570     # store after a merge.
571 571 copyalltostore(repo, node)
572 572
573 573
574 574 def getlfilestoupdate(oldstandins, newstandins):
575 575 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
576 576 filelist = []
577 577 for f in changedstandins:
578 578 if f[0] not in filelist:
579 579 filelist.append(f[0])
580 580 return filelist
581 581
582 582
583 583 def getlfilestoupload(repo, missing, addfunc):
584 584 makeprogress = repo.ui.makeprogress
585 585 with makeprogress(
586 586 _(b'finding outgoing largefiles'),
587 587 unit=_(b'revisions'),
588 588 total=len(missing),
589 589 ) as progress:
590 590 for i, n in enumerate(missing):
591 591 progress.update(i)
592 592 parents = [p for p in repo[n].parents() if p != node.nullid]
593 593
594 594 with lfstatus(repo, value=False):
595 595 ctx = repo[n]
596 596
597 597 files = set(ctx.files())
598 598 if len(parents) == 2:
599 599 mc = ctx.manifest()
600 600 mp1 = ctx.p1().manifest()
601 601 mp2 = ctx.p2().manifest()
602 602 for f in mp1:
603 603 if f not in mc:
604 604 files.add(f)
605 605 for f in mp2:
606 606 if f not in mc:
607 607 files.add(f)
608 608 for f in mc:
609 609 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
610 610 files.add(f)
611 611 for fn in files:
612 612 if isstandin(fn) and fn in ctx:
613 613 addfunc(fn, readasstandin(ctx[fn]))
614 614
615 615
616 616 def updatestandinsbymatch(repo, match):
617 617 '''Update standins in the working directory according to specified match
618 618
619 619 This returns (possibly modified) ``match`` object to be used for
620 620 subsequent commit process.
621 621 '''
622 622
623 623 ui = repo.ui
624 624
625 625 # Case 1: user calls commit with no specific files or
626 626 # include/exclude patterns: refresh and commit all files that
627 627 # are "dirty".
628 628 if match is None or match.always():
629 629 # Spend a bit of time here to get a list of files we know
630 630 # are modified so we can compare only against those.
631 631 # It can cost a lot of time (several seconds)
632 632 # otherwise to update all standins if the largefiles are
633 633 # large.
634 634 lfdirstate = openlfdirstate(ui, repo)
635 635 dirtymatch = matchmod.always()
636 636 unsure, s = lfdirstate.status(
637 637 dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
638 638 )
639 639 modifiedfiles = unsure + s.modified + s.added + s.removed
640 640 lfiles = listlfiles(repo)
641 641 # this only loops through largefiles that exist (not
642 642 # removed/renamed)
643 643 for lfile in lfiles:
644 644 if lfile in modifiedfiles:
645 645 fstandin = standin(lfile)
646 646 if repo.wvfs.exists(fstandin):
647 647 # this handles the case where a rebase is being
648 648 # performed and the working copy is not updated
649 649 # yet.
650 650 if repo.wvfs.exists(lfile):
651 651 updatestandin(repo, lfile, fstandin)
652 652
653 653 return match
654 654
655 655 lfiles = listlfiles(repo)
656 656 match._files = repo._subdirlfs(match.files(), lfiles)
657 657
658 658 # Case 2: user calls commit with specified patterns: refresh
659 659 # any matching big files.
660 660 smatcher = composestandinmatcher(repo, match)
661 661 standins = repo.dirstate.walk(
662 662 smatcher, subrepos=[], unknown=False, ignored=False
663 663 )
664 664
665 665 # No matching big files: get out of the way and pass control to
666 666 # the usual commit() method.
667 667 if not standins:
668 668 return match
669 669
670 670 # Refresh all matching big files. It's possible that the
671 671 # commit will end up failing, in which case the big files will
672 672 # stay refreshed. No harm done: the user modified them and
673 673 # asked to commit them, so sooner or later we're going to
674 674 # refresh the standins. Might as well leave them refreshed.
675 675 lfdirstate = openlfdirstate(ui, repo)
676 676 for fstandin in standins:
677 677 lfile = splitstandin(fstandin)
678 678 if lfdirstate[lfile] != b'r':
679 679 updatestandin(repo, lfile, fstandin)
680 680
681 681 # Cook up a new matcher that only matches regular files or
682 682 # standins corresponding to the big files requested by the
683 683 # user. Have to modify _files to prevent commit() from
684 684 # complaining "not tracked" for big files.
685 685 match = copy.copy(match)
686 686 origmatchfn = match.matchfn
687 687
688 688 # Check both the list of largefiles and the list of
689 689 # standins because if a largefile was removed, it
690 690 # won't be in the list of largefiles at this point
691 691 match._files += sorted(standins)
692 692
693 693 actualfiles = []
694 694 for f in match._files:
695 695 fstandin = standin(f)
696 696
697 697 # For largefiles, only one of the normal and standin should be
698 698 # committed (except if one of them is a remove). In the case of a
699 699 # standin removal, drop the normal file if it is unknown to dirstate.
700 700 # Thus, skip plain largefile names but keep the standin.
701 701 if f in lfiles or fstandin in standins:
702 702 if repo.dirstate[fstandin] != b'r':
703 703 if repo.dirstate[f] != b'r':
704 704 continue
705 705 elif repo.dirstate[f] == b'?':
706 706 continue
707 707
708 708 actualfiles.append(f)
709 709 match._files = actualfiles
710 710
711 711 def matchfn(f):
712 712 if origmatchfn(f):
713 713 return f not in lfiles
714 714 else:
715 715 return f in standins
716 716
717 717 match.matchfn = matchfn
718 718
719 719 return match
720 720
721 721
722 722 class automatedcommithook(object):
723 723     '''Stateful hook to update standins at the first commit after resuming
724 724 
725 725     For efficiency, updating standins in the working directory should
726 726     be avoided during automated committing (like rebase, transplant and
727 727     so on), because they should already be up to date before committing.
728 728 
729 729     But the first commit after resuming automated committing (e.g. ``rebase
730 730     --continue``) should update them, because largefiles may have been
731 731     modified manually.
732 732 '''
733 733
734 734 def __init__(self, resuming):
735 735 self.resuming = resuming
736 736
737 737 def __call__(self, repo, match):
738 738 if self.resuming:
739 739 self.resuming = False # avoids updating at subsequent commits
740 740 return updatestandinsbymatch(repo, match)
741 741 else:
742 742 return match
743 743
744 744
745 745 def getstatuswriter(ui, repo, forcibly=None):
746 746     '''Return the function to write out largefiles-specific status
747 747 
748 748     If ``forcibly`` is ``None``, this returns the last element of
749 749     ``repo._lfstatuswriters`` as the "default" writer function.
750 750
751 751 Otherwise, this returns the function to always write out (or
752 752 ignore if ``not forcibly``) status.
753 753 '''
754 754 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
755 755 return repo._lfstatuswriters[-1]
756 756 else:
757 757 if forcibly:
758 758 return ui.status # forcibly WRITE OUT
759 759 else:
760 760 return lambda *msg, **opts: None # forcibly IGNORE
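
As a rough usage sketch (the caller below is hypothetical, not part of this module), code that must always emit largefiles status output can request the writer with ``forcibly=True``:

    statuswriter = getstatuswriter(ui, repo, forcibly=True)
    statuswriter(b'getting changed largefiles\n')

With ``forcibly=None`` on a largefiles-enabled repo, the last registered writer in ``repo._lfstatuswriters`` is used instead; ``forcibly=False`` silently drops the messages.
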
@@ -1,561 +1,561 b''
1 1 from __future__ import absolute_import
2 2
3 3 import collections
4 4 import errno
5 import hashlib
6 5 import mmap
7 6 import os
8 7 import struct
9 8 import time
10 9
11 10 from mercurial.i18n import _
12 11 from mercurial.pycompat import (
13 12 getattr,
14 13 open,
15 14 )
16 15 from mercurial import (
17 16 node as nodemod,
18 17 policy,
19 18 pycompat,
20 19 util,
21 20 vfs as vfsmod,
22 21 )
22 from mercurial.utils import hashutil
23 23 from . import shallowutil
24 24
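
The ``hashutil.sha1`` calls below replace what used to be ``hashlib.sha1``. The helper module itself is not shown in this hunk; a minimal sketch of the assumed wrapper, which prefers a collision-detecting SHA-1 implementation when one is available and otherwise falls back to hashlib, could look like:

    # sketch of mercurial/utils/hashutil.py (assumed interface, not in this diff)
    import hashlib

    try:
        from mercurial.thirdparty import sha1dc
        sha1 = sha1dc.sha1  # drop-in replacement for hashlib.sha1
    except (ImportError, AttributeError):
        sha1 = hashlib.sha1
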
25 25 osutil = policy.importmod('osutil')
26 26
27 27 # The pack version supported by this implementation. This will need to be
28 28 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
29 29 # changing any of the int sizes, changing the delta algorithm, etc.
30 30 PACKVERSIONSIZE = 1
31 31 INDEXVERSIONSIZE = 2
32 32
33 33 FANOUTSTART = INDEXVERSIONSIZE
34 34
35 35 # Constant that indicates a fanout table entry hasn't been filled in. (This does
36 36 # not get serialized)
37 37 EMPTYFANOUT = -1
38 38
39 39 # The fanout prefix is the number of bytes that can be addressed by the fanout
40 40 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
41 41 # look in the fanout table (which will be 2^8 entries long).
42 42 SMALLFANOUTPREFIX = 1
43 43 LARGEFANOUTPREFIX = 2
44 44
45 45 # The number of entries in the index at which point we switch to a large fanout.
46 46 # It is chosen to balance the linear scan through a sparse fanout, with the
47 47 # size of the bisect in the actual index.
48 48 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
49 49 # bisect) with (8 step fanout scan + 1 step bisect)
50 50 # 5 step bisect = log(2^16 / 8 / 255) # fanout
51 51 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
52 52 SMALLFANOUTCUTOFF = 2 ** 16 // 8
53 53
54 54 # The amount of time to wait between checking for new packs. This prevents an
55 55 # exception when data is moved to a new pack after the process has already
56 56 # loaded the pack list.
57 57 REFRESHRATE = 0.1
58 58
59 59 if pycompat.isposix and not pycompat.ispy3:
60 60 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
61 61 # The 'e' flag will be ignored on older versions of glibc.
62 62 # Python 3 can't handle the 'e' flag.
63 63 PACKOPENMODE = b'rbe'
64 64 else:
65 65 PACKOPENMODE = b'rb'
66 66
67 67
68 68 class _cachebackedpacks(object):
69 69 def __init__(self, packs, cachesize):
70 70 self._packs = set(packs)
71 71 self._lrucache = util.lrucachedict(cachesize)
72 72 self._lastpack = None
73 73
74 74 # Avoid cold start of the cache by populating the most recent packs
75 75 # in the cache.
76 76 for i in reversed(range(min(cachesize, len(packs)))):
77 77 self._movetofront(packs[i])
78 78
79 79 def _movetofront(self, pack):
80 80 # This effectively makes pack the first entry in the cache.
81 81 self._lrucache[pack] = True
82 82
83 83 def _registerlastpackusage(self):
84 84 if self._lastpack is not None:
85 85 self._movetofront(self._lastpack)
86 86 self._lastpack = None
87 87
88 88 def add(self, pack):
89 89 self._registerlastpackusage()
90 90
91 91 # This method will mostly be called when packs are not in cache.
92 92         # Therefore, add the pack to the cache.
93 93 self._movetofront(pack)
94 94 self._packs.add(pack)
95 95
96 96 def __iter__(self):
97 97 self._registerlastpackusage()
98 98
99 99 # Cache iteration is based on LRU.
100 100 for pack in self._lrucache:
101 101 self._lastpack = pack
102 102 yield pack
103 103
104 104 cachedpacks = set(pack for pack in self._lrucache)
105 105 # Yield for paths not in the cache.
106 106 for pack in self._packs - cachedpacks:
107 107 self._lastpack = pack
108 108 yield pack
109 109
110 110 # Data not found in any pack.
111 111 self._lastpack = None
112 112
113 113
114 114 class basepackstore(object):
115 115 # Default cache size limit for the pack files.
116 116 DEFAULTCACHESIZE = 100
117 117
118 118 def __init__(self, ui, path):
119 119 self.ui = ui
120 120 self.path = path
121 121
122 122         # lastrefresh is 0 so we'll immediately check for new packs on the first
123 123 # failure.
124 124 self.lastrefresh = 0
125 125
126 126 packs = []
127 127 for filepath, __, __ in self._getavailablepackfilessorted():
128 128 try:
129 129 pack = self.getpack(filepath)
130 130 except Exception as ex:
131 131 # An exception may be thrown if the pack file is corrupted
132 132 # somehow. Log a warning but keep going in this case, just
133 133 # skipping this pack file.
134 134 #
135 135 # If this is an ENOENT error then don't even bother logging.
136 136 # Someone could have removed the file since we retrieved the
137 137 # list of paths.
138 138 if getattr(ex, 'errno', None) != errno.ENOENT:
139 139 ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex))
140 140 continue
141 141 packs.append(pack)
142 142
143 143 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
144 144
145 145 def _getavailablepackfiles(self):
146 146         """For each pack file (an index/data file combo), yields:
147 147 (full path without extension, mtime, size)
148 148
149 149 mtime will be the mtime of the index/data file (whichever is newer)
150 150 size is the combined size of index/data file
151 151 """
152 152 indexsuffixlen = len(self.INDEXSUFFIX)
153 153 packsuffixlen = len(self.PACKSUFFIX)
154 154
155 155 ids = set()
156 156 sizes = collections.defaultdict(lambda: 0)
157 157 mtimes = collections.defaultdict(lambda: [])
158 158 try:
159 159 for filename, type, stat in osutil.listdir(self.path, stat=True):
160 160 id = None
161 161 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
162 162 id = filename[:-indexsuffixlen]
163 163 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
164 164 id = filename[:-packsuffixlen]
165 165
166 166 # Since we expect to have two files corresponding to each ID
167 167 # (the index file and the pack file), we can yield once we see
168 168 # it twice.
169 169 if id:
170 170 sizes[id] += stat.st_size # Sum both files' sizes together
171 171 mtimes[id].append(stat.st_mtime)
172 172 if id in ids:
173 173 yield (
174 174 os.path.join(self.path, id),
175 175 max(mtimes[id]),
176 176 sizes[id],
177 177 )
178 178 else:
179 179 ids.add(id)
180 180 except OSError as ex:
181 181 if ex.errno != errno.ENOENT:
182 182 raise
183 183
184 184 def _getavailablepackfilessorted(self):
185 185 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
186 186 yielding newest files first.
187 187
188 188         This is desirable, since newer packfiles are more likely to contain
189 189         the data we want.
190 190 """
191 191 files = []
192 192 for path, mtime, size in self._getavailablepackfiles():
193 193 files.append((mtime, size, path))
194 194 files = sorted(files, reverse=True)
195 195 for mtime, size, path in files:
196 196 yield path, mtime, size
197 197
198 198 def gettotalsizeandcount(self):
199 199 """Returns the total disk size (in bytes) of all the pack files in
200 200 this store, and the count of pack files.
201 201
202 202 (This might be smaller than the total size of the ``self.path``
203 203         directory, since this only considers fully-written pack files, and not
204 204         temporary files or other detritus in the directory.)
205 205 """
206 206 totalsize = 0
207 207 count = 0
208 208 for __, __, size in self._getavailablepackfiles():
209 209 totalsize += size
210 210 count += 1
211 211 return totalsize, count
212 212
213 213 def getmetrics(self):
214 214 """Returns metrics on the state of this store."""
215 215 size, count = self.gettotalsizeandcount()
216 216 return {
217 217 b'numpacks': count,
218 218 b'totalpacksize': size,
219 219 }
220 220
221 221 def getpack(self, path):
222 222 raise NotImplementedError()
223 223
224 224 def getmissing(self, keys):
225 225 missing = keys
226 226 for pack in self.packs:
227 227 missing = pack.getmissing(missing)
228 228
229 229 # Ensures better performance of the cache by keeping the most
230 230 # recently accessed pack at the beginning in subsequent iterations.
231 231 if not missing:
232 232 return missing
233 233
234 234 if missing:
235 235 for pack in self.refresh():
236 236 missing = pack.getmissing(missing)
237 237
238 238 return missing
239 239
240 240 def markledger(self, ledger, options=None):
241 241 for pack in self.packs:
242 242 pack.markledger(ledger)
243 243
244 244 def markforrefresh(self):
245 245 """Tells the store that there may be new pack files, so the next time it
246 246 has a lookup miss it should check for new files."""
247 247 self.lastrefresh = 0
248 248
249 249 def refresh(self):
250 250 """Checks for any new packs on disk, adds them to the main pack list,
251 251 and returns a list of just the new packs."""
252 252 now = time.time()
253 253
254 254 # If we experience a lot of misses (like in the case of getmissing() on
255 255 # new objects), let's only actually check disk for new stuff every once
256 256 # in a while. Generally this code path should only ever matter when a
257 257         # repack is going on in the background, and it should be pretty rare
258 258         # for that to happen twice in quick succession.
259 259 newpacks = []
260 260 if now > self.lastrefresh + REFRESHRATE:
261 261 self.lastrefresh = now
262 262 previous = set(p.path for p in self.packs)
263 263 for filepath, __, __ in self._getavailablepackfilessorted():
264 264 if filepath not in previous:
265 265 newpack = self.getpack(filepath)
266 266 newpacks.append(newpack)
267 267 self.packs.add(newpack)
268 268
269 269 return newpacks
270 270
271 271
272 272 class versionmixin(object):
273 273 # Mix-in for classes with multiple supported versions
274 274 VERSION = None
275 275 SUPPORTED_VERSIONS = [2]
276 276
277 277 def _checkversion(self, version):
278 278 if version in self.SUPPORTED_VERSIONS:
279 279 if self.VERSION is None:
280 280 # only affect this instance
281 281 self.VERSION = version
282 282 elif self.VERSION != version:
283 283 raise RuntimeError(b'inconsistent version: %d' % version)
284 284 else:
285 285 raise RuntimeError(b'unsupported version: %d' % version)
286 286
287 287
288 288 class basepack(versionmixin):
289 289     # The maximum amount we should read via mmap before remapping so the old
290 290 # pages can be released (100MB)
291 291 MAXPAGEDIN = 100 * 1024 ** 2
292 292
293 293 SUPPORTED_VERSIONS = [2]
294 294
295 295 def __init__(self, path):
296 296 self.path = path
297 297 self.packpath = path + self.PACKSUFFIX
298 298 self.indexpath = path + self.INDEXSUFFIX
299 299
300 300 self.indexsize = os.stat(self.indexpath).st_size
301 301 self.datasize = os.stat(self.packpath).st_size
302 302
303 303 self._index = None
304 304 self._data = None
305 305 self.freememory() # initialize the mmap
306 306
307 307 version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0]
308 308 self._checkversion(version)
309 309
310 310 version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE])
311 311 self._checkversion(version)
312 312
313 313 if 0b10000000 & config:
314 314 self.params = indexparams(LARGEFANOUTPREFIX, version)
315 315 else:
316 316 self.params = indexparams(SMALLFANOUTPREFIX, version)
317 317
318 318 @util.propertycache
319 319 def _fanouttable(self):
320 320 params = self.params
321 321 rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize]
322 322 fanouttable = []
323 323 for i in pycompat.xrange(0, params.fanoutcount):
324 324 loc = i * 4
325 325 fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0]
326 326 fanouttable.append(fanoutentry)
327 327 return fanouttable
328 328
329 329 @util.propertycache
330 330 def _indexend(self):
331 331 nodecount = struct.unpack_from(
332 332 b'!Q', self._index, self.params.indexstart - 8
333 333 )[0]
334 334 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
335 335
336 336 def freememory(self):
337 337 """Unmap and remap the memory to free it up after known expensive
338 338 operations. Return True if self._data and self._index were reloaded.
339 339 """
340 340 if self._index:
341 341 if self._pagedin < self.MAXPAGEDIN:
342 342 return False
343 343
344 344 self._index.close()
345 345 self._data.close()
346 346
347 347 # TODO: use an opener/vfs to access these paths
348 348 with open(self.indexpath, PACKOPENMODE) as indexfp:
349 349 # memory-map the file, size 0 means whole file
350 350 self._index = mmap.mmap(
351 351 indexfp.fileno(), 0, access=mmap.ACCESS_READ
352 352 )
353 353 with open(self.packpath, PACKOPENMODE) as datafp:
354 354 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
355 355
356 356 self._pagedin = 0
357 357 return True
358 358
359 359 def getmissing(self, keys):
360 360 raise NotImplementedError()
361 361
362 362 def markledger(self, ledger, options=None):
363 363 raise NotImplementedError()
364 364
365 365 def cleanup(self, ledger):
366 366 raise NotImplementedError()
367 367
368 368 def __iter__(self):
369 369 raise NotImplementedError()
370 370
371 371 def iterentries(self):
372 372 raise NotImplementedError()
373 373
374 374
375 375 class mutablebasepack(versionmixin):
376 376 def __init__(self, ui, packdir, version=2):
377 377 self._checkversion(version)
378 378 # TODO(augie): make this configurable
379 379 self._compressor = b'GZ'
380 380 opener = vfsmod.vfs(packdir)
381 381 opener.createmode = 0o444
382 382 self.opener = opener
383 383
384 384 self.entries = {}
385 385
386 386 shallowutil.mkstickygroupdir(ui, packdir)
387 387 self.packfp, self.packpath = opener.mkstemp(
388 388 suffix=self.PACKSUFFIX + b'-tmp'
389 389 )
390 390 self.idxfp, self.idxpath = opener.mkstemp(
391 391 suffix=self.INDEXSUFFIX + b'-tmp'
392 392 )
393 393 self.packfp = os.fdopen(self.packfp, 'wb+')
394 394 self.idxfp = os.fdopen(self.idxfp, 'wb+')
395 self.sha = hashlib.sha1()
395 self.sha = hashutil.sha1()
396 396 self._closed = False
397 397
398 398 # The opener provides no way of doing permission fixup on files created
399 399 # via mkstemp, so we must fix it ourselves. We can probably fix this
400 400 # upstream in vfs.mkstemp so we don't need to use the private method.
401 401 opener._fixfilemode(opener.join(self.packpath))
402 402 opener._fixfilemode(opener.join(self.idxpath))
403 403
404 404 # Write header
405 405 # TODO: make it extensible (ex: allow specifying compression algorithm,
406 406 # a flexible key/value header, delta algorithm, fanout size, etc)
407 407 versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int
408 408 self.writeraw(versionbuf)
409 409
410 410 def __enter__(self):
411 411 return self
412 412
413 413 def __exit__(self, exc_type, exc_value, traceback):
414 414 if exc_type is None:
415 415 self.close()
416 416 else:
417 417 self.abort()
418 418
419 419 def abort(self):
420 420 # Unclean exit
421 421 self._cleantemppacks()
422 422
423 423 def writeraw(self, data):
424 424 self.packfp.write(data)
425 425 self.sha.update(data)
426 426
427 427 def close(self, ledger=None):
428 428 if self._closed:
429 429 return
430 430
431 431 try:
432 432 sha = nodemod.hex(self.sha.digest())
433 433 self.packfp.close()
434 434 self.writeindex()
435 435
436 436 if len(self.entries) == 0:
437 437 # Empty pack
438 438 self._cleantemppacks()
439 439 self._closed = True
440 440 return None
441 441
442 442 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
443 443 try:
444 444 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
445 445 except Exception as ex:
446 446 try:
447 447 self.opener.unlink(sha + self.PACKSUFFIX)
448 448 except Exception:
449 449 pass
450 450 # Throw exception 'ex' explicitly since a normal 'raise' would
451 451 # potentially throw an exception from the unlink cleanup.
452 452 raise ex
453 453 except Exception:
454 454 # Clean up temp packs in all exception cases
455 455 self._cleantemppacks()
456 456 raise
457 457
458 458 self._closed = True
459 459 result = self.opener.join(sha)
460 460 if ledger:
461 461 ledger.addcreated(result)
462 462 return result
463 463
464 464 def _cleantemppacks(self):
465 465 try:
466 466 self.opener.unlink(self.packpath)
467 467 except Exception:
468 468 pass
469 469 try:
470 470 self.opener.unlink(self.idxpath)
471 471 except Exception:
472 472 pass
473 473
474 474 def writeindex(self):
475 475 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
476 476 if largefanout:
477 477 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
478 478 else:
479 479 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
480 480
481 481 fanouttable = [EMPTYFANOUT] * params.fanoutcount
482 482
483 483 # Precompute the location of each entry
484 484 locations = {}
485 485 count = 0
486 486 for node in sorted(self.entries):
487 487 location = count * self.INDEXENTRYLENGTH
488 488 locations[node] = location
489 489 count += 1
490 490
491 491 # Must use [0] on the unpack result since it's always a tuple.
492 492 fanoutkey = struct.unpack(
493 493 params.fanoutstruct, node[: params.fanoutprefix]
494 494 )[0]
495 495 if fanouttable[fanoutkey] == EMPTYFANOUT:
496 496 fanouttable[fanoutkey] = location
497 497
498 498 rawfanouttable = b''
499 499 last = 0
500 500 for offset in fanouttable:
501 501 offset = offset if offset != EMPTYFANOUT else last
502 502 last = offset
503 503 rawfanouttable += struct.pack(b'!I', offset)
504 504
505 505 rawentrieslength = struct.pack(b'!Q', len(self.entries))
506 506
507 507         # The index offset is its location in the file, i.e. right after the
508 508         # 2 byte header and the fanouttable.
509 509 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
510 510
511 511 self._writeheader(params)
512 512 self.idxfp.write(rawfanouttable)
513 513 self.idxfp.write(rawentrieslength)
514 514 self.idxfp.write(rawindex)
515 515 self.idxfp.close()
516 516
517 517 def createindex(self, nodelocations):
518 518 raise NotImplementedError()
519 519
520 520 def _writeheader(self, indexparams):
521 521 # Index header
522 522 # <version: 1 byte>
523 523 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
524 524 # <unused: 7 bit> # future use (compression, delta format, etc)
525 525 config = 0
526 526 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
527 527 config = 0b10000000
528 528 self.idxfp.write(struct.pack(b'!BB', self.VERSION, config))
529 529
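
For illustration, these are the two header byte sequences ``_writeheader`` above can produce with version 2 (the values follow from the ``struct.pack`` call; the asserts themselves are an added example):

    import struct

    assert struct.pack(b'!BB', 2, 0b10000000) == b'\x02\x80'  # large fanout
    assert struct.pack(b'!BB', 2, 0) == b'\x02\x00'           # small fanout
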
530 530
531 531 class indexparams(object):
532 532 __slots__ = (
533 533 'fanoutprefix',
534 534 'fanoutstruct',
535 535 'fanoutcount',
536 536 'fanoutsize',
537 537 'indexstart',
538 538 )
539 539
540 540 def __init__(self, prefixsize, version):
541 541 self.fanoutprefix = prefixsize
542 542
543 543 # The struct pack format for fanout table location (i.e. the format that
544 544 # converts the node prefix into an integer location in the fanout
545 545 # table).
546 546 if prefixsize == SMALLFANOUTPREFIX:
547 547 self.fanoutstruct = b'!B'
548 548 elif prefixsize == LARGEFANOUTPREFIX:
549 549 self.fanoutstruct = b'!H'
550 550 else:
551 551 raise ValueError(b"invalid fanout prefix size: %s" % prefixsize)
552 552
553 553 # The number of fanout table entries
554 554 self.fanoutcount = 2 ** (prefixsize * 8)
555 555
556 556 # The total bytes used by the fanout table
557 557 self.fanoutsize = self.fanoutcount * 4
558 558
559 559 self.indexstart = FANOUTSTART + self.fanoutsize
560 560 # Skip the index length
561 561 self.indexstart += 8
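
A worked example of the table sizes ``indexparams`` computes (the numbers follow from the arithmetic above; the snippet is an illustration, not part of the original module):

    small = indexparams(SMALLFANOUTPREFIX, 2)
    assert small.fanoutcount == 256       # 2 ** 8 buckets for a 1-byte prefix
    assert small.fanoutsize == 1024       # 256 entries * 4 bytes

    large = indexparams(LARGEFANOUTPREFIX, 2)
    assert large.fanoutcount == 65536     # 2 ** 16 buckets for a 2-byte prefix
    assert large.fanoutsize == 262144     # 65536 entries * 4 bytes (256 KiB)

``writeindex`` switches from the small to the large table once a pack holds more than SMALLFANOUTCUTOFF (8192) entries.
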
@@ -1,461 +1,461 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 import hashlib
5 4 import os
6 5 import shutil
7 6 import stat
8 7 import time
9 8
10 9 from mercurial.i18n import _
11 10 from mercurial.node import bin, hex
12 11 from mercurial.pycompat import open
13 12 from mercurial import (
14 13 error,
15 14 pycompat,
16 15 util,
17 16 )
17 from mercurial.utils import hashutil
18 18 from . import (
19 19 constants,
20 20 shallowutil,
21 21 )
22 22
23 23
24 24 class basestore(object):
25 25 def __init__(self, repo, path, reponame, shared=False):
26 26 """Creates a remotefilelog store object for the given repo name.
27 27
28 28 `path` - The file path where this store keeps its data
29 29 `reponame` - The name of the repo. This is used to partition data from
30 30 many repos.
31 31 `shared` - True if this store is a shared cache of data from the central
32 32 server, for many repos on this machine. False means this store is for
33 33 the local data for one repo.
34 34 """
35 35 self.repo = repo
36 36 self.ui = repo.ui
37 37 self._path = path
38 38 self._reponame = reponame
39 39 self._shared = shared
40 40 self._uid = os.getuid() if not pycompat.iswindows else None
41 41
42 42 self._validatecachelog = self.ui.config(
43 43 b"remotefilelog", b"validatecachelog"
44 44 )
45 45 self._validatecache = self.ui.config(
46 46 b"remotefilelog", b"validatecache", b'on'
47 47 )
48 48 if self._validatecache not in (b'on', b'strict', b'off'):
49 49 self._validatecache = b'on'
50 50 if self._validatecache == b'off':
51 51 self._validatecache = False
52 52
53 53 if shared:
54 54 shallowutil.mkstickygroupdir(self.ui, path)
55 55
56 56 def getmissing(self, keys):
57 57 missing = []
58 58 for name, node in keys:
59 59 filepath = self._getfilepath(name, node)
60 60 exists = os.path.exists(filepath)
61 61 if (
62 62 exists
63 63 and self._validatecache == b'strict'
64 64 and not self._validatekey(filepath, b'contains')
65 65 ):
66 66 exists = False
67 67 if not exists:
68 68 missing.append((name, node))
69 69
70 70 return missing
71 71
72 72 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
73 73
74 74 def markledger(self, ledger, options=None):
75 75 if options and options.get(constants.OPTION_PACKSONLY):
76 76 return
77 77 if self._shared:
78 78 for filename, nodes in self._getfiles():
79 79 for node in nodes:
80 80 ledger.markdataentry(self, filename, node)
81 81 ledger.markhistoryentry(self, filename, node)
82 82
83 83 def cleanup(self, ledger):
84 84 ui = self.ui
85 85 entries = ledger.sources.get(self, [])
86 86 count = 0
87 87 progress = ui.makeprogress(
88 88 _(b"cleaning up"), unit=b"files", total=len(entries)
89 89 )
90 90 for entry in entries:
91 91 if entry.gced or (entry.datarepacked and entry.historyrepacked):
92 92 progress.update(count)
93 93 path = self._getfilepath(entry.filename, entry.node)
94 94 util.tryunlink(path)
95 95 count += 1
96 96 progress.complete()
97 97
98 98 # Clean up the repo cache directory.
99 99 self._cleanupdirectory(self._getrepocachepath())
100 100
101 101 # BELOW THIS ARE NON-STANDARD APIS
102 102
103 103 def _cleanupdirectory(self, rootdir):
104 104 """Removes the empty directories and unnecessary files within the root
105 105 directory recursively. Note that this method does not remove the root
106 106 directory itself. """
107 107
108 108 oldfiles = set()
109 109 otherfiles = set()
110 110 # osutil.listdir returns stat information which saves some rmdir/listdir
111 111 # syscalls.
112 112 for name, mode in util.osutil.listdir(rootdir):
113 113 if stat.S_ISDIR(mode):
114 114 dirpath = os.path.join(rootdir, name)
115 115 self._cleanupdirectory(dirpath)
116 116
117 117 # Now that the directory specified by dirpath is potentially
118 118 # empty, try and remove it.
119 119 try:
120 120 os.rmdir(dirpath)
121 121 except OSError:
122 122 pass
123 123
124 124 elif stat.S_ISREG(mode):
125 125 if name.endswith(b'_old'):
126 126 oldfiles.add(name[:-4])
127 127 else:
128 128 otherfiles.add(name)
129 129
130 130 # Remove the files which end with suffix '_old' and have no
131 131 # corresponding file without the suffix '_old'. See addremotefilelognode
132 132 # method for the generation/purpose of files with '_old' suffix.
133 133 for filename in oldfiles - otherfiles:
134 134 filepath = os.path.join(rootdir, filename + b'_old')
135 135 util.tryunlink(filepath)
136 136
137 137 def _getfiles(self):
138 138 """Return a list of (filename, [node,...]) for all the revisions that
139 139 exist in the store.
140 140
141 141 This is useful for obtaining a list of all the contents of the store
142 142 when performing a repack to another store, since the store API requires
143 143 name+node keys and not namehash+node keys.
144 144 """
145 145 existing = {}
146 146 for filenamehash, node in self._listkeys():
147 147 existing.setdefault(filenamehash, []).append(node)
148 148
149 149 filenamemap = self._resolvefilenames(existing.keys())
150 150
151 151 for filename, sha in pycompat.iteritems(filenamemap):
152 152 yield (filename, existing[sha])
153 153
154 154 def _resolvefilenames(self, hashes):
155 155 """Given a list of filename hashes that are present in the
156 156 remotefilelog store, return a mapping from filename->hash.
157 157
158 158 This is useful when converting remotefilelog blobs into other storage
159 159 formats.
160 160 """
161 161 if not hashes:
162 162 return {}
163 163
164 164 filenames = {}
165 165 missingfilename = set(hashes)
166 166
167 167 # Start with a full manifest, since it'll cover the majority of files
168 168 for filename in self.repo[b'tip'].manifest():
169 sha = hashlib.sha1(filename).digest()
169 sha = hashutil.sha1(filename).digest()
170 170 if sha in missingfilename:
171 171 filenames[filename] = sha
172 172 missingfilename.discard(sha)
173 173
174 174 # Scan the changelog until we've found every file name
175 175 cl = self.repo.unfiltered().changelog
176 176 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
177 177 if not missingfilename:
178 178 break
179 179 files = cl.readfiles(cl.node(rev))
180 180 for filename in files:
181 sha = hashlib.sha1(filename).digest()
181 sha = hashutil.sha1(filename).digest()
182 182 if sha in missingfilename:
183 183 filenames[filename] = sha
184 184 missingfilename.discard(sha)
185 185
186 186 return filenames
187 187
188 188 def _getrepocachepath(self):
189 189 return (
190 190 os.path.join(self._path, self._reponame)
191 191 if self._shared
192 192 else self._path
193 193 )
194 194
195 195 def _listkeys(self):
196 196 """List all the remotefilelog keys that exist in the store.
197 197
198 198 Returns a iterator of (filename hash, filecontent hash) tuples.
199 199 """
200 200
201 201 for root, dirs, files in os.walk(self._getrepocachepath()):
202 202 for filename in files:
203 203 if len(filename) != 40:
204 204 continue
205 205 node = filename
206 206 if self._shared:
207 207 # .../1a/85ffda..be21
208 208 filenamehash = root[-41:-39] + root[-38:]
209 209 else:
210 210 filenamehash = root[-40:]
211 211 yield (bin(filenamehash), bin(node))
212 212
213 213 def _getfilepath(self, name, node):
214 214 node = hex(node)
215 215 if self._shared:
216 216 key = shallowutil.getcachekey(self._reponame, name, node)
217 217 else:
218 218 key = shallowutil.getlocalkey(name, node)
219 219
220 220 return os.path.join(self._path, key)
221 221
222 222 def _getdata(self, name, node):
223 223 filepath = self._getfilepath(name, node)
224 224 try:
225 225 data = shallowutil.readfile(filepath)
226 226 if self._validatecache and not self._validatedata(data, filepath):
227 227 if self._validatecachelog:
228 228 with open(self._validatecachelog, b'a+') as f:
229 229 f.write(b"corrupt %s during read\n" % filepath)
230 230 os.rename(filepath, filepath + b".corrupt")
231 231 raise KeyError(b"corrupt local cache file %s" % filepath)
232 232 except IOError:
233 233 raise KeyError(
234 234 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
235 235 )
236 236
237 237 return data
238 238
239 239 def addremotefilelognode(self, name, node, data):
240 240 filepath = self._getfilepath(name, node)
241 241
242 242 oldumask = os.umask(0o002)
243 243 try:
244 244 # if this node already exists, save the old version for
245 245 # recovery/debugging purposes.
246 246 if os.path.exists(filepath):
247 247 newfilename = filepath + b'_old'
248 248 # newfilename can be read-only and shutil.copy will fail.
249 249 # Delete newfilename to avoid it
250 250 if os.path.exists(newfilename):
251 251 shallowutil.unlinkfile(newfilename)
252 252 shutil.copy(filepath, newfilename)
253 253
254 254 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
255 255 shallowutil.writefile(filepath, data, readonly=True)
256 256
257 257 if self._validatecache:
258 258 if not self._validatekey(filepath, b'write'):
259 259 raise error.Abort(
260 260 _(b"local cache write was corrupted %s") % filepath
261 261 )
262 262 finally:
263 263 os.umask(oldumask)
264 264
265 265 def markrepo(self, path):
266 266 """Call this to add the given repo path to the store's list of
267 267 repositories that are using it. This is useful later when doing garbage
268 268         collection, since it allows us to inspect the repos to see what nodes
269 269 they want to be kept alive in the store.
270 270 """
271 271 repospath = os.path.join(self._path, b"repos")
272 272 with open(repospath, b'ab') as reposfile:
273 273 reposfile.write(os.path.dirname(path) + b"\n")
274 274
275 275 repospathstat = os.stat(repospath)
276 276 if repospathstat.st_uid == self._uid:
277 277 os.chmod(repospath, 0o0664)
278 278
279 279 def _validatekey(self, path, action):
280 280 with open(path, b'rb') as f:
281 281 data = f.read()
282 282
283 283 if self._validatedata(data, path):
284 284 return True
285 285
286 286 if self._validatecachelog:
287 287 with open(self._validatecachelog, b'ab+') as f:
288 288 f.write(b"corrupt %s during %s\n" % (path, action))
289 289
290 290 os.rename(path, path + b".corrupt")
291 291 return False
292 292
293 293 def _validatedata(self, data, path):
294 294 try:
295 295 if len(data) > 0:
296 296 # see remotefilelogserver.createfileblob for the format
297 297 offset, size, flags = shallowutil.parsesizeflags(data)
298 298 if len(data) <= size:
299 299 # it is truncated
300 300 return False
301 301
302 302 # extract the node from the metadata
303 303 offset += size
304 304 datanode = data[offset : offset + 20]
305 305
306 306 # and compare against the path
307 307 if os.path.basename(path) == hex(datanode):
308 308 # Content matches the intended path
309 309 return True
310 310 return False
311 311 except (ValueError, RuntimeError):
312 312 pass
313 313
314 314 return False
315 315
316 316 def gc(self, keepkeys):
317 317 ui = self.ui
318 318 cachepath = self._path
319 319
320 320 # prune cache
321 321 queue = pycompat.queue.PriorityQueue()
322 322 originalsize = 0
323 323 size = 0
324 324 count = 0
325 325 removed = 0
326 326
327 327 # keep files newer than a day even if they aren't needed
328 328 limit = time.time() - (60 * 60 * 24)
329 329
330 330 progress = ui.makeprogress(
331 331 _(b"removing unnecessary files"), unit=b"files"
332 332 )
333 333 progress.update(0)
334 334 for root, dirs, files in os.walk(cachepath):
335 335 for file in files:
336 336 if file == b'repos':
337 337 continue
338 338
339 339 # Don't delete pack files
340 340 if b'/packs/' in root:
341 341 continue
342 342
343 343 progress.update(count)
344 344 path = os.path.join(root, file)
345 345 key = os.path.relpath(path, cachepath)
346 346 count += 1
347 347 try:
348 348 pathstat = os.stat(path)
349 349 except OSError as e:
350 350 # errno.ENOENT = no such file or directory
351 351 if e.errno != errno.ENOENT:
352 352 raise
353 353 msg = _(
354 354 b"warning: file %s was removed by another process\n"
355 355 )
356 356 ui.warn(msg % path)
357 357 continue
358 358
359 359 originalsize += pathstat.st_size
360 360
361 361 if key in keepkeys or pathstat.st_atime > limit:
362 362 queue.put((pathstat.st_atime, path, pathstat))
363 363 size += pathstat.st_size
364 364 else:
365 365 try:
366 366 shallowutil.unlinkfile(path)
367 367 except OSError as e:
368 368 # errno.ENOENT = no such file or directory
369 369 if e.errno != errno.ENOENT:
370 370 raise
371 371 msg = _(
372 372 b"warning: file %s was removed by another "
373 373 b"process\n"
374 374 )
375 375 ui.warn(msg % path)
376 376 continue
377 377 removed += 1
378 378 progress.complete()
379 379
380 380 # remove oldest files until under limit
381 381 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
382 382 if size > limit:
383 383 excess = size - limit
384 384 progress = ui.makeprogress(
385 385 _(b"enforcing cache limit"), unit=b"bytes", total=excess
386 386 )
387 387 removedexcess = 0
388 388 while queue and size > limit and size > 0:
389 389 progress.update(removedexcess)
390 390 atime, oldpath, oldpathstat = queue.get()
391 391 try:
392 392 shallowutil.unlinkfile(oldpath)
393 393 except OSError as e:
394 394 # errno.ENOENT = no such file or directory
395 395 if e.errno != errno.ENOENT:
396 396 raise
397 397 msg = _(
398 398 b"warning: file %s was removed by another process\n"
399 399 )
400 400 ui.warn(msg % oldpath)
401 401 size -= oldpathstat.st_size
402 402 removed += 1
403 403 removedexcess += oldpathstat.st_size
404 404 progress.complete()
405 405
406 406 ui.status(
407 407 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
408 408 % (
409 409 removed,
410 410 count,
411 411 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
412 412 float(size) / 1024.0 / 1024.0 / 1024.0,
413 413 )
414 414 )
415 415
416 416
417 417 class baseunionstore(object):
418 418 def __init__(self, *args, **kwargs):
419 419 # If one of the functions that iterates all of the stores is about to
420 420 # throw a KeyError, try this many times with a full refresh between
421 421 # attempts. A repack operation may have moved data from one store to
422 422 # another while we were running.
423 423 self.numattempts = kwargs.get('numretries', 0) + 1
424 424         # If not None, call this function on every retry, and once more when
425 425         # the attempts are exhausted.
426 426 self.retrylog = kwargs.get('retrylog', None)
427 427
428 428 def markforrefresh(self):
429 429 for store in self.stores:
430 430 if util.safehasattr(store, b'markforrefresh'):
431 431 store.markforrefresh()
432 432
433 433 @staticmethod
434 434 def retriable(fn):
435 435 def noop(*args):
436 436 pass
437 437
438 438 def wrapped(self, *args, **kwargs):
439 439 retrylog = self.retrylog or noop
440 440 funcname = fn.__name__
441 441 i = 0
442 442 while i < self.numattempts:
443 443 if i > 0:
444 444 retrylog(
445 445 b're-attempting (n=%d) %s\n'
446 446 % (i, pycompat.sysbytes(funcname))
447 447 )
448 448 self.markforrefresh()
449 449 i += 1
450 450 try:
451 451 return fn(self, *args, **kwargs)
452 452 except KeyError:
453 453 if i == self.numattempts:
454 454 # retries exhausted
455 455 retrylog(
456 456 b'retries exhausted in %s, raising KeyError\n'
457 457 % pycompat.sysbytes(funcname)
458 458 )
459 459 raise
460 460
461 461 return wrapped
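
A rough sketch of how a union store is expected to apply the decorator (the subclass below is hypothetical and simplified; the real union stores live in other remotefilelog modules):

    class exampleunionstore(baseunionstore):
        def __init__(self, *stores, **kwargs):
            super(exampleunionstore, self).__init__(**kwargs)
            self.stores = stores

        @baseunionstore.retriable
        def get(self, name, node):
            for store in self.stores:
                try:
                    return store.get(name, node)
                except KeyError:
                    continue
            # a KeyError here makes the wrapper call markforrefresh() and
            # retry up to `numretries` more times before re-raising
            raise KeyError((name, node))
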
@@ -1,477 +1,477 b''
1 1 # debugcommands.py - debug logic for remotefilelog
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 import hashlib
10 9 import os
11 10 import zlib
12 11
13 12 from mercurial.node import bin, hex, nullid, short
14 13 from mercurial.i18n import _
15 14 from mercurial.pycompat import open
16 15 from mercurial import (
17 16 error,
18 17 filelog,
19 18 lock as lockmod,
20 19 node as nodemod,
21 20 pycompat,
22 21 revlog,
23 22 )
23 from mercurial.utils import hashutil
24 24 from . import (
25 25 constants,
26 26 datapack,
27 27 fileserverclient,
28 28 historypack,
29 29 repack,
30 30 shallowutil,
31 31 )
32 32
33 33
34 34 def debugremotefilelog(ui, path, **opts):
35 35 decompress = opts.get('decompress')
36 36
37 37 size, firstnode, mapping = parsefileblob(path, decompress)
38 38
39 39 ui.status(_(b"size: %d bytes\n") % size)
40 40 ui.status(_(b"path: %s \n") % path)
41 41 ui.status(_(b"key: %s \n") % (short(firstnode)))
42 42 ui.status(_(b"\n"))
43 43 ui.status(
44 44 _(b"%12s => %12s %13s %13s %12s\n")
45 45 % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
46 46 )
47 47
48 48 queue = [firstnode]
49 49 while queue:
50 50 node = queue.pop(0)
51 51 p1, p2, linknode, copyfrom = mapping[node]
52 52 ui.status(
53 53 _(b"%s => %s %s %s %s\n")
54 54 % (short(node), short(p1), short(p2), short(linknode), copyfrom)
55 55 )
56 56 if p1 != nullid:
57 57 queue.append(p1)
58 58 if p2 != nullid:
59 59 queue.append(p2)
60 60
61 61
62 62 def buildtemprevlog(repo, file):
63 63 # get filename key
64 filekey = nodemod.hex(hashlib.sha1(file).digest())
64 filekey = nodemod.hex(hashutil.sha1(file).digest())
65 65 filedir = os.path.join(repo.path, b'store/data', filekey)
66 66
67 67 # sort all entries based on linkrev
68 68 fctxs = []
69 69 for filenode in os.listdir(filedir):
70 70 if b'_old' not in filenode:
71 71 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
72 72
73 73 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
74 74
75 75 # add to revlog
76 76 temppath = repo.sjoin(b'data/temprevlog.i')
77 77 if os.path.exists(temppath):
78 78 os.remove(temppath)
79 79 r = filelog.filelog(repo.svfs, b'temprevlog')
80 80
81 81 class faket(object):
82 82 def add(self, a, b, c):
83 83 pass
84 84
85 85 t = faket()
86 86 for fctx in fctxs:
87 87 if fctx.node() not in repo:
88 88 continue
89 89
90 90 p = fctx.filelog().parents(fctx.filenode())
91 91 meta = {}
92 92 if fctx.renamed():
93 93 meta[b'copy'] = fctx.renamed()[0]
94 94 meta[b'copyrev'] = hex(fctx.renamed()[1])
95 95
96 96 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
97 97
98 98 return r
99 99
100 100
101 101 def debugindex(orig, ui, repo, file_=None, **opts):
102 102 """dump the contents of an index file"""
103 103 if (
104 104 opts.get('changelog')
105 105 or opts.get('manifest')
106 106 or opts.get('dir')
107 107 or not shallowutil.isenabled(repo)
108 108 or not repo.shallowmatch(file_)
109 109 ):
110 110 return orig(ui, repo, file_, **opts)
111 111
112 112 r = buildtemprevlog(repo, file_)
113 113
114 114 # debugindex like normal
115 115 format = opts.get(b'format', 0)
116 116 if format not in (0, 1):
117 117 raise error.Abort(_(b"unknown format %d") % format)
118 118
119 119 generaldelta = r.version & revlog.FLAG_GENERALDELTA
120 120 if generaldelta:
121 121 basehdr = b' delta'
122 122 else:
123 123 basehdr = b' base'
124 124
125 125 if format == 0:
126 126 ui.write(
127 127 (
128 128 b" rev offset length " + basehdr + b" linkrev"
129 129 b" nodeid p1 p2\n"
130 130 )
131 131 )
132 132 elif format == 1:
133 133 ui.write(
134 134 (
135 135 b" rev flag offset length"
136 136 b" size " + basehdr + b" link p1 p2"
137 137 b" nodeid\n"
138 138 )
139 139 )
140 140
141 141 for i in r:
142 142 node = r.node(i)
143 143 if generaldelta:
144 144 base = r.deltaparent(i)
145 145 else:
146 146 base = r.chainbase(i)
147 147 if format == 0:
148 148 try:
149 149 pp = r.parents(node)
150 150 except Exception:
151 151 pp = [nullid, nullid]
152 152 ui.write(
153 153 b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
154 154 % (
155 155 i,
156 156 r.start(i),
157 157 r.length(i),
158 158 base,
159 159 r.linkrev(i),
160 160 short(node),
161 161 short(pp[0]),
162 162 short(pp[1]),
163 163 )
164 164 )
165 165 elif format == 1:
166 166 pr = r.parentrevs(i)
167 167 ui.write(
168 168 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
169 169 % (
170 170 i,
171 171 r.flags(i),
172 172 r.start(i),
173 173 r.length(i),
174 174 r.rawsize(i),
175 175 base,
176 176 r.linkrev(i),
177 177 pr[0],
178 178 pr[1],
179 179 short(node),
180 180 )
181 181 )
182 182
183 183
184 184 def debugindexdot(orig, ui, repo, file_):
185 185 """dump an index DAG as a graphviz dot file"""
186 186 if not shallowutil.isenabled(repo):
187 187 return orig(ui, repo, file_)
188 188
189 189 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
190 190
191 191 ui.writenoi18n(b"digraph G {\n")
192 192 for i in r:
193 193 node = r.node(i)
194 194 pp = r.parents(node)
195 195 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
196 196 if pp[1] != nullid:
197 197 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
198 198 ui.write(b"}\n")
199 199
200 200
201 201 def verifyremotefilelog(ui, path, **opts):
202 202 decompress = opts.get('decompress')
203 203
204 204 for root, dirs, files in os.walk(path):
205 205 for file in files:
206 206 if file == b"repos":
207 207 continue
208 208 filepath = os.path.join(root, file)
209 209 size, firstnode, mapping = parsefileblob(filepath, decompress)
210 210 for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping):
211 211 if linknode == nullid:
212 212 actualpath = os.path.relpath(root, path)
213 213 key = fileserverclient.getcachekey(
214 214 b"reponame", actualpath, file
215 215 )
216 216 ui.status(
217 217 b"%s %s\n" % (key, os.path.relpath(filepath, path))
218 218 )
219 219
220 220
221 221 def _decompressblob(raw):
222 222 return zlib.decompress(raw)
223 223
224 224
225 225 def parsefileblob(path, decompress):
226 226 f = open(path, b"rb")
227 227 try:
228 228 raw = f.read()
229 229 finally:
230 230 f.close()
231 231
232 232 if decompress:
233 233 raw = _decompressblob(raw)
234 234
235 235 offset, size, flags = shallowutil.parsesizeflags(raw)
236 236 start = offset + size
237 237
238 238 firstnode = None
239 239
240 240 mapping = {}
241 241 while start < len(raw):
242 242 divider = raw.index(b'\0', start + 80)
243 243
244 244 currentnode = raw[start : (start + 20)]
245 245 if not firstnode:
246 246 firstnode = currentnode
247 247
248 248 p1 = raw[(start + 20) : (start + 40)]
249 249 p2 = raw[(start + 40) : (start + 60)]
250 250 linknode = raw[(start + 60) : (start + 80)]
251 251 copyfrom = raw[(start + 80) : divider]
252 252
253 253 mapping[currentnode] = (p1, p2, linknode, copyfrom)
254 254 start = divider + 1
255 255
256 256 return size, firstnode, mapping
257 257
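
The blob layout that ``parsefileblob`` walks, summarized from the code above:

    # <size/flags header>        parsed by shallowutil.parsesizeflags()
    # <file text, `size` bytes>
    # then one ancestor record per node, repeated until the end of the blob:
    #   node       20 bytes
    #   p1         20 bytes
    #   p2         20 bytes
    #   linknode   20 bytes
    #   copyfrom   variable length, terminated by b'\0'
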
258 258
259 259 def debugdatapack(ui, *paths, **opts):
260 260 for path in paths:
261 261 if b'.data' in path:
262 262 path = path[: path.index(b'.data')]
263 263 ui.write(b"%s:\n" % path)
264 264 dpack = datapack.datapack(path)
265 265 node = opts.get('node')
266 266 if node:
267 267 deltachain = dpack.getdeltachain(b'', bin(node))
268 268 dumpdeltachain(ui, deltachain, **opts)
269 269 return
270 270
271 271 if opts.get('long'):
272 272 hashformatter = hex
273 273 hashlen = 42
274 274 else:
275 275 hashformatter = short
276 276 hashlen = 14
277 277
278 278 lastfilename = None
279 279 totaldeltasize = 0
280 280 totalblobsize = 0
281 281
282 282 def printtotals():
283 283 if lastfilename is not None:
284 284 ui.write(b"\n")
285 285 if not totaldeltasize or not totalblobsize:
286 286 return
287 287 difference = totalblobsize - totaldeltasize
288 288 deltastr = b"%0.1f%% %s" % (
289 289 (100.0 * abs(difference) / totalblobsize),
290 290 (b"smaller" if difference > 0 else b"bigger"),
291 291 )
292 292
293 293 ui.writenoi18n(
294 294 b"Total:%s%s %s (%s)\n"
295 295 % (
296 296 b"".ljust(2 * hashlen - len(b"Total:")),
297 297 (b'%d' % totaldeltasize).ljust(12),
298 298 (b'%d' % totalblobsize).ljust(9),
299 299 deltastr,
300 300 )
301 301 )
302 302
303 303 bases = {}
304 304 nodes = set()
305 305 failures = 0
306 306 for filename, node, deltabase, deltalen in dpack.iterentries():
307 307 bases[node] = deltabase
308 308 if node in nodes:
309 309 ui.write((b"Bad entry: %s appears twice\n" % short(node)))
310 310 failures += 1
311 311 nodes.add(node)
312 312 if filename != lastfilename:
313 313 printtotals()
314 314 name = b'(empty name)' if filename == b'' else filename
315 315 ui.write(b"%s:\n" % name)
316 316 ui.write(
317 317 b"%s%s%s%s\n"
318 318 % (
319 319 b"Node".ljust(hashlen),
320 320 b"Delta Base".ljust(hashlen),
321 321 b"Delta Length".ljust(14),
322 322 b"Blob Size".ljust(9),
323 323 )
324 324 )
325 325 lastfilename = filename
326 326 totalblobsize = 0
327 327 totaldeltasize = 0
328 328
329 329 # Metadata could be missing, in which case it will be an empty dict.
330 330 meta = dpack.getmeta(filename, node)
331 331 if constants.METAKEYSIZE in meta:
332 332 blobsize = meta[constants.METAKEYSIZE]
333 333 totaldeltasize += deltalen
334 334 totalblobsize += blobsize
335 335 else:
336 336 blobsize = b"(missing)"
337 337 ui.write(
338 338 b"%s %s %s%s\n"
339 339 % (
340 340 hashformatter(node),
341 341 hashformatter(deltabase),
342 342 (b'%d' % deltalen).ljust(14),
343 343 pycompat.bytestr(blobsize),
344 344 )
345 345 )
346 346
347 347 if filename is not None:
348 348 printtotals()
349 349
350 350 failures += _sanitycheck(ui, set(nodes), bases)
351 351 if failures > 1:
352 352 ui.warn((b"%d failures\n" % failures))
353 353 return 1
354 354
355 355
356 356 def _sanitycheck(ui, nodes, bases):
357 357 """
358 358     Does some basic sanity checking on a packfile with ``nodes`` and
359 359     ``bases`` (a mapping of node->base):
360 360
361 361 - Each deltabase must itself be a node elsewhere in the pack
362 362 - There must be no cycles
363 363 """
364 364 failures = 0
365 365 for node in nodes:
366 366 seen = set()
367 367 current = node
368 368 deltabase = bases[current]
369 369
370 370 while deltabase != nullid:
371 371 if deltabase not in nodes:
372 372 ui.warn(
373 373 (
374 374 b"Bad entry: %s has an unknown deltabase (%s)\n"
375 375 % (short(node), short(deltabase))
376 376 )
377 377 )
378 378 failures += 1
379 379 break
380 380
381 381 if deltabase in seen:
382 382 ui.warn(
383 383 (
384 384 b"Bad entry: %s has a cycle (at %s)\n"
385 385 % (short(node), short(deltabase))
386 386 )
387 387 )
388 388 failures += 1
389 389 break
390 390
391 391 current = deltabase
392 392 seen.add(current)
393 393 deltabase = bases[current]
394 394 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
395 395 # so we don't traverse it again.
396 396 bases[node] = nullid
397 397 return failures
398 398
399 399
400 400 def dumpdeltachain(ui, deltachain, **opts):
401 401 hashformatter = hex
402 402 hashlen = 40
403 403
404 404 lastfilename = None
405 405 for filename, node, filename, deltabasenode, delta in deltachain:
406 406 if filename != lastfilename:
407 407 ui.write(b"\n%s\n" % filename)
408 408 lastfilename = filename
409 409 ui.write(
410 410 b"%s %s %s %s\n"
411 411 % (
412 412 b"Node".ljust(hashlen),
413 413 b"Delta Base".ljust(hashlen),
414 414 b"Delta SHA1".ljust(hashlen),
415 415 b"Delta Length".ljust(6),
416 416 )
417 417 )
418 418
419 419 ui.write(
420 420 b"%s %s %s %d\n"
421 421 % (
422 422 hashformatter(node),
423 423 hashformatter(deltabasenode),
424 nodemod.hex(hashlib.sha1(delta).digest()),
424 nodemod.hex(hashutil.sha1(delta).digest()),
425 425 len(delta),
426 426 )
427 427 )
428 428
429 429
430 430 def debughistorypack(ui, path):
431 431 if b'.hist' in path:
432 432 path = path[: path.index(b'.hist')]
433 433 hpack = historypack.historypack(path)
434 434
435 435 lastfilename = None
436 436 for entry in hpack.iterentries():
437 437 filename, node, p1node, p2node, linknode, copyfrom = entry
438 438 if filename != lastfilename:
439 439 ui.write(b"\n%s\n" % filename)
440 440 ui.write(
441 441 b"%s%s%s%s%s\n"
442 442 % (
443 443 b"Node".ljust(14),
444 444 b"P1 Node".ljust(14),
445 445 b"P2 Node".ljust(14),
446 446 b"Link Node".ljust(14),
447 447 b"Copy From",
448 448 )
449 449 )
450 450 lastfilename = filename
451 451 ui.write(
452 452 b"%s %s %s %s %s\n"
453 453 % (
454 454 short(node),
455 455 short(p1node),
456 456 short(p2node),
457 457 short(linknode),
458 458 copyfrom,
459 459 )
460 460 )
461 461
462 462
463 463 def debugwaitonrepack(repo):
464 464 with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
465 465 return
466 466
467 467
468 468 def debugwaitonprefetch(repo):
469 469 with repo._lock(
470 470 repo.svfs,
471 471 b"prefetchlock",
472 472 True,
473 473 None,
474 474 None,
475 475 _(b'prefetching in %s') % repo.origroot,
476 476 ):
477 477 pass
@@ -1,667 +1,669 b''
1 1 # fileserverclient.py - client for communicating with the cache process
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import hashlib
11 10 import io
12 11 import os
13 12 import threading
14 13 import time
15 14 import zlib
16 15
17 16 from mercurial.i18n import _
18 17 from mercurial.node import bin, hex, nullid
19 18 from mercurial import (
20 19 error,
21 20 node,
22 21 pycompat,
23 22 revlog,
24 23 sshpeer,
25 24 util,
26 25 wireprotov1peer,
27 26 )
28 from mercurial.utils import procutil
27 from mercurial.utils import (
28 hashutil,
29 procutil,
30 )
29 31
30 32 from . import (
31 33 constants,
32 34 contentstore,
33 35 metadatastore,
34 36 )
35 37
36 38 _sshv1peer = sshpeer.sshv1peer
37 39
38 40 # Statistics for debugging
39 41 fetchcost = 0
40 42 fetches = 0
41 43 fetched = 0
42 44 fetchmisses = 0
43 45
44 46 _lfsmod = None
45 47
46 48
47 49 def getcachekey(reponame, file, id):
48 pathhash = node.hex(hashlib.sha1(file).digest())
50 pathhash = node.hex(hashutil.sha1(file).digest())
49 51 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50 52
51 53
52 54 def getlocalkey(file, id):
53 pathhash = node.hex(hashlib.sha1(file).digest())
55 pathhash = node.hex(hashutil.sha1(file).digest())
54 56 return os.path.join(pathhash, id)
55 57
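
For illustration (hypothetical names; only the shape of the keys matters), the hex path hash is split after its first two characters for the shared cache, matching the ``.../1a/85ffda..be21`` layout noted in basestore:

    # pathhash = node.hex(hashutil.sha1(filename).digest())  # 40 hex characters
    # getcachekey(b'reponame', filename, id)
    #     -> reponame/<pathhash[:2]>/<pathhash[2:]>/<id>
    # getlocalkey(filename, id)
    #     -> <pathhash>/<id>
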
56 58
57 59 def peersetup(ui, peer):
58 60 class remotefilepeer(peer.__class__):
59 61 @wireprotov1peer.batchable
60 62 def x_rfl_getfile(self, file, node):
61 63 if not self.capable(b'x_rfl_getfile'):
62 64 raise error.Abort(
63 65 b'configured remotefile server does not support getfile'
64 66 )
65 67 f = wireprotov1peer.future()
66 68 yield {b'file': file, b'node': node}, f
67 69 code, data = f.value.split(b'\0', 1)
68 70 if int(code):
69 71 raise error.LookupError(file, node, data)
70 72 yield data
71 73
72 74 @wireprotov1peer.batchable
73 75 def x_rfl_getflogheads(self, path):
74 76 if not self.capable(b'x_rfl_getflogheads'):
75 77 raise error.Abort(
76 78 b'configured remotefile server does not '
77 79 b'support getflogheads'
78 80 )
79 81 f = wireprotov1peer.future()
80 82 yield {b'path': path}, f
81 83 heads = f.value.split(b'\n') if f.value else []
82 84 yield heads
83 85
84 86 def _updatecallstreamopts(self, command, opts):
85 87 if command != b'getbundle':
86 88 return
87 89 if (
88 90 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
89 91 not in self.capabilities()
90 92 ):
91 93 return
92 94 if not util.safehasattr(self, '_localrepo'):
93 95 return
94 96 if (
95 97 constants.SHALLOWREPO_REQUIREMENT
96 98 not in self._localrepo.requirements
97 99 ):
98 100 return
99 101
100 102 bundlecaps = opts.get(b'bundlecaps')
101 103 if bundlecaps:
102 104 bundlecaps = [bundlecaps]
103 105 else:
104 106 bundlecaps = []
105 107
106 108 # shallow, includepattern, and excludepattern are a hacky way of
107 109 # carrying over data from the local repo to this getbundle
108 110 # command. We need to do it this way because bundle1 getbundle
109 111 # doesn't provide any other place we can hook in to manipulate
110 112 # getbundle args before it goes across the wire. Once we get rid
111 113 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
112 114 # do this more cleanly.
113 115 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
114 116 if self._localrepo.includepattern:
115 117 patterns = b'\0'.join(self._localrepo.includepattern)
116 118 includecap = b"includepattern=" + patterns
117 119 bundlecaps.append(includecap)
118 120 if self._localrepo.excludepattern:
119 121 patterns = b'\0'.join(self._localrepo.excludepattern)
120 122 excludecap = b"excludepattern=" + patterns
121 123 bundlecaps.append(excludecap)
122 124 opts[b'bundlecaps'] = b','.join(bundlecaps)
123 125
124 126 def _sendrequest(self, command, args, **opts):
125 127 self._updatecallstreamopts(command, args)
126 128 return super(remotefilepeer, self)._sendrequest(
127 129 command, args, **opts
128 130 )
129 131
130 132 def _callstream(self, command, **opts):
131 133 supertype = super(remotefilepeer, self)
132 134 if not util.safehasattr(supertype, '_sendrequest'):
133 135 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
134 136 return super(remotefilepeer, self)._callstream(command, **opts)
135 137
136 138 peer.__class__ = remotefilepeer
137 139
138 140
139 141 class cacheconnection(object):
140 142 """The connection for communicating with the remote cache. Performs
141 143 gets and sets by communicating with an external process that has the
142 144 cache-specific implementation.
143 145 """
144 146
145 147 def __init__(self):
146 148 self.pipeo = self.pipei = self.pipee = None
147 149 self.subprocess = None
148 150 self.connected = False
149 151
150 152 def connect(self, cachecommand):
151 153 if self.pipeo:
152 154 raise error.Abort(_(b"cache connection already open"))
153 155 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
154 156 cachecommand
155 157 )
156 158 self.connected = True
157 159
158 160 def close(self):
159 161 def tryclose(pipe):
160 162 try:
161 163 pipe.close()
162 164 except Exception:
163 165 pass
164 166
165 167 if self.connected:
166 168 try:
167 169 self.pipei.write(b"exit\n")
168 170 except Exception:
169 171 pass
170 172 tryclose(self.pipei)
171 173 self.pipei = None
172 174 tryclose(self.pipeo)
173 175 self.pipeo = None
174 176 tryclose(self.pipee)
175 177 self.pipee = None
176 178 try:
177 179 # Wait for process to terminate, making sure to avoid deadlock.
178 180 # See https://docs.python.org/2/library/subprocess.html for
179 181 # warnings about wait() and deadlocking.
180 182 self.subprocess.communicate()
181 183 except Exception:
182 184 pass
183 185 self.subprocess = None
184 186 self.connected = False
185 187
186 188 def request(self, request, flush=True):
187 189 if self.connected:
188 190 try:
189 191 self.pipei.write(request)
190 192 if flush:
191 193 self.pipei.flush()
192 194 except IOError:
193 195 self.close()
194 196
195 197 def receiveline(self):
196 198 if not self.connected:
197 199 return None
198 200 try:
199 201 result = self.pipeo.readline()[:-1]
200 202 if not result:
201 203 self.close()
202 204 except IOError:
203 205 self.close()
204 206
205 207 return result
206 208
207 209
208 210 def _getfilesbatch(
209 211 remote, receivemissing, progresstick, missed, idmap, batchsize
210 212 ):
211 213 # Over http(s), iterbatch is a streamy method and we can start
212 214 # looking at results early. This means we send one (potentially
213 215 # large) request, but then we show nice progress as we process
214 216 # file results, rather than showing chunks of $batchsize in
215 217 # progress.
216 218 #
217 219 # Over ssh, iterbatch isn't streamy because batch() wasn't
218 220 # explicitly designed as a streaming method. In the future we
219 221 # should probably introduce a streambatch() method upstream and
220 222 # use that for this.
221 223 with remote.commandexecutor() as e:
222 224 futures = []
223 225 for m in missed:
224 226 futures.append(
225 227 e.callcommand(
226 228 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
227 229 )
228 230 )
229 231
230 232 for i, m in enumerate(missed):
231 233 r = futures[i].result()
232 234 futures[i] = None # release memory
233 235 file_ = idmap[m]
234 236 node = m[-40:]
235 237 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
236 238 progresstick()
237 239
238 240
239 241 def _getfiles_optimistic(
240 242 remote, receivemissing, progresstick, missed, idmap, step
241 243 ):
242 244 remote._callstream(b"x_rfl_getfiles")
243 245 i = 0
244 246 pipeo = remote._pipeo
245 247 pipei = remote._pipei
246 248 while i < len(missed):
247 249 # issue a batch of requests
248 250 start = i
249 251 end = min(len(missed), start + step)
250 252 i = end
251 253 for missingid in missed[start:end]:
252 254 # issue new request
253 255 versionid = missingid[-40:]
254 256 file = idmap[missingid]
255 257 sshrequest = b"%s%s\n" % (versionid, file)
256 258 pipeo.write(sshrequest)
257 259 pipeo.flush()
258 260
259 261 # receive batch results
260 262 for missingid in missed[start:end]:
261 263 versionid = missingid[-40:]
262 264 file = idmap[missingid]
263 265 receivemissing(pipei, file, versionid)
264 266 progresstick()
265 267
266 268 # End the command
267 269 pipeo.write(b'\n')
268 270 pipeo.flush()
269 271
270 272
271 273 def _getfiles_threaded(
272 274 remote, receivemissing, progresstick, missed, idmap, step
273 275 ):
274 276 remote._callstream(b"getfiles")
275 277 pipeo = remote._pipeo
276 278 pipei = remote._pipei
277 279
278 280 def writer():
279 281 for missingid in missed:
280 282 versionid = missingid[-40:]
281 283 file = idmap[missingid]
282 284 sshrequest = b"%s%s\n" % (versionid, file)
283 285 pipeo.write(sshrequest)
284 286 pipeo.flush()
285 287
286 288 writerthread = threading.Thread(target=writer)
287 289 writerthread.daemon = True
288 290 writerthread.start()
289 291
290 292 for missingid in missed:
291 293 versionid = missingid[-40:]
292 294 file = idmap[missingid]
293 295 receivemissing(pipei, file, versionid)
294 296 progresstick()
295 297
296 298 writerthread.join()
297 299 # End the command
298 300 pipeo.write(b'\n')
299 301 pipeo.flush()
300 302
301 303
302 304 class fileserverclient(object):
303 305 """A client for requesting files from the remote file server.
304 306 """
305 307
306 308 def __init__(self, repo):
307 309 ui = repo.ui
308 310 self.repo = repo
309 311 self.ui = ui
310 312 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
311 313 if self.cacheprocess:
312 314 self.cacheprocess = util.expandpath(self.cacheprocess)
313 315
314 316 # This option causes remotefilelog to pass the full file path to the
315 317 # cacheprocess instead of a hashed key.
316 318 self.cacheprocesspasspath = ui.configbool(
317 319 b"remotefilelog", b"cacheprocess.includepath"
318 320 )
319 321
320 322 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
321 323
322 324 self.remotecache = cacheconnection()
323 325
324 326 def setstore(self, datastore, historystore, writedata, writehistory):
325 327 self.datastore = datastore
326 328 self.historystore = historystore
327 329 self.writedata = writedata
328 330 self.writehistory = writehistory
329 331
330 332 def _connect(self):
331 333 return self.repo.connectionpool.get(self.repo.fallbackpath)
332 334
333 335 def request(self, fileids):
334 336 """Takes a list of filename/node pairs and fetches them from the
335 337 server. Files are stored in the local cache.
336 338 A list of nodes that the server couldn't find is returned.
337 339 If the connection fails, an exception is raised.
338 340 """
339 341 if not self.remotecache.connected:
340 342 self.connect()
341 343 cache = self.remotecache
342 344 writedata = self.writedata
343 345
344 346 repo = self.repo
345 347 total = len(fileids)
346 348 request = b"get\n%d\n" % total
347 349 idmap = {}
348 350 reponame = repo.name
349 351 for file, id in fileids:
350 352 fullid = getcachekey(reponame, file, id)
351 353 if self.cacheprocesspasspath:
352 354 request += file + b'\0'
353 355 request += fullid + b"\n"
354 356 idmap[fullid] = file
355 357
356 358 cache.request(request)
357 359
358 360 progress = self.ui.makeprogress(_(b'downloading'), total=total)
359 361 progress.update(0)
360 362
361 363 missed = []
362 364 while True:
363 365 missingid = cache.receiveline()
364 366 if not missingid:
365 367 missedset = set(missed)
366 368 for missingid in idmap:
367 369 if not missingid in missedset:
368 370 missed.append(missingid)
369 371 self.ui.warn(
370 372 _(
371 373 b"warning: cache connection closed early - "
372 374 + b"falling back to server\n"
373 375 )
374 376 )
375 377 break
376 378 if missingid == b"0":
377 379 break
378 380 if missingid.startswith(b"_hits_"):
379 381 # receive progress reports
380 382 parts = missingid.split(b"_")
381 383 progress.increment(int(parts[2]))
382 384 continue
383 385
384 386 missed.append(missingid)
385 387
386 388 global fetchmisses
387 389 fetchmisses += len(missed)
388 390
389 391 fromcache = total - len(missed)
390 392 progress.update(fromcache, total=total)
391 393 self.ui.log(
392 394 b"remotefilelog",
393 395 b"remote cache hit rate is %r of %r\n",
394 396 fromcache,
395 397 total,
396 398 hit=fromcache,
397 399 total=total,
398 400 )
399 401
400 402 oldumask = os.umask(0o002)
401 403 try:
402 404 # receive cache misses from master
403 405 if missed:
404 406 # When verbose is true, sshpeer prints 'running ssh...'
405 407 # to stdout, which can interfere with some command
406 408 # outputs
407 409 verbose = self.ui.verbose
408 410 self.ui.verbose = False
409 411 try:
410 412 with self._connect() as conn:
411 413 remote = conn.peer
412 414 if remote.capable(
413 415 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
414 416 ):
415 417 if not isinstance(remote, _sshv1peer):
416 418 raise error.Abort(
417 419 b'remotefilelog requires ssh servers'
418 420 )
419 421 step = self.ui.configint(
420 422 b'remotefilelog', b'getfilesstep'
421 423 )
422 424 getfilestype = self.ui.config(
423 425 b'remotefilelog', b'getfilestype'
424 426 )
425 427 if getfilestype == b'threaded':
426 428 _getfiles = _getfiles_threaded
427 429 else:
428 430 _getfiles = _getfiles_optimistic
429 431 _getfiles(
430 432 remote,
431 433 self.receivemissing,
432 434 progress.increment,
433 435 missed,
434 436 idmap,
435 437 step,
436 438 )
437 439 elif remote.capable(b"x_rfl_getfile"):
438 440 if remote.capable(b'batch'):
439 441 batchdefault = 100
440 442 else:
441 443 batchdefault = 10
442 444 batchsize = self.ui.configint(
443 445 b'remotefilelog', b'batchsize', batchdefault
444 446 )
445 447 self.ui.debug(
446 448 b'requesting %d files from '
447 449 b'remotefilelog server...\n' % len(missed)
448 450 )
449 451 _getfilesbatch(
450 452 remote,
451 453 self.receivemissing,
452 454 progress.increment,
453 455 missed,
454 456 idmap,
455 457 batchsize,
456 458 )
457 459 else:
458 460 raise error.Abort(
459 461 b"configured remotefilelog server"
460 462 b" does not support remotefilelog"
461 463 )
462 464
463 465 self.ui.log(
464 466 b"remotefilefetchlog",
465 467 b"Success\n",
466 468 fetched_files=progress.pos - fromcache,
467 469 total_to_fetch=total - fromcache,
468 470 )
469 471 except Exception:
470 472 self.ui.log(
471 473 b"remotefilefetchlog",
472 474 b"Fail\n",
473 475 fetched_files=progress.pos - fromcache,
474 476 total_to_fetch=total - fromcache,
475 477 )
476 478 raise
477 479 finally:
478 480 self.ui.verbose = verbose
479 481 # send to memcache
480 482 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
481 483 cache.request(request)
482 484
483 485 progress.complete()
484 486
485 487 # mark ourselves as a user of this cache
486 488 writedata.markrepo(self.repo.path)
487 489 finally:
488 490 os.umask(oldumask)
489 491
490 492 def receivemissing(self, pipe, filename, node):
491 493 line = pipe.readline()[:-1]
492 494 if not line:
493 495 raise error.ResponseError(
494 496 _(b"error downloading file contents:"),
495 497 _(b"connection closed early"),
496 498 )
497 499 size = int(line)
498 500 data = pipe.read(size)
499 501 if len(data) != size:
500 502 raise error.ResponseError(
501 503 _(b"error downloading file contents:"),
502 504 _(b"only received %s of %s bytes") % (len(data), size),
503 505 )
504 506
505 507 self.writedata.addremotefilelognode(
506 508 filename, bin(node), zlib.decompress(data)
507 509 )
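# Editor's note (illustration, not part of this changeset): a small
# self-contained sketch of the "<decimal length>\n<zlib-compressed blob>"
# framing that receivemissing() above consumes (and that _getfilesbatch()
# reconstructs from batch results). The blob contents here are placeholder
# bytes, not a real remotefilelog file blob.
import io
import zlib


def sketch_receivemissing(pipe):
    line = pipe.readline()[:-1]  # strip the trailing newline
    if not line:
        raise RuntimeError('connection closed early')
    size = int(line)
    data = pipe.read(size)
    if len(data) != size:
        raise RuntimeError('only received %d of %d bytes' % (len(data), size))
    return zlib.decompress(data)


payload = zlib.compress(b'placeholder file blob')
pipe = io.BytesIO(b'%d\n%s' % (len(payload), payload))
assert sketch_receivemissing(pipe) == b'placeholder file blob'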
508 510
509 511 def connect(self):
510 512 if self.cacheprocess:
511 513 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
512 514 self.remotecache.connect(cmd)
513 515 else:
514 516 # If no cache process is specified, we fake one that always
515 517 # returns cache misses. This enables tests to run easily
516 518 # and may eventually allow us to be a drop-in replacement
517 519 # for the largefiles extension.
518 520 class simplecache(object):
519 521 def __init__(self):
520 522 self.missingids = []
521 523 self.connected = True
522 524
523 525 def close(self):
524 526 pass
525 527
526 528 def request(self, value, flush=True):
527 529 lines = value.split(b"\n")
528 530 if lines[0] != b"get":
529 531 return
530 532 self.missingids = lines[2:-1]
531 533 self.missingids.append(b'0')
532 534
533 535 def receiveline(self):
534 536 if len(self.missingids) > 0:
535 537 return self.missingids.pop(0)
536 538 return None
537 539
538 540 self.remotecache = simplecache()
539 541
540 542 def close(self):
541 543 if fetches:
542 544 msg = (
543 545 b"%d files fetched over %d fetches - "
544 546 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
545 547 ) % (
546 548 fetched,
547 549 fetches,
548 550 fetchmisses,
549 551 float(fetched - fetchmisses) / float(fetched) * 100.0,
550 552 fetchcost,
551 553 )
552 554 if self.debugoutput:
553 555 self.ui.warn(msg)
554 556 self.ui.log(
555 557 b"remotefilelog.prefetch",
556 558 msg.replace(b"%", b"%%"),
557 559 remotefilelogfetched=fetched,
558 560 remotefilelogfetches=fetches,
559 561 remotefilelogfetchmisses=fetchmisses,
560 562 remotefilelogfetchtime=fetchcost * 1000,
561 563 )
562 564
563 565 if self.remotecache.connected:
564 566 self.remotecache.close()
565 567
566 568 def prefetch(
567 569 self, fileids, force=False, fetchdata=True, fetchhistory=False
568 570 ):
569 571 """downloads the given file versions to the cache
570 572 """
571 573 repo = self.repo
572 574 idstocheck = []
573 575 for file, id in fileids:
574 576 # hack
575 577 # - we don't use .hgtags
576 578 # - workingctx produces ids with length 42,
577 579 # which we skip since they aren't in any cache
578 580 if (
579 581 file == b'.hgtags'
580 582 or len(id) == 42
581 583 or not repo.shallowmatch(file)
582 584 ):
583 585 continue
584 586
585 587 idstocheck.append((file, bin(id)))
586 588
587 589 datastore = self.datastore
588 590 historystore = self.historystore
589 591 if force:
590 592 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
591 593 historystore = metadatastore.unionmetadatastore(
592 594 *repo.sharedhistorystores
593 595 )
594 596
595 597 missingids = set()
596 598 if fetchdata:
597 599 missingids.update(datastore.getmissing(idstocheck))
598 600 if fetchhistory:
599 601 missingids.update(historystore.getmissing(idstocheck))
600 602
601 603 # partition missing nodes into nullid and not-nullid so we can
602 604 # warn about this filtering potentially shadowing bugs.
603 605 nullids = len([None for unused, id in missingids if id == nullid])
604 606 if nullids:
605 607 missingids = [(f, id) for f, id in missingids if id != nullid]
606 608 repo.ui.develwarn(
607 609 (
608 610 b'remotefilelog not fetching %d null revs'
609 611 b' - this is likely hiding bugs' % nullids
610 612 ),
611 613 config=b'remotefilelog-ext',
612 614 )
613 615 if missingids:
614 616 global fetches, fetched, fetchcost
615 617 fetches += 1
616 618
617 619 # We want to be able to detect excess individual file downloads, so
618 620 # let's log that information for debugging.
619 621 if fetches >= 15 and fetches < 18:
620 622 if fetches == 15:
621 623 fetchwarning = self.ui.config(
622 624 b'remotefilelog', b'fetchwarning'
623 625 )
624 626 if fetchwarning:
625 627 self.ui.warn(fetchwarning + b'\n')
626 628 self.logstacktrace()
627 629 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
628 630 fetched += len(missingids)
629 631 start = time.time()
630 632 missingids = self.request(missingids)
631 633 if missingids:
632 634 raise error.Abort(
633 635 _(b"unable to download %d files") % len(missingids)
634 636 )
635 637 fetchcost += time.time() - start
636 638 self._lfsprefetch(fileids)
637 639
638 640 def _lfsprefetch(self, fileids):
639 641 if not _lfsmod or not util.safehasattr(
640 642 self.repo.svfs, b'lfslocalblobstore'
641 643 ):
642 644 return
643 645 if not _lfsmod.wrapper.candownload(self.repo):
644 646 return
645 647 pointers = []
646 648 store = self.repo.svfs.lfslocalblobstore
647 649 for file, id in fileids:
648 650 node = bin(id)
649 651 rlog = self.repo.file(file)
650 652 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
651 653 text = rlog.rawdata(node)
652 654 p = _lfsmod.pointer.deserialize(text)
653 655 oid = p.oid()
654 656 if not store.has(oid):
655 657 pointers.append(p)
656 658 if len(pointers) > 0:
657 659 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
658 660 assert all(store.has(p.oid()) for p in pointers)
659 661
660 662 def logstacktrace(self):
661 663 import traceback
662 664
663 665 self.ui.log(
664 666 b'remotefilelog',
665 667 b'excess remotefilelog fetching:\n%s\n',
666 668 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
667 669 )
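# Editor's note (illustration, not part of this changeset): a toy walk-through
# of the text protocol spoken between fileserverclient.request() and the
# external cache process, modelled on the simplecache fake in connect() above.
# The cache keys are made-up values; a real cache process answers with the
# keys it is missing, optional "_hits_<n>_" progress lines, and a final "0".
def sketch_build_get_request(keys):
    # "get\n<count>\n<key>\n<key>\n..."
    return b"get\n%d\n" % len(keys) + b"".join(k + b"\n" for k in keys)


def sketch_parse_responses(lines):
    missed, hits = [], 0
    for line in lines:
        if line == b"0":                # end of the miss list
            break
        if line.startswith(b"_hits_"):  # progress report: "_hits_<n>_..."
            hits += int(line.split(b"_")[2])
            continue
        missed.append(line)
    return missed, hits


keys = [b"myrepo/ab/cdef/0123", b"myrepo/12/3456/89ab"]
print(sketch_build_get_request(keys))
print(sketch_parse_responses([b"_hits_1_", keys[1], b"0"]))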
@@ -1,572 +1,572 b''
1 1 from __future__ import absolute_import
2 2
3 import hashlib
4 3 import struct
5 4
6 5 from mercurial.node import hex, nullid
7 6 from mercurial import (
8 7 pycompat,
9 8 util,
10 9 )
10 from mercurial.utils import hashutil
11 11 from . import (
12 12 basepack,
13 13 constants,
14 14 shallowutil,
15 15 )
16 16
17 17 # (filename hash, offset, size)
18 18 INDEXFORMAT2 = b'!20sQQII'
19 19 INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2)
20 20 NODELENGTH = 20
21 21
22 22 NODEINDEXFORMAT = b'!20sQ'
23 23 NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)
24 24
25 25 # (node, p1, p2, linknode)
26 26 PACKFORMAT = b"!20s20s20s20sH"
27 27 PACKENTRYLENGTH = 82
28 28
29 29 ENTRYCOUNTSIZE = 4
30 30
31 31 INDEXSUFFIX = b'.histidx'
32 32 PACKSUFFIX = b'.histpack'
33 33
34 34 ANC_NODE = 0
35 35 ANC_P1NODE = 1
36 36 ANC_P2NODE = 2
37 37 ANC_LINKNODE = 3
38 38 ANC_COPYFROM = 4
39 39
40 40
41 41 class historypackstore(basepack.basepackstore):
42 42 INDEXSUFFIX = INDEXSUFFIX
43 43 PACKSUFFIX = PACKSUFFIX
44 44
45 45 def getpack(self, path):
46 46 return historypack(path)
47 47
48 48 def getancestors(self, name, node, known=None):
49 49 for pack in self.packs:
50 50 try:
51 51 return pack.getancestors(name, node, known=known)
52 52 except KeyError:
53 53 pass
54 54
55 55 for pack in self.refresh():
56 56 try:
57 57 return pack.getancestors(name, node, known=known)
58 58 except KeyError:
59 59 pass
60 60
61 61 raise KeyError((name, node))
62 62
63 63 def getnodeinfo(self, name, node):
64 64 for pack in self.packs:
65 65 try:
66 66 return pack.getnodeinfo(name, node)
67 67 except KeyError:
68 68 pass
69 69
70 70 for pack in self.refresh():
71 71 try:
72 72 return pack.getnodeinfo(name, node)
73 73 except KeyError:
74 74 pass
75 75
76 76 raise KeyError((name, node))
77 77
78 78 def add(self, filename, node, p1, p2, linknode, copyfrom):
79 79 raise RuntimeError(
80 80 b"cannot add to historypackstore (%s:%s)" % (filename, hex(node))
81 81 )
82 82
83 83
84 84 class historypack(basepack.basepack):
85 85 INDEXSUFFIX = INDEXSUFFIX
86 86 PACKSUFFIX = PACKSUFFIX
87 87
88 88 SUPPORTED_VERSIONS = [2]
89 89
90 90 def __init__(self, path):
91 91 super(historypack, self).__init__(path)
92 92 self.INDEXFORMAT = INDEXFORMAT2
93 93 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
94 94
95 95 def getmissing(self, keys):
96 96 missing = []
97 97 for name, node in keys:
98 98 try:
99 99 self._findnode(name, node)
100 100 except KeyError:
101 101 missing.append((name, node))
102 102
103 103 return missing
104 104
105 105 def getancestors(self, name, node, known=None):
106 106 """Returns as many ancestors as we're aware of.
107 107
108 108 return value: {
109 109 node: (p1, p2, linknode, copyfrom),
110 110 ...
111 111 }
112 112 """
113 113 if known and node in known:
114 114 return []
115 115
116 116 ancestors = self._getancestors(name, node, known=known)
117 117 results = {}
118 118 for ancnode, p1, p2, linknode, copyfrom in ancestors:
119 119 results[ancnode] = (p1, p2, linknode, copyfrom)
120 120
121 121 if not results:
122 122 raise KeyError((name, node))
123 123 return results
124 124
125 125 def getnodeinfo(self, name, node):
126 126 # Drop the node from the tuple before returning, since the result should
127 127 # just be (p1, p2, linknode, copyfrom)
128 128 return self._findnode(name, node)[1:]
129 129
130 130 def _getancestors(self, name, node, known=None):
131 131 if known is None:
132 132 known = set()
133 133 section = self._findsection(name)
134 134 filename, offset, size, nodeindexoffset, nodeindexsize = section
135 135 pending = set((node,))
136 136 o = 0
137 137 while o < size:
138 138 if not pending:
139 139 break
140 140 entry, copyfrom = self._readentry(offset + o)
141 141 o += PACKENTRYLENGTH
142 142 if copyfrom:
143 143 o += len(copyfrom)
144 144
145 145 ancnode = entry[ANC_NODE]
146 146 if ancnode in pending:
147 147 pending.remove(ancnode)
148 148 p1node = entry[ANC_P1NODE]
149 149 p2node = entry[ANC_P2NODE]
150 150 if p1node != nullid and p1node not in known:
151 151 pending.add(p1node)
152 152 if p2node != nullid and p2node not in known:
153 153 pending.add(p2node)
154 154
155 155 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
156 156
157 157 def _readentry(self, offset):
158 158 data = self._data
159 159 entry = struct.unpack(
160 160 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
161 161 )
162 162 copyfrom = None
163 163 copyfromlen = entry[ANC_COPYFROM]
164 164 if copyfromlen != 0:
165 165 offset += PACKENTRYLENGTH
166 166 copyfrom = data[offset : offset + copyfromlen]
167 167 return entry, copyfrom
168 168
169 169 def add(self, filename, node, p1, p2, linknode, copyfrom):
170 170 raise RuntimeError(
171 171 b"cannot add to historypack (%s:%s)" % (filename, hex(node))
172 172 )
173 173
174 174 def _findnode(self, name, node):
175 175 if self.VERSION == 0:
176 176 ancestors = self._getancestors(name, node)
177 177 for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
178 178 if ancnode == node:
179 179 return (ancnode, p1node, p2node, linknode, copyfrom)
180 180 else:
181 181 section = self._findsection(name)
182 182 nodeindexoffset, nodeindexsize = section[3:]
183 183 entry = self._bisect(
184 184 node,
185 185 nodeindexoffset,
186 186 nodeindexoffset + nodeindexsize,
187 187 NODEINDEXENTRYLENGTH,
188 188 )
189 189 if entry is not None:
190 190 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
191 191 entry, copyfrom = self._readentry(offset)
192 192 # Drop the copyfromlen from the end of entry, and replace it
193 193 # with the copyfrom string.
194 194 return entry[:4] + (copyfrom,)
195 195
196 196 raise KeyError(b"unable to find history for %s:%s" % (name, hex(node)))
197 197
198 198 def _findsection(self, name):
199 199 params = self.params
200 namehash = hashlib.sha1(name).digest()
200 namehash = hashutil.sha1(name).digest()
201 201 fanoutkey = struct.unpack(
202 202 params.fanoutstruct, namehash[: params.fanoutprefix]
203 203 )[0]
204 204 fanout = self._fanouttable
205 205
206 206 start = fanout[fanoutkey] + params.indexstart
207 207 indexend = self._indexend
208 208
209 209 for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
210 210 end = fanout[i] + params.indexstart
211 211 if end != start:
212 212 break
213 213 else:
214 214 end = indexend
215 215
216 216 entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
217 217 if not entry:
218 218 raise KeyError(name)
219 219
220 220 rawentry = struct.unpack(self.INDEXFORMAT, entry)
221 221 x, offset, size, nodeindexoffset, nodeindexsize = rawentry
222 222 rawnamelen = self._index[
223 223 nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
224 224 ]
225 225 actualnamelen = struct.unpack(b'!H', rawnamelen)[0]
226 226 nodeindexoffset += constants.FILENAMESIZE
227 227 actualname = self._index[
228 228 nodeindexoffset : nodeindexoffset + actualnamelen
229 229 ]
230 230 if actualname != name:
231 231 raise KeyError(
232 232 b"found file name %s when looking for %s" % (actualname, name)
233 233 )
234 234 nodeindexoffset += actualnamelen
235 235
236 236 filenamelength = struct.unpack(
237 237 b'!H', self._data[offset : offset + constants.FILENAMESIZE]
238 238 )[0]
239 239 offset += constants.FILENAMESIZE
240 240
241 241 actualname = self._data[offset : offset + filenamelength]
242 242 offset += filenamelength
243 243
244 244 if name != actualname:
245 245 raise KeyError(
246 246 b"found file name %s when looking for %s" % (actualname, name)
247 247 )
248 248
249 249 # Skip entry list size
250 250 offset += ENTRYCOUNTSIZE
251 251
252 252 nodelistoffset = offset
253 253 nodelistsize = (
254 254 size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
255 255 )
256 256 return (
257 257 name,
258 258 nodelistoffset,
259 259 nodelistsize,
260 260 nodeindexoffset,
261 261 nodeindexsize,
262 262 )
263 263
264 264 def _bisect(self, node, start, end, entrylen):
265 265 # Bisect between start and end to find node
266 266 origstart = start
267 267 startnode = self._index[start : start + NODELENGTH]
268 268 endnode = self._index[end : end + NODELENGTH]
269 269
270 270 if startnode == node:
271 271 return self._index[start : start + entrylen]
272 272 elif endnode == node:
273 273 return self._index[end : end + entrylen]
274 274 else:
275 275 while start < end - entrylen:
276 276 mid = start + (end - start) // 2
277 277 mid = mid - ((mid - origstart) % entrylen)
278 278 midnode = self._index[mid : mid + NODELENGTH]
279 279 if midnode == node:
280 280 return self._index[mid : mid + entrylen]
281 281 if node > midnode:
282 282 start = mid
283 283 elif node < midnode:
284 284 end = mid
285 285 return None
286 286
287 287 def markledger(self, ledger, options=None):
288 288 for filename, node in self:
289 289 ledger.markhistoryentry(self, filename, node)
290 290
291 291 def cleanup(self, ledger):
292 292 entries = ledger.sources.get(self, [])
293 293 allkeys = set(self)
294 294 repackedkeys = set(
295 295 (e.filename, e.node) for e in entries if e.historyrepacked
296 296 )
297 297
298 298 if len(allkeys - repackedkeys) == 0:
299 299 if self.path not in ledger.created:
300 300 util.unlinkpath(self.indexpath, ignoremissing=True)
301 301 util.unlinkpath(self.packpath, ignoremissing=True)
302 302
303 303 def __iter__(self):
304 304 for f, n, x, x, x, x in self.iterentries():
305 305 yield f, n
306 306
307 307 def iterentries(self):
308 308 # Start at 1 to skip the header
309 309 offset = 1
310 310 while offset < self.datasize:
311 311 data = self._data
312 312 # <2 byte len> + <filename>
313 313 filenamelen = struct.unpack(
314 314 b'!H', data[offset : offset + constants.FILENAMESIZE]
315 315 )[0]
316 316 offset += constants.FILENAMESIZE
317 317 filename = data[offset : offset + filenamelen]
318 318 offset += filenamelen
319 319
320 320 revcount = struct.unpack(
321 321 b'!I', data[offset : offset + ENTRYCOUNTSIZE]
322 322 )[0]
323 323 offset += ENTRYCOUNTSIZE
324 324
325 325 for i in pycompat.xrange(revcount):
326 326 entry = struct.unpack(
327 327 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
328 328 )
329 329 offset += PACKENTRYLENGTH
330 330
331 331 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
332 332 offset += entry[ANC_COPYFROM]
333 333
334 334 yield (
335 335 filename,
336 336 entry[ANC_NODE],
337 337 entry[ANC_P1NODE],
338 338 entry[ANC_P2NODE],
339 339 entry[ANC_LINKNODE],
340 340 copyfrom,
341 341 )
342 342
343 343 self._pagedin += PACKENTRYLENGTH
344 344
345 345 # If we've read a lot of data from the mmap, free some memory.
346 346 self.freememory()
347 347
348 348
349 349 class mutablehistorypack(basepack.mutablebasepack):
350 350 """A class for constructing and serializing a histpack file and index.
351 351
352 352 A history pack is a pair of files that contain the revision history for
353 353 various file revisions in Mercurial. It contains only revision history (like
354 354 parent pointers and linknodes), not any revision content information.
355 355
356 356 It consists of two files, with the following format:
357 357
358 358 .histpack
359 359 The pack itself is a series of file revisions with some basic header
360 360 information on each.
361 361
362 362 histpack = <version: 1 byte>
363 363 [<filesection>,...]
364 364 filesection = <filename len: 2 byte unsigned int>
365 365 <filename>
366 366 <revision count: 4 byte unsigned int>
367 367 [<revision>,...]
368 368 revision = <node: 20 byte>
369 369 <p1node: 20 byte>
370 370 <p2node: 20 byte>
371 371 <linknode: 20 byte>
372 372 <copyfromlen: 2 byte>
373 373 <copyfrom>
374 374
375 375 The revisions within each filesection are stored in topological order
376 376 (newest first). If a given entry has a parent from another file (a copy)
377 377 then p1node is the node from the other file, and copyfrom is the
378 378 filepath of the other file.
379 379
380 380 .histidx
381 381 The index file provides a mapping from filename to the file section in
382 382 the histpack. In V1 it also contains sub-indexes for specific nodes
383 383 within each file. It consists of three parts, the fanout, the file index
384 384 and the node indexes.
385 385
386 386 The file index is a list of index entries, sorted by filename hash (one
387 387 per file section in the pack). Each entry has:
388 388
389 389 - node (The 20 byte hash of the filename)
390 390 - pack entry offset (The location of this file section in the histpack)
391 391 - pack content size (The on-disk length of this file section's pack
392 392 data)
393 393 - node index offset (The location of the file's node index in the index
394 394 file) [1]
395 395 - node index size (the on-disk length of this file's node index) [1]
396 396
397 397 The fanout is a quick lookup table to reduce the number of steps for
398 398 bisecting the index. It is a series of 4 byte pointers to positions
399 399 within the index. It has 2^16 entries, which corresponds to hash
400 400 prefixes [00, 01, 02,..., FD, FE, FF]. Example: the pointer in slot 4F
401 401 points to the index position of the first revision whose node starts
402 402 with 4F. This saves log(2^16) bisect steps.
403 403
404 404 dataidx = <fanouttable>
405 405 <file count: 8 byte unsigned> [1]
406 406 <fileindex>
407 407 <node count: 8 byte unsigned> [1]
408 408 [<nodeindex>,...] [1]
409 409 fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
410 410
411 411 fileindex = [<file index entry>,...]
412 412 fileindexentry = <node: 20 byte>
413 413 <pack file section offset: 8 byte unsigned int>
414 414 <pack file section size: 8 byte unsigned int>
415 415 <node index offset: 4 byte unsigned int> [1]
416 416 <node index size: 4 byte unsigned int> [1]
417 417 nodeindex = <filename>[<node index entry>,...] [1]
418 418 filename = <filename len : 2 byte unsigned int><filename value> [1]
419 419 nodeindexentry = <node: 20 byte> [1]
420 420 <pack file node offset: 8 byte unsigned int> [1]
421 421
422 422 [1]: new in version 1.
423 423 """
424 424
425 425 INDEXSUFFIX = INDEXSUFFIX
426 426 PACKSUFFIX = PACKSUFFIX
427 427
428 428 SUPPORTED_VERSIONS = [2]
429 429
430 430 def __init__(self, ui, packpath, version=2):
431 431 super(mutablehistorypack, self).__init__(ui, packpath, version=version)
432 432 self.files = {}
433 433 self.entrylocations = {}
434 434 self.fileentries = {}
435 435
436 436 self.INDEXFORMAT = INDEXFORMAT2
437 437 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
438 438
439 439 self.NODEINDEXFORMAT = NODEINDEXFORMAT
440 440 self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
441 441
442 442 def add(self, filename, node, p1, p2, linknode, copyfrom):
443 443 copyfrom = copyfrom or b''
444 444 copyfromlen = struct.pack(b'!H', len(copyfrom))
445 445 self.fileentries.setdefault(filename, []).append(
446 446 (node, p1, p2, linknode, copyfromlen, copyfrom)
447 447 )
448 448
449 449 def _write(self):
450 450 for filename in sorted(self.fileentries):
451 451 entries = self.fileentries[filename]
452 452 sectionstart = self.packfp.tell()
453 453
454 454 # Write the file section content
455 455 entrymap = dict((e[0], e) for e in entries)
456 456
457 457 def parentfunc(node):
458 458 x, p1, p2, x, x, x = entrymap[node]
459 459 parents = []
460 460 if p1 != nullid:
461 461 parents.append(p1)
462 462 if p2 != nullid:
463 463 parents.append(p2)
464 464 return parents
465 465
466 466 sortednodes = list(
467 467 reversed(
468 468 shallowutil.sortnodes((e[0] for e in entries), parentfunc)
469 469 )
470 470 )
471 471
472 472 # Write the file section header
473 473 self.writeraw(
474 474 b"%s%s%s"
475 475 % (
476 476 struct.pack(b'!H', len(filename)),
477 477 filename,
478 478 struct.pack(b'!I', len(sortednodes)),
479 479 )
480 480 )
481 481
482 482 sectionlen = constants.FILENAMESIZE + len(filename) + 4
483 483
484 484 rawstrings = []
485 485
486 486 # Record the node locations for the index
487 487 locations = self.entrylocations.setdefault(filename, {})
488 488 offset = sectionstart + sectionlen
489 489 for node in sortednodes:
490 490 locations[node] = offset
491 491 raw = b'%s%s%s%s%s%s' % entrymap[node]
492 492 rawstrings.append(raw)
493 493 offset += len(raw)
494 494
495 495 rawdata = b''.join(rawstrings)
496 496 sectionlen += len(rawdata)
497 497
498 498 self.writeraw(rawdata)
499 499
500 500 # Record metadata for the index
501 501 self.files[filename] = (sectionstart, sectionlen)
502 node = hashlib.sha1(filename).digest()
502 node = hashutil.sha1(filename).digest()
503 503 self.entries[node] = node
504 504
505 505 def close(self, ledger=None):
506 506 if self._closed:
507 507 return
508 508
509 509 self._write()
510 510
511 511 return super(mutablehistorypack, self).close(ledger=ledger)
512 512
513 513 def createindex(self, nodelocations, indexoffset):
514 514 fileindexformat = self.INDEXFORMAT
515 515 fileindexlength = self.INDEXENTRYLENGTH
516 516 nodeindexformat = self.NODEINDEXFORMAT
517 517 nodeindexlength = self.NODEINDEXENTRYLENGTH
518 518
519 519 files = (
520 (hashlib.sha1(filename).digest(), filename, offset, size)
520 (hashutil.sha1(filename).digest(), filename, offset, size)
521 521 for filename, (offset, size) in pycompat.iteritems(self.files)
522 522 )
523 523 files = sorted(files)
524 524
525 525 # node index is after file index size, file index, and node index size
526 526 indexlensize = struct.calcsize(b'!Q')
527 527 nodeindexoffset = (
528 528 indexoffset
529 529 + indexlensize
530 530 + (len(files) * fileindexlength)
531 531 + indexlensize
532 532 )
533 533
534 534 fileindexentries = []
535 535 nodeindexentries = []
536 536 nodecount = 0
537 537 for namehash, filename, offset, size in files:
538 538 # File section index
539 539 nodelocations = self.entrylocations[filename]
540 540
541 541 nodeindexsize = len(nodelocations) * nodeindexlength
542 542
543 543 rawentry = struct.pack(
544 544 fileindexformat,
545 545 namehash,
546 546 offset,
547 547 size,
548 548 nodeindexoffset,
549 549 nodeindexsize,
550 550 )
551 551 # Node index
552 552 nodeindexentries.append(
553 553 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
554 554 )
555 555 nodeindexoffset += constants.FILENAMESIZE + len(filename)
556 556
557 557 for node, location in sorted(pycompat.iteritems(nodelocations)):
558 558 nodeindexentries.append(
559 559 struct.pack(nodeindexformat, node, location)
560 560 )
561 561 nodecount += 1
562 562
563 563 nodeindexoffset += len(nodelocations) * nodeindexlength
564 564
565 565 fileindexentries.append(rawentry)
566 566
567 567 nodecountraw = struct.pack(b'!Q', nodecount)
568 568 return (
569 569 b''.join(fileindexentries)
570 570 + nodecountraw
571 571 + b''.join(nodeindexentries)
572 572 )
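# Editor's note (illustration, not part of this changeset): a minimal
# round-trip of one .histpack file section, following the format documented
# in the mutablehistorypack docstring above (a 2-byte filename length, the
# filename, a 4-byte revision count, then PACKFORMAT entries each followed by
# an optional copyfrom suffix). The node values below are dummy 20-byte data.
import struct

PACKFORMAT = b"!20s20s20s20sH"
PACKENTRYLENGTH = struct.calcsize(PACKFORMAT)  # 82 bytes


def sketch_build_section(filename, entries):
    buf = struct.pack(b"!H", len(filename)) + filename
    buf += struct.pack(b"!I", len(entries))
    for node, p1, p2, linknode, copyfrom in entries:
        buf += struct.pack(PACKFORMAT, node, p1, p2, linknode, len(copyfrom))
        buf += copyfrom
    return buf


def sketch_parse_section(data):
    offset = 0
    (namelen,) = struct.unpack_from(b"!H", data, offset)
    offset += 2
    filename = data[offset:offset + namelen]
    offset += namelen
    (revcount,) = struct.unpack_from(b"!I", data, offset)
    offset += 4
    entries = []
    for _ in range(revcount):
        node, p1, p2, linknode, cplen = struct.unpack_from(
            PACKFORMAT, data, offset
        )
        offset += PACKENTRYLENGTH
        copyfrom = data[offset:offset + cplen]
        offset += cplen
        entries.append((node, p1, p2, linknode, copyfrom))
    return filename, entries


nullid = b"\0" * 20
entry = (b"\x11" * 20, nullid, nullid, b"\x22" * 20, b"")
section = sketch_build_section(b"dir/file.txt", [entry])
assert sketch_parse_section(section) == (b"dir/file.txt", [entry])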
@@ -1,536 +1,536 b''
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import errno
11 import hashlib
12 11 import os
13 12 import stat
14 13 import struct
15 14 import tempfile
16 15
17 16 from mercurial.i18n import _
18 17 from mercurial.pycompat import open
19 18 from mercurial import (
20 19 error,
21 20 node,
22 21 pycompat,
23 22 revlog,
24 23 util,
25 24 )
26 25 from mercurial.utils import (
26 hashutil,
27 27 storageutil,
28 28 stringutil,
29 29 )
30 30 from . import constants
31 31
32 32 if not pycompat.iswindows:
33 33 import grp
34 34
35 35
36 36 def isenabled(repo):
37 37 """returns whether the repository is remotefilelog enabled or not"""
38 38 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
39 39
40 40
41 41 def getcachekey(reponame, file, id):
42 pathhash = node.hex(hashlib.sha1(file).digest())
42 pathhash = node.hex(hashutil.sha1(file).digest())
43 43 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
44 44
45 45
46 46 def getlocalkey(file, id):
47 pathhash = node.hex(hashlib.sha1(file).digest())
47 pathhash = node.hex(hashutil.sha1(file).digest())
48 48 return os.path.join(pathhash, id)
49 49
50 50
51 51 def getcachepath(ui, allowempty=False):
52 52 cachepath = ui.config(b"remotefilelog", b"cachepath")
53 53 if not cachepath:
54 54 if allowempty:
55 55 return None
56 56 else:
57 57 raise error.Abort(
58 58 _(b"could not find config option remotefilelog.cachepath")
59 59 )
60 60 return util.expandpath(cachepath)
61 61
62 62
63 63 def getcachepackpath(repo, category):
64 64 cachepath = getcachepath(repo.ui)
65 65 if category != constants.FILEPACK_CATEGORY:
66 66 return os.path.join(cachepath, repo.name, b'packs', category)
67 67 else:
68 68 return os.path.join(cachepath, repo.name, b'packs')
69 69
70 70
71 71 def getlocalpackpath(base, category):
72 72 return os.path.join(base, b'packs', category)
73 73
74 74
75 75 def createrevlogtext(text, copyfrom=None, copyrev=None):
76 76 """returns a string that matches the revlog contents in a
77 77 traditional revlog
78 78 """
79 79 meta = {}
80 80 if copyfrom or text.startswith(b'\1\n'):
81 81 if copyfrom:
82 82 meta[b'copy'] = copyfrom
83 83 meta[b'copyrev'] = copyrev
84 84 text = storageutil.packmeta(meta, text)
85 85
86 86 return text
87 87
88 88
89 89 def parsemeta(text):
90 90 """parse mercurial filelog metadata"""
91 91 meta, size = storageutil.parsemeta(text)
92 92 if text.startswith(b'\1\n'):
93 93 s = text.index(b'\1\n', 2)
94 94 text = text[s + 2 :]
95 95 return meta or {}, text
96 96
97 97
98 98 def sumdicts(*dicts):
99 99 """Adds all the values of *dicts together into one dictionary. This assumes
100 100 the values in *dicts are all summable.
101 101
102 102 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
103 103 """
104 104 result = collections.defaultdict(lambda: 0)
105 105 for dict in dicts:
106 106 for k, v in pycompat.iteritems(dict):
107 107 result[k] += v
108 108 return result
109 109
110 110
111 111 def prefixkeys(dict, prefix):
112 112 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
113 113 result = {}
114 114 for k, v in pycompat.iteritems(dict):
115 115 result[prefix + k] = v
116 116 return result
117 117
118 118
119 119 def reportpackmetrics(ui, prefix, *stores):
120 120 dicts = [s.getmetrics() for s in stores]
121 121 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
122 122 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
123 123
124 124
125 125 def _parsepackmeta(metabuf):
126 126 """parse datapack meta, bytes (<metadata-list>) -> dict
127 127
128 128 The dict contains raw content - both keys and values are strings.
129 129 Upper-level business may want to convert some of them to other types like
130 130 integers, on their own.
131 131
132 132 raise ValueError if the data is corrupted
133 133 """
134 134 metadict = {}
135 135 offset = 0
136 136 buflen = len(metabuf)
137 137 while buflen - offset >= 3:
138 138 key = metabuf[offset : offset + 1]
139 139 offset += 1
140 140 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
141 141 offset += 2
142 142 if offset + metalen > buflen:
143 143 raise ValueError(b'corrupted metadata: incomplete buffer')
144 144 value = metabuf[offset : offset + metalen]
145 145 metadict[key] = value
146 146 offset += metalen
147 147 if offset != buflen:
148 148 raise ValueError(b'corrupted metadata: redundant data')
149 149 return metadict
150 150
151 151
152 152 def _buildpackmeta(metadict):
153 153 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
154 154
155 155 The dict contains raw content - both keys and values are strings.
156 156 Upper-level business may want to serialize some of other types (like
157 157 integers) to strings before calling this function.
158 158
159 159 raise ProgrammingError when metadata key is illegal, or ValueError if
160 160 length limit is exceeded
161 161 """
162 162 metabuf = b''
163 163 for k, v in sorted(pycompat.iteritems((metadict or {}))):
164 164 if len(k) != 1:
165 165 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
166 166 if len(v) > 0xFFFE:
167 167 raise ValueError(
168 168 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
169 169 )
170 170 metabuf += k
171 171 metabuf += struct.pack(b'!H', len(v))
172 172 metabuf += v
173 173 # len(metabuf) is guaranteed representable in 4 bytes, because there are
174 174 # only 256 keys, and for each value, len(value) <= 0xfffe.
175 175 return metabuf
176 176
177 177
178 178 _metaitemtypes = {
179 179 constants.METAKEYFLAG: (int, pycompat.long),
180 180 constants.METAKEYSIZE: (int, pycompat.long),
181 181 }
182 182
183 183
184 184 def buildpackmeta(metadict):
185 185 """like _buildpackmeta, but typechecks metadict and normalize it.
186 186
187 187 This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
188 188 and METAKEYFLAG will be dropped if its value is 0.
189 189 """
190 190 newmeta = {}
191 191 for k, v in pycompat.iteritems(metadict or {}):
192 192 expectedtype = _metaitemtypes.get(k, (bytes,))
193 193 if not isinstance(v, expectedtype):
194 194 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
195 195 # normalize int to binary buffer
196 196 if int in expectedtype:
197 197 # optimization: remove flag if it's 0 to save space
198 198 if k == constants.METAKEYFLAG and v == 0:
199 199 continue
200 200 v = int2bin(v)
201 201 newmeta[k] = v
202 202 return _buildpackmeta(newmeta)
203 203
204 204
205 205 def parsepackmeta(metabuf):
206 206 """like _parsepackmeta, but convert fields to desired types automatically.
207 207
208 208 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
209 209 integers.
210 210 """
211 211 metadict = _parsepackmeta(metabuf)
212 212 for k, v in pycompat.iteritems(metadict):
213 213 if k in _metaitemtypes and int in _metaitemtypes[k]:
214 214 metadict[k] = bin2int(v)
215 215 return metadict
216 216
217 217
218 218 def int2bin(n):
219 219 """convert a non-negative integer to raw binary buffer"""
220 220 buf = bytearray()
221 221 while n > 0:
222 222 buf.insert(0, n & 0xFF)
223 223 n >>= 8
224 224 return bytes(buf)
225 225
226 226
227 227 def bin2int(buf):
228 228 """the reverse of int2bin, convert a binary buffer to an integer"""
229 229 x = 0
230 230 for b in bytearray(buf):
231 231 x <<= 8
232 232 x |= b
233 233 return x
234 234
235 235
236 236 def parsesizeflags(raw):
237 237 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
238 238
239 239 see remotefilelogserver.createfileblob for the format.
240 240 raise RuntimeError if the content is illformed.
241 241 """
242 242 flags = revlog.REVIDX_DEFAULT_FLAGS
243 243 size = None
244 244 try:
245 245 index = raw.index(b'\0')
246 246 header = raw[:index]
247 247 if header.startswith(b'v'):
248 248 # v1 and above, header starts with 'v'
249 249 if header.startswith(b'v1\n'):
250 250 for s in header.split(b'\n'):
251 251 if s.startswith(constants.METAKEYSIZE):
252 252 size = int(s[len(constants.METAKEYSIZE) :])
253 253 elif s.startswith(constants.METAKEYFLAG):
254 254 flags = int(s[len(constants.METAKEYFLAG) :])
255 255 else:
256 256 raise RuntimeError(
257 257 b'unsupported remotefilelog header: %s' % header
258 258 )
259 259 else:
260 260 # v0, str(int(size)) is the header
261 261 size = int(header)
262 262 except ValueError:
263 263 raise RuntimeError("unexpected remotefilelog header: illegal format")
264 264 if size is None:
265 265 raise RuntimeError("unexpected remotefilelog header: no size found")
266 266 return index + 1, size, flags
267 267
268 268
269 269 def buildfileblobheader(size, flags, version=None):
270 270 """return the header of a remotefilelog blob.
271 271
272 272 see remotefilelogserver.createfileblob for the format.
273 273 approximately the reverse of parsesizeflags.
274 274
275 275 version could be 0 or 1, or None (auto decide).
276 276 """
277 277 # choose v0 if flags is empty, otherwise v1
278 278 if version is None:
279 279 version = int(bool(flags))
280 280 if version == 1:
281 281 header = b'v1\n%s%d\n%s%d' % (
282 282 constants.METAKEYSIZE,
283 283 size,
284 284 constants.METAKEYFLAG,
285 285 flags,
286 286 )
287 287 elif version == 0:
288 288 if flags:
289 289 raise error.ProgrammingError(b'fileblob v0 does not support flag')
290 290 header = b'%d' % size
291 291 else:
292 292 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
293 293 return header
294 294
295 295
296 296 def ancestormap(raw):
297 297 offset, size, flags = parsesizeflags(raw)
298 298 start = offset + size
299 299
300 300 mapping = {}
301 301 while start < len(raw):
302 302 divider = raw.index(b'\0', start + 80)
303 303
304 304 currentnode = raw[start : (start + 20)]
305 305 p1 = raw[(start + 20) : (start + 40)]
306 306 p2 = raw[(start + 40) : (start + 60)]
307 307 linknode = raw[(start + 60) : (start + 80)]
308 308 copyfrom = raw[(start + 80) : divider]
309 309
310 310 mapping[currentnode] = (p1, p2, linknode, copyfrom)
311 311 start = divider + 1
312 312
313 313 return mapping
314 314
315 315
316 316 def readfile(path):
317 317 f = open(path, b'rb')
318 318 try:
319 319 result = f.read()
320 320
321 321 # we should never have empty files
322 322 if not result:
323 323 os.remove(path)
324 324 raise IOError(b"empty file: %s" % path)
325 325
326 326 return result
327 327 finally:
328 328 f.close()
329 329
330 330
331 331 def unlinkfile(filepath):
332 332 if pycompat.iswindows:
333 333 # On Windows, os.unlink cannot delete readonly files
334 334 os.chmod(filepath, stat.S_IWUSR)
335 335 os.unlink(filepath)
336 336
337 337
338 338 def renamefile(source, destination):
339 339 if pycompat.iswindows:
340 340 # On Windows, os.rename cannot rename readonly files
341 341 # and cannot overwrite destination if it exists
342 342 os.chmod(source, stat.S_IWUSR)
343 343 if os.path.isfile(destination):
344 344 os.chmod(destination, stat.S_IWUSR)
345 345 os.unlink(destination)
346 346
347 347 os.rename(source, destination)
348 348
349 349
350 350 def writefile(path, content, readonly=False):
351 351 dirname, filename = os.path.split(path)
352 352 if not os.path.exists(dirname):
353 353 try:
354 354 os.makedirs(dirname)
355 355 except OSError as ex:
356 356 if ex.errno != errno.EEXIST:
357 357 raise
358 358
359 359 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
360 360 os.close(fd)
361 361
362 362 try:
363 363 f = util.posixfile(temp, b'wb')
364 364 f.write(content)
365 365 f.close()
366 366
367 367 if readonly:
368 368 mode = 0o444
369 369 else:
370 370 # tempfiles are created with 0o600, so we need to manually set the
371 371 # mode.
372 372 oldumask = os.umask(0)
373 373 # there's no way to get the umask without modifying it, so set it
374 374 # back
375 375 os.umask(oldumask)
376 376 mode = ~oldumask
377 377
378 378 renamefile(temp, path)
379 379 os.chmod(path, mode)
380 380 except Exception:
381 381 try:
382 382 unlinkfile(temp)
383 383 except OSError:
384 384 pass
385 385 raise
386 386
387 387
388 388 def sortnodes(nodes, parentfunc):
389 389 """Topologically sorts the nodes, using the parentfunc to find
390 390 the parents of nodes."""
391 391 nodes = set(nodes)
392 392 childmap = {}
393 393 parentmap = {}
394 394 roots = []
395 395
396 396 # Build a child and parent map
397 397 for n in nodes:
398 398 parents = [p for p in parentfunc(n) if p in nodes]
399 399 parentmap[n] = set(parents)
400 400 for p in parents:
401 401 childmap.setdefault(p, set()).add(n)
402 402 if not parents:
403 403 roots.append(n)
404 404
405 405 roots.sort()
406 406 # Process roots, adding children to the queue as they become roots
407 407 results = []
408 408 while roots:
409 409 n = roots.pop(0)
410 410 results.append(n)
411 411 if n in childmap:
412 412 children = childmap[n]
413 413 for c in children:
414 414 childparents = parentmap[c]
415 415 childparents.remove(n)
416 416 if len(childparents) == 0:
417 417 # insert at the beginning, that way child nodes
418 418 # are likely to be output immediately after their
419 419 # parents. This gives better compression results.
420 420 roots.insert(0, c)
421 421
422 422 return results
423 423
424 424
425 425 def readexactly(stream, n):
426 426 '''read n bytes from stream.read and abort if less was available'''
427 427 s = stream.read(n)
428 428 if len(s) < n:
429 429 raise error.Abort(
430 430 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
431 431 % (len(s), n)
432 432 )
433 433 return s
434 434
435 435
436 436 def readunpack(stream, fmt):
437 437 data = readexactly(stream, struct.calcsize(fmt))
438 438 return struct.unpack(fmt, data)
439 439
440 440
441 441 def readpath(stream):
442 442 rawlen = readexactly(stream, constants.FILENAMESIZE)
443 443 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
444 444 return readexactly(stream, pathlen)
445 445
446 446
447 447 def readnodelist(stream):
448 448 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
449 449 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
450 450 for i in pycompat.xrange(nodecount):
451 451 yield readexactly(stream, constants.NODESIZE)
452 452
453 453
454 454 def readpathlist(stream):
455 455 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
456 456 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
457 457 for i in pycompat.xrange(pathcount):
458 458 yield readpath(stream)
459 459
460 460
461 461 def getgid(groupname):
462 462 try:
463 463 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
464 464 return gid
465 465 except KeyError:
466 466 return None
467 467
468 468
469 469 def setstickygroupdir(path, gid, warn=None):
470 470 if gid is None:
471 471 return
472 472 try:
473 473 os.chown(path, -1, gid)
474 474 os.chmod(path, 0o2775)
475 475 except (IOError, OSError) as ex:
476 476 if warn:
477 477 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
478 478
479 479
480 480 def mkstickygroupdir(ui, path):
481 481 """Creates the given directory (if it doesn't exist) and give it a
482 482 particular group with setgid enabled."""
483 483 gid = None
484 484 groupname = ui.config(b"remotefilelog", b"cachegroup")
485 485 if groupname:
486 486 gid = getgid(groupname)
487 487 if gid is None:
488 488 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
489 489
490 490 # we use a single stat syscall to test the existence and mode / group bit
491 491 st = None
492 492 try:
493 493 st = os.stat(path)
494 494 except OSError:
495 495 pass
496 496
497 497 if st:
498 498 # exists
499 499 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
500 500 # permission needs to be fixed
501 501 setstickygroupdir(path, gid, ui.warn)
502 502 return
503 503
504 504 oldumask = os.umask(0o002)
505 505 try:
506 506 missingdirs = [path]
507 507 path = os.path.dirname(path)
508 508 while path and not os.path.exists(path):
509 509 missingdirs.append(path)
510 510 path = os.path.dirname(path)
511 511
512 512 for path in reversed(missingdirs):
513 513 try:
514 514 os.mkdir(path)
515 515 except OSError as ex:
516 516 if ex.errno != errno.EEXIST:
517 517 raise
518 518
519 519 for path in missingdirs:
520 520 setstickygroupdir(path, gid, ui.warn)
521 521 finally:
522 522 os.umask(oldumask)
523 523
524 524
525 525 def getusername(ui):
526 526 try:
527 527 return stringutil.shortuser(ui.username())
528 528 except Exception:
529 529 return b'unknown'
530 530
531 531
532 532 def getreponame(ui):
533 533 reponame = ui.config(b'paths', b'default')
534 534 if reponame:
535 535 return os.path.basename(reponame)
536 536 return b"unknown"
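# Editor's note (illustration, not part of this changeset): a standalone
# round-trip of the pack metadata encoding handled by _buildpackmeta() and
# _parsepackmeta() above: each item is a single-byte key, a 2-byte big-endian
# length, and the raw value. The single-byte keys used here are placeholders;
# the real code uses constants.METAKEYSIZE and constants.METAKEYFLAG.
import struct


def sketch_buildpackmeta(metadict):
    buf = b""
    for key, value in sorted(metadict.items()):
        assert len(key) == 1 and len(value) <= 0xFFFE
        buf += key + struct.pack(b"!H", len(value)) + value
    return buf


def sketch_parsepackmeta(buf):
    metadict, offset = {}, 0
    while offset < len(buf):
        key = buf[offset:offset + 1]
        (length,) = struct.unpack_from(b"!H", buf, offset + 1)
        metadict[key] = buf[offset + 3:offset + 3 + length]
        offset += 3 + length
    return metadict


meta = {b"s": b"1234", b"f": b"\x01"}
assert sketch_parsepackmeta(sketch_buildpackmeta(meta)) == meta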
@@ -1,1293 +1,1295 b''
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
38 38
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 import hashlib
49 48 import sqlite3
50 49 import struct
51 50 import threading
52 51 import zlib
53 52
54 53 from mercurial.i18n import _
55 54 from mercurial.node import (
56 55 nullid,
57 56 nullrev,
58 57 short,
59 58 )
60 59 from mercurial.thirdparty import attr
61 60 from mercurial import (
62 61 ancestor,
63 62 dagop,
64 63 encoding,
65 64 error,
66 65 extensions,
67 66 localrepo,
68 67 mdiff,
69 68 pycompat,
70 69 registrar,
71 70 util,
72 71 verify,
73 72 )
74 73 from mercurial.interfaces import (
75 74 repository,
76 75 util as interfaceutil,
77 76 )
78 from mercurial.utils import storageutil
77 from mercurial.utils import (
78 hashutil,
79 storageutil,
80 )
79 81
80 82 try:
81 83 from mercurial import zstd
82 84
83 85 zstd.__version__
84 86 except ImportError:
85 87 zstd = None
86 88
87 89 configtable = {}
88 90 configitem = registrar.configitem(configtable)
89 91
90 92 # experimental config: storage.sqlite.compression
91 93 configitem(
92 94 b'storage',
93 95 b'sqlite.compression',
94 96 default=b'zstd' if zstd else b'zlib',
95 97 experimental=True,
96 98 )
97 99
98 100 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
99 101 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
100 102 # be specifying the version(s) of Mercurial they are tested with, or
101 103 # leave the attribute unspecified.
102 104 testedwith = b'ships-with-hg-core'
103 105
104 106 REQUIREMENT = b'exp-sqlite-001'
105 107 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
106 108 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
107 109 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
108 110 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
109 111
110 112 CURRENT_SCHEMA_VERSION = 1
111 113
112 114 COMPRESSION_NONE = 1
113 115 COMPRESSION_ZSTD = 2
114 116 COMPRESSION_ZLIB = 3
115 117
116 118 FLAG_CENSORED = 1
117 119 FLAG_MISSING_P1 = 2
118 120 FLAG_MISSING_P2 = 4
119 121
120 122 CREATE_SCHEMA = [
121 123 # Deltas are stored as content-indexed blobs.
122 124 # compression column holds COMPRESSION_* constant for how the
123 125 # delta is encoded.
124 126 'CREATE TABLE delta ('
125 127 ' id INTEGER PRIMARY KEY, '
126 128 ' compression INTEGER NOT NULL, '
127 129 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
128 130 ' delta BLOB NOT NULL '
129 131 ')',
130 132 # Tracked paths are denormalized to integers to avoid redundant
131 133 # storage of the path name.
132 134 'CREATE TABLE filepath ('
133 135 ' id INTEGER PRIMARY KEY, '
134 136 ' path BLOB NOT NULL '
135 137 ')',
136 138 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
137 139 # We have a single table for all file revision data.
138 140 # Each file revision is uniquely described by a (path, rev) and
139 141 # (path, node).
140 142 #
141 143 # Revision data is stored as a pointer to the delta producing this
142 144 # revision and the file revision whose delta should be applied before
143 145 # that one. One can reconstruct the delta chain by recursively following
144 146 # the delta base revision pointers until one encounters NULL.
145 147 #
146 148 # flags column holds bitwise integer flags controlling storage options.
147 149 # These flags are defined by the FLAG_* constants.
148 150 'CREATE TABLE fileindex ('
149 151 ' id INTEGER PRIMARY KEY, '
150 152 ' pathid INTEGER REFERENCES filepath(id), '
151 153 ' revnum INTEGER NOT NULL, '
152 154 ' p1rev INTEGER NOT NULL, '
153 155 ' p2rev INTEGER NOT NULL, '
154 156 ' linkrev INTEGER NOT NULL, '
155 157 ' flags INTEGER NOT NULL, '
156 158 ' deltaid INTEGER REFERENCES delta(id), '
157 159 ' deltabaseid INTEGER REFERENCES fileindex(id), '
158 160 ' node BLOB NOT NULL '
159 161 ')',
160 162 'CREATE UNIQUE INDEX fileindex_pathrevnum '
161 163 ' ON fileindex (pathid, revnum)',
162 164 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
163 165 # Provide a view over all file data for convenience.
164 166 'CREATE VIEW filedata AS '
165 167 'SELECT '
166 168 ' fileindex.id AS id, '
167 169 ' filepath.id AS pathid, '
168 170 ' filepath.path AS path, '
169 171 ' fileindex.revnum AS revnum, '
170 172 ' fileindex.node AS node, '
171 173 ' fileindex.p1rev AS p1rev, '
172 174 ' fileindex.p2rev AS p2rev, '
173 175 ' fileindex.linkrev AS linkrev, '
174 176 ' fileindex.flags AS flags, '
175 177 ' fileindex.deltaid AS deltaid, '
176 178 ' fileindex.deltabaseid AS deltabaseid '
177 179 'FROM filepath, fileindex '
178 180 'WHERE fileindex.pathid=filepath.id',
179 181 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
180 182 ]
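# Illustration of the layout described in the schema comments above, using
# made-up rows:
#
#   fileindex: id=1 revnum=0 deltaid=10 deltabaseid=NULL  (stored as fulltext)
#              id=2 revnum=1 deltaid=11 deltabaseid=1     (delta against rev 0)
#              id=3 revnum=2 deltaid=12 deltabaseid=2     (delta against rev 1)
#
# Reconstructing revnum=2 follows deltabaseid from id=3 back to NULL, takes
# delta 10 as the base text, and applies deltas 11 and 12 on top of it (see
# resolvedeltachain() below).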
181 183
182 184
183 185 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
184 186 """Resolve a delta chain for a file node."""
185 187
186 188 # TODO the "not in ({stops})" here is possibly slowing down the query
187 189 # because it needs to perform the lookup on every recursive invocation.
188 190 # This could possibly be faster if we created a temporary query with
189 191 # baseid "poisoned" to null and limited the recursive filter to
190 192 # "is not null".
191 193 res = db.execute(
192 194 'WITH RECURSIVE '
193 195 ' deltachain(deltaid, baseid) AS ('
194 196 ' SELECT deltaid, deltabaseid FROM fileindex '
195 197 ' WHERE pathid=? AND node=? '
196 198 ' UNION ALL '
197 199 ' SELECT fileindex.deltaid, deltabaseid '
198 200 ' FROM fileindex, deltachain '
199 201 ' WHERE '
200 202 ' fileindex.id=deltachain.baseid '
201 203 ' AND deltachain.baseid IS NOT NULL '
202 204 ' AND fileindex.id NOT IN ({stops}) '
203 205 ' ) '
204 206 'SELECT deltachain.baseid, compression, delta '
205 207 'FROM deltachain, delta '
206 208 'WHERE delta.id=deltachain.deltaid'.format(
207 209 stops=','.join(['?'] * len(stoprids))
208 210 ),
209 211 tuple([pathid, node] + list(stoprids.keys())),
210 212 )
211 213
212 214 deltas = []
213 215 lastdeltabaseid = None
214 216
215 217 for deltabaseid, compression, delta in res:
216 218 lastdeltabaseid = deltabaseid
217 219
218 220 if compression == COMPRESSION_ZSTD:
219 221 delta = zstddctx.decompress(delta)
220 222 elif compression == COMPRESSION_NONE:
221 223 delta = delta
222 224 elif compression == COMPRESSION_ZLIB:
223 225 delta = zlib.decompress(delta)
224 226 else:
225 227 raise SQLiteStoreError(
226 228 b'unhandled compression type: %d' % compression
227 229 )
228 230
229 231 deltas.append(delta)
230 232
231 233 if lastdeltabaseid in stoprids:
232 234 basetext = revisioncache[stoprids[lastdeltabaseid]]
233 235 else:
234 236 basetext = deltas.pop()
235 237
236 238 deltas.reverse()
237 239 fulltext = mdiff.patches(basetext, deltas)
238 240
239 241 # SQLite returns buffer instances for blob columns on Python 2. This
240 242 # type can propagate through the delta application layer. Because
241 243 # downstream callers assume revisions are bytes, cast as needed.
242 244 if not isinstance(fulltext, bytes):
243 245 fulltext = bytes(fulltext)
244 246
245 247 return fulltext
246 248
247 249
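# A note on the helper below: the delta table declares its hash column as
# UNIQUE, so inserting a blob that is already stored raises
# sqlite3.IntegrityError; in that case the existing row is looked up and
# reused, de-duplicating identical deltas by their SHA-1 content hash.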
248 250 def insertdelta(db, compression, hash, delta):
249 251 try:
250 252 return db.execute(
251 253 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
252 254 (compression, hash, delta),
253 255 ).lastrowid
254 256 except sqlite3.IntegrityError:
255 257 return db.execute(
256 258 'SELECT id FROM delta WHERE hash=?', (hash,)
257 259 ).fetchone()[0]
258 260
259 261
260 262 class SQLiteStoreError(error.StorageError):
261 263 pass
262 264
263 265
264 266 @attr.s
265 267 class revisionentry(object):
266 268 rid = attr.ib()
267 269 rev = attr.ib()
268 270 node = attr.ib()
269 271 p1rev = attr.ib()
270 272 p2rev = attr.ib()
271 273 p1node = attr.ib()
272 274 p2node = attr.ib()
273 275 linkrev = attr.ib()
274 276 flags = attr.ib()
275 277
276 278
277 279 @interfaceutil.implementer(repository.irevisiondelta)
278 280 @attr.s(slots=True)
279 281 class sqliterevisiondelta(object):
280 282 node = attr.ib()
281 283 p1node = attr.ib()
282 284 p2node = attr.ib()
283 285 basenode = attr.ib()
284 286 flags = attr.ib()
285 287 baserevisionsize = attr.ib()
286 288 revision = attr.ib()
287 289 delta = attr.ib()
288 290 linknode = attr.ib(default=None)
289 291
290 292
291 293 @interfaceutil.implementer(repository.iverifyproblem)
292 294 @attr.s(frozen=True)
293 295 class sqliteproblem(object):
294 296 warning = attr.ib(default=None)
295 297 error = attr.ib(default=None)
296 298 node = attr.ib(default=None)
297 299
298 300
299 301 @interfaceutil.implementer(repository.ifilestorage)
300 302 class sqlitefilestore(object):
301 303 """Implements storage for an individual tracked path."""
302 304
303 305 def __init__(self, db, path, compression):
304 306 self._db = db
305 307 self._path = path
306 308
307 309 self._pathid = None
308 310
309 311 # revnum -> node
310 312 self._revtonode = {}
311 313 # node -> revnum
312 314 self._nodetorev = {}
313 315 # node -> data structure
314 316 self._revisions = {}
315 317
316 318 self._revisioncache = util.lrucachedict(10)
317 319
318 320 self._compengine = compression
319 321
320 322 if compression == b'zstd':
321 323 self._cctx = zstd.ZstdCompressor(level=3)
322 324 self._dctx = zstd.ZstdDecompressor()
323 325 else:
324 326 self._cctx = None
325 327 self._dctx = None
326 328
327 329 self._refreshindex()
328 330
329 331 def _refreshindex(self):
330 332 self._revtonode = {}
331 333 self._nodetorev = {}
332 334 self._revisions = {}
333 335
334 336 res = list(
335 337 self._db.execute(
336 338 'SELECT id FROM filepath WHERE path=?', (self._path,)
337 339 )
338 340 )
339 341
340 342 if not res:
341 343 self._pathid = None
342 344 return
343 345
344 346 self._pathid = res[0][0]
345 347
346 348 res = self._db.execute(
347 349 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
348 350 'FROM fileindex '
349 351 'WHERE pathid=? '
350 352 'ORDER BY revnum ASC',
351 353 (self._pathid,),
352 354 )
353 355
354 356 for i, row in enumerate(res):
355 357 rid, rev, node, p1rev, p2rev, linkrev, flags = row
356 358
357 359 if i != rev:
358 360 raise SQLiteStoreError(
359 361 _(b'sqlite database has inconsistent revision numbers')
360 362 )
361 363
362 364 if p1rev == nullrev:
363 365 p1node = nullid
364 366 else:
365 367 p1node = self._revtonode[p1rev]
366 368
367 369 if p2rev == nullrev:
368 370 p2node = nullid
369 371 else:
370 372 p2node = self._revtonode[p2rev]
371 373
372 374 entry = revisionentry(
373 375 rid=rid,
374 376 rev=rev,
375 377 node=node,
376 378 p1rev=p1rev,
377 379 p2rev=p2rev,
378 380 p1node=p1node,
379 381 p2node=p2node,
380 382 linkrev=linkrev,
381 383 flags=flags,
382 384 )
383 385
384 386 self._revtonode[rev] = node
385 387 self._nodetorev[node] = rev
386 388 self._revisions[node] = entry
387 389
388 390 # Start of ifileindex interface.
389 391
390 392 def __len__(self):
391 393 return len(self._revisions)
392 394
393 395 def __iter__(self):
394 396 return iter(pycompat.xrange(len(self._revisions)))
395 397
396 398 def hasnode(self, node):
397 399 if node == nullid:
398 400 return False
399 401
400 402 return node in self._nodetorev
401 403
402 404 def revs(self, start=0, stop=None):
403 405 return storageutil.iterrevs(
404 406 len(self._revisions), start=start, stop=stop
405 407 )
406 408
407 409 def parents(self, node):
408 410 if node == nullid:
409 411 return nullid, nullid
410 412
411 413 if node not in self._revisions:
412 414 raise error.LookupError(node, self._path, _(b'no node'))
413 415
414 416 entry = self._revisions[node]
415 417 return entry.p1node, entry.p2node
416 418
417 419 def parentrevs(self, rev):
418 420 if rev == nullrev:
419 421 return nullrev, nullrev
420 422
421 423 if rev not in self._revtonode:
422 424 raise IndexError(rev)
423 425
424 426 entry = self._revisions[self._revtonode[rev]]
425 427 return entry.p1rev, entry.p2rev
426 428
427 429 def rev(self, node):
428 430 if node == nullid:
429 431 return nullrev
430 432
431 433 if node not in self._nodetorev:
432 434 raise error.LookupError(node, self._path, _(b'no node'))
433 435
434 436 return self._nodetorev[node]
435 437
436 438 def node(self, rev):
437 439 if rev == nullrev:
438 440 return nullid
439 441
440 442 if rev not in self._revtonode:
441 443 raise IndexError(rev)
442 444
443 445 return self._revtonode[rev]
444 446
445 447 def lookup(self, node):
446 448 return storageutil.fileidlookup(self, node, self._path)
447 449
448 450 def linkrev(self, rev):
449 451 if rev == nullrev:
450 452 return nullrev
451 453
452 454 if rev not in self._revtonode:
453 455 raise IndexError(rev)
454 456
455 457 entry = self._revisions[self._revtonode[rev]]
456 458 return entry.linkrev
457 459
458 460 def iscensored(self, rev):
459 461 if rev == nullrev:
460 462 return False
461 463
462 464 if rev not in self._revtonode:
463 465 raise IndexError(rev)
464 466
465 467 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
466 468
467 469 def commonancestorsheads(self, node1, node2):
468 470 rev1 = self.rev(node1)
469 471 rev2 = self.rev(node2)
470 472
471 473 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
472 474 return pycompat.maplist(self.node, ancestors)
473 475
474 476 def descendants(self, revs):
475 477 # TODO we could implement this using a recursive SQL query, which
476 478 # might be faster.
477 479 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
478 480
479 481 def heads(self, start=None, stop=None):
480 482 if start is None and stop is None:
481 483 if not len(self):
482 484 return [nullid]
483 485
484 486 startrev = self.rev(start) if start is not None else nullrev
485 487 stoprevs = {self.rev(n) for n in stop or []}
486 488
487 489 revs = dagop.headrevssubset(
488 490 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
489 491 )
490 492
491 493 return [self.node(rev) for rev in revs]
492 494
493 495 def children(self, node):
494 496 rev = self.rev(node)
495 497
496 498 res = self._db.execute(
497 499 'SELECT'
498 500 ' node '
499 501 ' FROM filedata '
500 502 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
501 503 ' ORDER BY revnum ASC',
502 504 (self._path, rev, rev),
503 505 )
504 506
505 507 return [row[0] for row in res]
506 508
507 509 # End of ifileindex interface.
508 510
509 511 # Start of ifiledata interface.
510 512
511 513 def size(self, rev):
512 514 if rev == nullrev:
513 515 return 0
514 516
515 517 if rev not in self._revtonode:
516 518 raise IndexError(rev)
517 519
518 520 node = self._revtonode[rev]
519 521
520 522 if self.renamed(node):
521 523 return len(self.read(node))
522 524
523 525 return len(self.revision(node))
524 526
525 527 def revision(self, node, raw=False, _verifyhash=True):
526 528 if node in (nullid, nullrev):
527 529 return b''
528 530
529 531 if isinstance(node, int):
530 532 node = self.node(node)
531 533
532 534 if node not in self._nodetorev:
533 535 raise error.LookupError(node, self._path, _(b'no node'))
534 536
535 537 if node in self._revisioncache:
536 538 return self._revisioncache[node]
537 539
538 540 # Because we have a fulltext revision cache, we are able to
539 541 # short-circuit delta chain traversal and decompression as soon as
540 542 # we encounter a revision in the cache.
541 543
542 544 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
543 545
544 546 if not stoprids:
545 547 stoprids[-1] = None
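# (The -1 entry is a sentinel: fileindex ids are positive, so with an empty
# revision cache the recursive query is never cut short and the delta chain
# is walked all the way down to its NULL base.)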
546 548
547 549 fulltext = resolvedeltachain(
548 550 self._db,
549 551 self._pathid,
550 552 node,
551 553 self._revisioncache,
552 554 stoprids,
553 555 zstddctx=self._dctx,
554 556 )
555 557
556 558 # Don't verify hashes if parent nodes were rewritten, as the hash
557 559 # wouldn't verify.
558 560 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
559 561 _verifyhash = False
560 562
561 563 if _verifyhash:
562 564 self._checkhash(fulltext, node)
563 565 self._revisioncache[node] = fulltext
564 566
565 567 return fulltext
566 568
567 569 def rawdata(self, *args, **kwargs):
568 570 return self.revision(*args, **kwargs)
569 571
570 572 def read(self, node):
571 573 return storageutil.filtermetadata(self.revision(node))
572 574
573 575 def renamed(self, node):
574 576 return storageutil.filerevisioncopied(self, node)
575 577
576 578 def cmp(self, node, fulltext):
577 579 return not storageutil.filedataequivalent(self, node, fulltext)
578 580
579 581 def emitrevisions(
580 582 self,
581 583 nodes,
582 584 nodesorder=None,
583 585 revisiondata=False,
584 586 assumehaveparentrevisions=False,
585 587 deltamode=repository.CG_DELTAMODE_STD,
586 588 ):
587 589 if nodesorder not in (b'nodes', b'storage', b'linear', None):
588 590 raise error.ProgrammingError(
589 591 b'unhandled value for nodesorder: %s' % nodesorder
590 592 )
591 593
592 594 nodes = [n for n in nodes if n != nullid]
593 595
594 596 if not nodes:
595 597 return
596 598
597 599 # TODO perform in a single query.
598 600 res = self._db.execute(
599 601 'SELECT revnum, deltaid FROM fileindex '
600 602 'WHERE pathid=? '
601 603 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
602 604 tuple([self._pathid] + nodes),
603 605 )
604 606
605 607 deltabases = {}
606 608
607 609 for rev, deltaid in res:
608 610 res = self._db.execute(
609 611 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
610 612 (self._pathid, deltaid),
611 613 )
612 614 deltabases[rev] = res.fetchone()[0]
613 615
614 616 # TODO define revdifffn so we can use delta from storage.
615 617 for delta in storageutil.emitrevisions(
616 618 self,
617 619 nodes,
618 620 nodesorder,
619 621 sqliterevisiondelta,
620 622 deltaparentfn=deltabases.__getitem__,
621 623 revisiondata=revisiondata,
622 624 assumehaveparentrevisions=assumehaveparentrevisions,
623 625 deltamode=deltamode,
624 626 ):
625 627
626 628 yield delta
627 629
628 630 # End of ifiledata interface.
629 631
630 632 # Start of ifilemutation interface.
631 633
632 634 def add(self, filedata, meta, transaction, linkrev, p1, p2):
633 635 if meta or filedata.startswith(b'\x01\n'):
634 636 filedata = storageutil.packmeta(meta, filedata)
635 637
636 638 return self.addrevision(filedata, transaction, linkrev, p1, p2)
637 639
638 640 def addrevision(
639 641 self,
640 642 revisiondata,
641 643 transaction,
642 644 linkrev,
643 645 p1,
644 646 p2,
645 647 node=None,
646 648 flags=0,
647 649 cachedelta=None,
648 650 ):
649 651 if flags:
650 652 raise SQLiteStoreError(_(b'flags not supported on revisions'))
651 653
652 654 validatehash = node is not None
653 655 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
654 656
655 657 if validatehash:
656 658 self._checkhash(revisiondata, node, p1, p2)
657 659
658 660 if node in self._nodetorev:
659 661 return node
660 662
661 663 node = self._addrawrevision(
662 664 node, revisiondata, transaction, linkrev, p1, p2
663 665 )
664 666
665 667 self._revisioncache[node] = revisiondata
666 668 return node
667 669
668 670 def addgroup(
669 671 self,
670 672 deltas,
671 673 linkmapper,
672 674 transaction,
673 675 addrevisioncb=None,
674 676 maybemissingparents=False,
675 677 ):
676 678 nodes = []
677 679
678 680 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
679 681 storeflags = 0
680 682
681 683 if wireflags & repository.REVISION_FLAG_CENSORED:
682 684 storeflags |= FLAG_CENSORED
683 685
684 686 if wireflags & ~repository.REVISION_FLAG_CENSORED:
685 687 raise SQLiteStoreError(b'unhandled revision flag')
686 688
687 689 if maybemissingparents:
688 690 if p1 != nullid and not self.hasnode(p1):
689 691 p1 = nullid
690 692 storeflags |= FLAG_MISSING_P1
691 693
692 694 if p2 != nullid and not self.hasnode(p2):
693 695 p2 = nullid
694 696 storeflags |= FLAG_MISSING_P2
695 697
696 698 baserev = self.rev(deltabase)
697 699
698 700 # If base is censored, delta must be full replacement in a single
699 701 # patch operation.
700 702 if baserev != nullrev and self.iscensored(baserev):
701 703 hlen = struct.calcsize(b'>lll')
702 704 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
703 705 newlen = len(delta) - hlen
704 706
705 707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
706 708 raise error.CensoredBaseError(self._path, deltabase)
707 709
708 710 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
709 711 delta, baserev, lambda x: len(self.rawdata(x))
710 712 ):
711 713 storeflags |= FLAG_CENSORED
712 714
713 715 linkrev = linkmapper(linknode)
714 716
715 717 nodes.append(node)
716 718
717 719 if node in self._revisions:
718 720 # Possibly reset parents to make them proper.
719 721 entry = self._revisions[node]
720 722
721 723 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
722 724 entry.p1node = p1
723 725 entry.p1rev = self._nodetorev[p1]
724 726 entry.flags &= ~FLAG_MISSING_P1
725 727
726 728 self._db.execute(
727 729 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
728 730 (self._nodetorev[p1], entry.flags, entry.rid),
729 731 )
730 732
731 733 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
732 734 entry.p2node = p2
733 735 entry.p2rev = self._nodetorev[p2]
734 736 entry.flags &= ~FLAG_MISSING_P2
735 737
736 738 self._db.execute(
737 739 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
738 740 (self._nodetorev[p2], entry.flags, entry.rid),
739 741 )
740 742
741 743 continue
742 744
743 745 if deltabase == nullid:
744 746 text = mdiff.patch(b'', delta)
745 747 storedelta = None
746 748 else:
747 749 text = None
748 750 storedelta = (deltabase, delta)
749 751
750 752 self._addrawrevision(
751 753 node,
752 754 text,
753 755 transaction,
754 756 linkrev,
755 757 p1,
756 758 p2,
757 759 storedelta=storedelta,
758 760 flags=storeflags,
759 761 )
760 762
761 763 if addrevisioncb:
762 764 addrevisioncb(self, node)
763 765
764 766 return nodes
765 767
766 768 def censorrevision(self, tr, censornode, tombstone=b''):
767 769 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
768 770
769 771 # This restriction is cargo culted from revlogs and makes no sense for
770 772 # SQLite, since columns can be resized at will.
771 773 if len(tombstone) > len(self.rawdata(censornode)):
772 774 raise error.Abort(
773 775 _(b'censor tombstone must be no longer than censored data')
774 776 )
775 777
776 778 # We need to replace the censored revision's data with the tombstone.
777 779 # But replacing that data will have implications for delta chains that
778 780 # reference it.
779 781 #
780 782 # While "better," more complex strategies are possible, we do something
781 783 # simple: we find delta chain children of the censored revision and we
782 784 # replace those incremental deltas with fulltexts of their corresponding
783 785 # revision. Then we delete the now-unreferenced delta and original
784 786 # revision and insert a replacement.
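#
# Sketch of the effect: if revision X is censored and revision Y's delta was
# based on X, Y is rewritten below as a standalone fulltext (its deltabaseid
# becomes NULL) before X's delta is swapped for the tombstone and the original
# delta row is deleted.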
785 787
786 788 # Find the delta to be censored.
787 789 censoreddeltaid = self._db.execute(
788 790 'SELECT deltaid FROM fileindex WHERE id=?',
789 791 (self._revisions[censornode].rid,),
790 792 ).fetchone()[0]
791 793
792 794 # Find all its delta chain children.
793 795 # TODO once we support storing deltas for !files, we'll need to look
794 796 # for those delta chains too.
795 797 rows = list(
796 798 self._db.execute(
797 799 'SELECT id, pathid, node FROM fileindex '
798 800 'WHERE deltabaseid=? OR deltaid=?',
799 801 (censoreddeltaid, censoreddeltaid),
800 802 )
801 803 )
802 804
803 805 for row in rows:
804 806 rid, pathid, node = row
805 807
806 808 fulltext = resolvedeltachain(
807 809 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
808 810 )
809 811
810 deltahash = hashlib.sha1(fulltext).digest()
812 deltahash = hashutil.sha1(fulltext).digest()
811 813
812 814 if self._compengine == b'zstd':
813 815 deltablob = self._cctx.compress(fulltext)
814 816 compression = COMPRESSION_ZSTD
815 817 elif self._compengine == b'zlib':
816 818 deltablob = zlib.compress(fulltext)
817 819 compression = COMPRESSION_ZLIB
818 820 elif self._compengine == b'none':
819 821 deltablob = fulltext
820 822 compression = COMPRESSION_NONE
821 823 else:
822 824 raise error.ProgrammingError(
823 825 b'unhandled compression engine: %s' % self._compengine
824 826 )
825 827
826 828 if len(deltablob) >= len(fulltext):
827 829 deltablob = fulltext
828 830 compression = COMPRESSION_NONE
829 831
830 832 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
831 833
832 834 self._db.execute(
833 835 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
834 836 'WHERE id=?',
835 837 (deltaid, rid),
836 838 )
837 839
838 840 # Now create the tombstone delta and replace the delta on the censored
839 841 # node.
840 deltahash = hashlib.sha1(tombstone).digest()
842 deltahash = hashutil.sha1(tombstone).digest()
841 843 tombstonedeltaid = insertdelta(
842 844 self._db, COMPRESSION_NONE, deltahash, tombstone
843 845 )
844 846
845 847 flags = self._revisions[censornode].flags
846 848 flags |= FLAG_CENSORED
847 849
848 850 self._db.execute(
849 851 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
850 852 'WHERE pathid=? AND node=?',
851 853 (flags, tombstonedeltaid, self._pathid, censornode),
852 854 )
853 855
854 856 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
855 857
856 858 self._refreshindex()
857 859 self._revisioncache.clear()
858 860
859 861 def getstrippoint(self, minlink):
860 862 return storageutil.resolvestripinfo(
861 863 minlink,
862 864 len(self) - 1,
863 865 [self.rev(n) for n in self.heads()],
864 866 self.linkrev,
865 867 self.parentrevs,
866 868 )
867 869
868 870 def strip(self, minlink, transaction):
869 871 if not len(self):
870 872 return
871 873
872 874 rev, _ignored = self.getstrippoint(minlink)
873 875
874 876 if rev == len(self):
875 877 return
876 878
877 879 for rev in self.revs(rev):
878 880 self._db.execute(
879 881 'DELETE FROM fileindex WHERE pathid=? AND node=?',
880 882 (self._pathid, self.node(rev)),
881 883 )
882 884
883 885 # TODO how should we garbage collect data in delta table?
884 886
885 887 self._refreshindex()
886 888
887 889 # End of ifilemutation interface.
888 890
889 891 # Start of ifilestorage interface.
890 892
891 893 def files(self):
892 894 return []
893 895
894 896 def storageinfo(
895 897 self,
896 898 exclusivefiles=False,
897 899 sharedfiles=False,
898 900 revisionscount=False,
899 901 trackedsize=False,
900 902 storedsize=False,
901 903 ):
902 904 d = {}
903 905
904 906 if exclusivefiles:
905 907 d[b'exclusivefiles'] = []
906 908
907 909 if sharedfiles:
908 910 # TODO list sqlite file(s) here.
909 911 d[b'sharedfiles'] = []
910 912
911 913 if revisionscount:
912 914 d[b'revisionscount'] = len(self)
913 915
914 916 if trackedsize:
915 917 d[b'trackedsize'] = sum(
916 918 len(self.revision(node)) for node in self._nodetorev
917 919 )
918 920
919 921 if storedsize:
920 922 # TODO implement this?
921 923 d[b'storedsize'] = None
922 924
923 925 return d
924 926
925 927 def verifyintegrity(self, state):
926 928 state[b'skipread'] = set()
927 929
928 930 for rev in self:
929 931 node = self.node(rev)
930 932
931 933 try:
932 934 self.revision(node)
933 935 except Exception as e:
934 936 yield sqliteproblem(
935 937 error=_(b'unpacking %s: %s') % (short(node), e), node=node
936 938 )
937 939
938 940 state[b'skipread'].add(node)
939 941
940 942 # End of ifilestorage interface.
941 943
942 944 def _checkhash(self, fulltext, node, p1=None, p2=None):
943 945 if p1 is None and p2 is None:
944 946 p1, p2 = self.parents(node)
945 947
946 948 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
947 949 return
948 950
949 951 try:
950 952 del self._revisioncache[node]
951 953 except KeyError:
952 954 pass
953 955
954 956 if storageutil.iscensoredtext(fulltext):
955 957 raise error.CensoredNodeError(self._path, node, fulltext)
956 958
957 959 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
958 960
959 961 def _addrawrevision(
960 962 self,
961 963 node,
962 964 revisiondata,
963 965 transaction,
964 966 linkrev,
965 967 p1,
966 968 p2,
967 969 storedelta=None,
968 970 flags=0,
969 971 ):
970 972 if self._pathid is None:
971 973 res = self._db.execute(
972 974 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
973 975 )
974 976 self._pathid = res.lastrowid
975 977
976 978 # For simplicity, always store a delta against p1.
977 979 # TODO we need a lot more logic here to make behavior reasonable.
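# (Concretely, when no precomputed delta is supplied, the code below diffs the
# new fulltext against the p1 revision with mdiff.textdiff, or stores the
# fulltext itself when p1 is the null revision.)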
978 980
979 981 if storedelta:
980 982 deltabase, delta = storedelta
981 983
982 984 if isinstance(deltabase, int):
983 985 deltabase = self.node(deltabase)
984 986
985 987 else:
986 988 assert revisiondata is not None
987 989 deltabase = p1
988 990
989 991 if deltabase == nullid:
990 992 delta = revisiondata
991 993 else:
992 994 delta = mdiff.textdiff(
993 995 self.revision(self.rev(deltabase)), revisiondata
994 996 )
995 997
996 998 # File index stores a pointer to its delta and the parent delta.
997 999 # The parent delta is stored via a pointer to the fileindex PK.
998 1000 if deltabase == nullid:
999 1001 baseid = None
1000 1002 else:
1001 1003 baseid = self._revisions[deltabase].rid
1002 1004
1003 1005 # Deltas are stored with a hash of their content. This allows
1004 1006 # us to de-duplicate. The table is configured to ignore conflicts
1005 1007 # and it is faster to just insert and silently noop than to look
1006 1008 # first.
1007 deltahash = hashlib.sha1(delta).digest()
1009 deltahash = hashutil.sha1(delta).digest()
1008 1010
1009 1011 if self._compengine == b'zstd':
1010 1012 deltablob = self._cctx.compress(delta)
1011 1013 compression = COMPRESSION_ZSTD
1012 1014 elif self._compengine == b'zlib':
1013 1015 deltablob = zlib.compress(delta)
1014 1016 compression = COMPRESSION_ZLIB
1015 1017 elif self._compengine == b'none':
1016 1018 deltablob = delta
1017 1019 compression = COMPRESSION_NONE
1018 1020 else:
1019 1021 raise error.ProgrammingError(
1020 1022 b'unhandled compression engine: %s' % self._compengine
1021 1023 )
1022 1024
1023 1025 # Don't store compressed data if it isn't practical.
1024 1026 if len(deltablob) >= len(delta):
1025 1027 deltablob = delta
1026 1028 compression = COMPRESSION_NONE
1027 1029
1028 1030 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1029 1031
1030 1032 rev = len(self)
1031 1033
1032 1034 if p1 == nullid:
1033 1035 p1rev = nullrev
1034 1036 else:
1035 1037 p1rev = self._nodetorev[p1]
1036 1038
1037 1039 if p2 == nullid:
1038 1040 p2rev = nullrev
1039 1041 else:
1040 1042 p2rev = self._nodetorev[p2]
1041 1043
1042 1044 rid = self._db.execute(
1043 1045 'INSERT INTO fileindex ('
1044 1046 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1045 1047 ' deltaid, deltabaseid) '
1046 1048 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1047 1049 (
1048 1050 self._pathid,
1049 1051 rev,
1050 1052 node,
1051 1053 p1rev,
1052 1054 p2rev,
1053 1055 linkrev,
1054 1056 flags,
1055 1057 deltaid,
1056 1058 baseid,
1057 1059 ),
1058 1060 ).lastrowid
1059 1061
1060 1062 entry = revisionentry(
1061 1063 rid=rid,
1062 1064 rev=rev,
1063 1065 node=node,
1064 1066 p1rev=p1rev,
1065 1067 p2rev=p2rev,
1066 1068 p1node=p1,
1067 1069 p2node=p2,
1068 1070 linkrev=linkrev,
1069 1071 flags=flags,
1070 1072 )
1071 1073
1072 1074 self._nodetorev[node] = rev
1073 1075 self._revtonode[rev] = node
1074 1076 self._revisions[node] = entry
1075 1077
1076 1078 return node
1077 1079
1078 1080
1079 1081 class sqliterepository(localrepo.localrepository):
1080 1082 def cancopy(self):
1081 1083 return False
1082 1084
1083 1085 def transaction(self, *args, **kwargs):
1084 1086 current = self.currenttransaction()
1085 1087
1086 1088 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1087 1089
1088 1090 if current:
1089 1091 return tr
1090 1092
1091 1093 self._dbconn.execute('BEGIN TRANSACTION')
1092 1094
1093 1095 def committransaction(_):
1094 1096 self._dbconn.commit()
1095 1097
1096 1098 tr.addfinalize(b'sqlitestore', committransaction)
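# The SQLite transaction is scoped to the Mercurial transaction: BEGIN was
# issued above when the outermost transaction opened, and the finalize
# callback commits the SQLite side as the Mercurial transaction is closed.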
1097 1099
1098 1100 return tr
1099 1101
1100 1102 @property
1101 1103 def _dbconn(self):
1102 1104 # SQLite connections can only be used on the thread that created
1103 1105 # them. In most cases, this "just works." However, hgweb uses
1104 1106 # multiple threads.
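# (Hence the handling below: the cached connection is keyed by the creating
# thread's id, and a fresh connection is opened whenever this property is
# read from a different thread.)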
1105 1107 tid = threading.current_thread().ident
1106 1108
1107 1109 if self._db:
1108 1110 if self._db[0] == tid:
1109 1111 return self._db[1]
1110 1112
1111 1113 db = makedb(self.svfs.join(b'db.sqlite'))
1112 1114 self._db = (tid, db)
1113 1115
1114 1116 return db
1115 1117
1116 1118
1117 1119 def makedb(path):
1118 1120 """Construct a database handle for a database at path."""
1119 1121
1120 1122 db = sqlite3.connect(encoding.strfromlocal(path))
1121 1123 db.text_factory = bytes
1122 1124
1123 1125 res = db.execute('PRAGMA user_version').fetchone()[0]
1124 1126
1125 1127 # New database.
1126 1128 if res == 0:
1127 1129 for statement in CREATE_SCHEMA:
1128 1130 db.execute(statement)
1129 1131
1130 1132 db.commit()
1131 1133
1132 1134 elif res == CURRENT_SCHEMA_VERSION:
1133 1135 pass
1134 1136
1135 1137 else:
1136 1138 raise error.Abort(_(b'sqlite database has unrecognized version'))
1137 1139
1138 1140 db.execute('PRAGMA journal_mode=WAL')
1139 1141
1140 1142 return db
1141 1143
1142 1144
1143 1145 def featuresetup(ui, supported):
1144 1146 supported.add(REQUIREMENT)
1145 1147
1146 1148 if zstd:
1147 1149 supported.add(REQUIREMENT_ZSTD)
1148 1150
1149 1151 supported.add(REQUIREMENT_ZLIB)
1150 1152 supported.add(REQUIREMENT_NONE)
1151 1153 supported.add(REQUIREMENT_SHALLOW_FILES)
1152 1154 supported.add(repository.NARROW_REQUIREMENT)
1153 1155
1154 1156
1155 1157 def newreporequirements(orig, ui, createopts):
1156 1158 if createopts[b'backend'] != b'sqlite':
1157 1159 return orig(ui, createopts)
1158 1160
1159 1161 # This restriction can be lifted once we have more confidence.
1160 1162 if b'sharedrepo' in createopts:
1161 1163 raise error.Abort(
1162 1164 _(b'shared repositories not supported with SQLite store')
1163 1165 )
1164 1166
1165 1167 # This filtering is out of an abundance of caution: we want to ensure
1166 1168 # we honor creation options and we do that by annotating exactly the
1167 1169 # creation options we recognize.
1168 1170 known = {
1169 1171 b'narrowfiles',
1170 1172 b'backend',
1171 1173 b'shallowfilestore',
1172 1174 }
1173 1175
1174 1176 unsupported = set(createopts) - known
1175 1177 if unsupported:
1176 1178 raise error.Abort(
1177 1179 _(b'SQLite store does not support repo creation option: %s')
1178 1180 % b', '.join(sorted(unsupported))
1179 1181 )
1180 1182
1181 1183 # Since we're a hybrid store that still relies on revlogs, we fall back
1182 1184 # to using the revlogv1 backend's storage requirements then adding our
1183 1185 # own requirement.
1184 1186 createopts[b'backend'] = b'revlogv1'
1185 1187 requirements = orig(ui, createopts)
1186 1188 requirements.add(REQUIREMENT)
1187 1189
1188 1190 compression = ui.config(b'storage', b'sqlite.compression')
1189 1191
1190 1192 if compression == b'zstd' and not zstd:
1191 1193 raise error.Abort(
1192 1194 _(
1193 1195 b'storage.sqlite.compression set to "zstd" but '
1194 1196 b'zstandard compression not available to this '
1195 1197 b'Mercurial install'
1196 1198 )
1197 1199 )
1198 1200
1199 1201 if compression == b'zstd':
1200 1202 requirements.add(REQUIREMENT_ZSTD)
1201 1203 elif compression == b'zlib':
1202 1204 requirements.add(REQUIREMENT_ZLIB)
1203 1205 elif compression == b'none':
1204 1206 requirements.add(REQUIREMENT_NONE)
1205 1207 else:
1206 1208 raise error.Abort(
1207 1209 _(
1208 1210 b'unknown compression engine defined in '
1209 1211 b'storage.sqlite.compression: %s'
1210 1212 )
1211 1213 % compression
1212 1214 )
1213 1215
1214 1216 if createopts.get(b'shallowfilestore'):
1215 1217 requirements.add(REQUIREMENT_SHALLOW_FILES)
1216 1218
1217 1219 return requirements
1218 1220
1219 1221
1220 1222 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1221 1223 class sqlitefilestorage(object):
1222 1224 """Repository file storage backed by SQLite."""
1223 1225
1224 1226 def file(self, path):
1225 1227 if path[0] == b'/':
1226 1228 path = path[1:]
1227 1229
1228 1230 if REQUIREMENT_ZSTD in self.requirements:
1229 1231 compression = b'zstd'
1230 1232 elif REQUIREMENT_ZLIB in self.requirements:
1231 1233 compression = b'zlib'
1232 1234 elif REQUIREMENT_NONE in self.requirements:
1233 1235 compression = b'none'
1234 1236 else:
1235 1237 raise error.Abort(
1236 1238 _(
1237 1239 b'unable to determine what compression engine '
1238 1240 b'to use for SQLite storage'
1239 1241 )
1240 1242 )
1241 1243
1242 1244 return sqlitefilestore(self._dbconn, path, compression)
1243 1245
1244 1246
1245 1247 def makefilestorage(orig, requirements, features, **kwargs):
1246 1248 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1247 1249 if REQUIREMENT in requirements:
1248 1250 if REQUIREMENT_SHALLOW_FILES in requirements:
1249 1251 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1250 1252
1251 1253 return sqlitefilestorage
1252 1254 else:
1253 1255 return orig(requirements=requirements, features=features, **kwargs)
1254 1256
1255 1257
1256 1258 def makemain(orig, ui, requirements, **kwargs):
1257 1259 if REQUIREMENT in requirements:
1258 1260 if REQUIREMENT_ZSTD in requirements and not zstd:
1259 1261 raise error.Abort(
1260 1262 _(
1261 1263 b'repository uses zstandard compression, which '
1262 1264 b'is not available to this Mercurial install'
1263 1265 )
1264 1266 )
1265 1267
1266 1268 return sqliterepository
1267 1269
1268 1270 return orig(requirements=requirements, **kwargs)
1269 1271
1270 1272
1271 1273 def verifierinit(orig, self, *args, **kwargs):
1272 1274 orig(self, *args, **kwargs)
1273 1275
1274 1276 # We don't care that files in the store don't align with what is
1275 1277 # advertised. So suppress these warnings.
1276 1278 self.warnorphanstorefiles = False
1277 1279
1278 1280
1279 1281 def extsetup(ui):
1280 1282 localrepo.featuresetupfuncs.add(featuresetup)
1281 1283 extensions.wrapfunction(
1282 1284 localrepo, b'newreporequirements', newreporequirements
1283 1285 )
1284 1286 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1285 1287 extensions.wrapfunction(localrepo, b'makemain', makemain)
1286 1288 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1287 1289
1288 1290
1289 1291 def reposetup(ui, repo):
1290 1292 if isinstance(repo, sqliterepository):
1291 1293 repo._db = None
1292 1294
1293 1295 # TODO check for bundlerepository?