py3: use node.hex(m.digest()) instead of m.hexdigest()...
Pulkit Goyal
r40711:9fcf8084 default
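
For context on the hunks below: on Python 3, hashlib's hexdigest() returns a unicode str while digest() returns bytes, and Mercurial handles hashes as bytes throughout, so the patch routes the raw digest through node.hex() instead. A minimal standalone sketch (standard library only, with binascii.hexlify standing in for mercurial.node.hex, which is assumed to behave the same way: bytes in, lowercase hex bytes out) illustrates the difference:

    import binascii
    import hashlib

    m = hashlib.sha1(b'some file content')

    text_hex = m.hexdigest()                 # str on Python 3, bytes-like str on Python 2
    byte_hex = binascii.hexlify(m.digest())  # bytes on both Python 2 and Python 3

    # Both spellings encode the same value; only the type differs.
    assert byte_hex.decode('ascii') == text_hex

Keeping the hash as bytes avoids str/bytes mismatches (b'...' == '...' is simply False on Python 3) in code that slices, compares, or stores the hex value alongside other bytes, which is what the changed lines below rely on.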
@@ -1,829 +1,829 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import hashlib
13 13 import os
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial import (
17 17 error,
18 18 linelog as linelogmod,
19 19 lock as lockmod,
20 20 mdiff,
21 21 node,
22 22 pycompat,
23 23 scmutil,
24 24 util,
25 25 )
26 26 from mercurial.utils import (
27 27 stringutil,
28 28 )
29 29
30 30 from . import (
31 31 error as faerror,
32 32 revmap as revmapmod,
33 33 )
34 34
35 35 # given path, get filelog, cached
36 36 @util.lrucachefunc
37 37 def _getflog(repo, path):
38 38 return repo.file(path)
39 39
40 40 # extracted from mercurial.context.basefilectx.annotate
41 41 def _parents(f, follow=True):
42 42 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
43 43 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
44 44 # from the topmost introrev (= srcrev) down to p.linkrev() if it
45 45 # isn't an ancestor of the srcrev.
46 46 f._changeid
47 47 pl = f.parents()
48 48
49 49 # Don't return renamed parents if we aren't following.
50 50 if not follow:
51 51 pl = [p for p in pl if p.path() == f.path()]
52 52
53 53 # renamed filectx won't have a filelog yet, so set it
54 54 # from the cache to save time
55 55 for p in pl:
56 56 if not '_filelog' in p.__dict__:
57 57 p._filelog = _getflog(f._repo, p.path())
58 58
59 59 return pl
60 60
61 61 # extracted from mercurial.context.basefilectx.annotate. slightly modified
62 62 # so it takes a fctx instead of a pair of text and fctx.
63 63 def _decorate(fctx):
64 64 text = fctx.data()
65 65 linecount = text.count('\n')
66 66 if text and not text.endswith('\n'):
67 67 linecount += 1
68 68 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
69 69
70 70 # extracted from mercurial.context.basefilectx.annotate. slightly modified
71 71 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
72 72 # calculating diff here.
73 73 def _pair(parent, child, blocks):
74 74 for (a1, a2, b1, b2), t in blocks:
75 75 # Changed blocks ('!') or blocks made only of blank lines ('~')
76 76 # belong to the child.
77 77 if t == '=':
78 78 child[0][b1:b2] = parent[0][a1:a2]
79 79 return child
80 80
81 81 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
82 82 # could be reused
83 83 _revsingle = util.lrucachefunc(scmutil.revsingle)
84 84
85 85 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
86 86 """(repo, str, str) -> fctx
87 87
88 88 get the filectx object from repo, rev, path, in an efficient way.
89 89
90 90 if resolverev is True, "rev" is a revision specified by the revset
91 91 language, otherwise "rev" is a nodeid, or a revision number that can
92 92 be consumed by repo.__getitem__.
93 93
94 94 if adjustctx is not None, the returned fctx will point to a changeset
95 95 that introduces the change (last modified the file). if adjustctx
96 96 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
97 97 faster for big repos but is incorrect for some cases.
98 98 """
99 99 if resolverev and not isinstance(rev, int) and rev is not None:
100 100 ctx = _revsingle(repo, rev)
101 101 else:
102 102 ctx = repo[rev]
103 103
104 104 # If we don't need to adjust the linkrev, create the filectx using the
105 105 # changectx instead of using ctx[path]. This means it already has the
106 106 # changectx information, so blame -u will be able to look directly at the
107 107 # commitctx object instead of having to resolve it by going through the
108 108 # manifest. In a lazy-manifest world this can prevent us from downloading a
109 109 # lot of data.
110 110 if adjustctx is None:
111 111 # ctx.rev() is None means it's the working copy, which is a special
112 112 # case.
113 113 if ctx.rev() is None:
114 114 fctx = ctx[path]
115 115 else:
116 116 fctx = repo.filectx(path, changeid=ctx.rev())
117 117 else:
118 118 fctx = ctx[path]
119 119 if adjustctx == 'linkrev':
120 120 introrev = fctx.linkrev()
121 121 else:
122 122 introrev = fctx.introrev()
123 123 if introrev != ctx.rev():
124 124 fctx._changeid = introrev
125 125 fctx._changectx = repo[introrev]
126 126 return fctx
127 127
128 128 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
129 129 def encodedir(path):
130 130 return (path
131 131 .replace('.hg/', '.hg.hg/')
132 132 .replace('.l/', '.l.hg/')
133 133 .replace('.m/', '.m.hg/')
134 134 .replace('.lock/', '.lock.hg/'))
135 135
136 136 def hashdiffopts(diffopts):
137 137 diffoptstr = stringutil.pprint(sorted(
138 138 (k, getattr(diffopts, k))
139 139 for k in mdiff.diffopts.defaults
140 140 ))
141 return hashlib.sha1(diffoptstr).hexdigest()[:6]
141 return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
142 142
143 143 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
144 144
145 145 class annotateopts(object):
146 146 """like mercurial.mdiff.diffopts, but is for annotate
147 147
148 148 followrename: follow renames, like "hg annotate -f"
149 149 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
150 150 """
151 151
152 152 defaults = {
153 153 'diffopts': None,
154 154 'followrename': True,
155 155 'followmerge': True,
156 156 }
157 157
158 158 def __init__(self, **opts):
159 159 opts = pycompat.byteskwargs(opts)
160 160 for k, v in self.defaults.iteritems():
161 161 setattr(self, k, opts.get(k, v))
162 162
163 163 @util.propertycache
164 164 def shortstr(self):
165 165 """represent opts in a short string, suitable for a directory name"""
166 166 result = ''
167 167 if not self.followrename:
168 168 result += 'r0'
169 169 if not self.followmerge:
170 170 result += 'm0'
171 171 if self.diffopts is not None:
172 172 assert isinstance(self.diffopts, mdiff.diffopts)
173 173 diffopthash = hashdiffopts(self.diffopts)
174 174 if diffopthash != _defaultdiffopthash:
175 175 result += 'i' + diffopthash
176 176 return result or 'default'
177 177
178 178 defaultopts = annotateopts()
179 179
180 180 class _annotatecontext(object):
181 181 """do not use this class directly as it does not use lock to protect
182 182 writes. use "with annotatecontext(...)" instead.
183 183 """
184 184
185 185 def __init__(self, repo, path, linelogpath, revmappath, opts):
186 186 self.repo = repo
187 187 self.ui = repo.ui
188 188 self.path = path
189 189 self.opts = opts
190 190 self.linelogpath = linelogpath
191 191 self.revmappath = revmappath
192 192 self._linelog = None
193 193 self._revmap = None
194 194 self._node2path = {} # {str: str}
195 195
196 196 @property
197 197 def linelog(self):
198 198 if self._linelog is None:
199 199 if os.path.exists(self.linelogpath):
200 200 with open(self.linelogpath, 'rb') as f:
201 201 try:
202 202 self._linelog = linelogmod.linelog.fromdata(f.read())
203 203 except linelogmod.LineLogError:
204 204 self._linelog = linelogmod.linelog()
205 205 else:
206 206 self._linelog = linelogmod.linelog()
207 207 return self._linelog
208 208
209 209 @property
210 210 def revmap(self):
211 211 if self._revmap is None:
212 212 self._revmap = revmapmod.revmap(self.revmappath)
213 213 return self._revmap
214 214
215 215 def close(self):
216 216 if self._revmap is not None:
217 217 self._revmap.flush()
218 218 self._revmap = None
219 219 if self._linelog is not None:
220 220 with open(self.linelogpath, 'wb') as f:
221 221 f.write(self._linelog.encode())
222 222 self._linelog = None
223 223
224 224 __del__ = close
225 225
226 226 def rebuild(self):
227 227 """delete linelog and revmap, useful for rebuilding"""
228 228 self.close()
229 229 self._node2path.clear()
230 230 _unlinkpaths([self.revmappath, self.linelogpath])
231 231
232 232 @property
233 233 def lastnode(self):
234 234 """return last node in revmap, or None if revmap is empty"""
235 235 if self._revmap is None:
236 236 # fast path, read revmap without loading its full content
237 237 return revmapmod.getlastnode(self.revmappath)
238 238 else:
239 239 return self._revmap.rev2hsh(self._revmap.maxrev)
240 240
241 241 def isuptodate(self, master, strict=True):
242 242 """return True if the revmap / linelog is up-to-date, or the file
243 243 does not exist in the master revision. False otherwise.
244 244
245 245 it tries to be fast and could return false negatives, because of the
246 246 use of linkrev instead of introrev.
247 247
248 248 useful for both server and client to decide whether to update
249 249 fastannotate cache or not.
250 250
251 251 if strict is True, even if fctx exists in the revmap, but is not the
252 252 last node, isuptodate will return False. it's good for performance - no
253 253 expensive check was done.
254 254
255 255 if strict is False, if fctx exists in the revmap, this function may
256 256 return True. this is useful for the client to skip downloading the
257 257 cache if the client's master is behind the server's.
258 258 """
259 259 lastnode = self.lastnode
260 260 try:
261 261 f = self._resolvefctx(master, resolverev=True)
262 262 # choose linkrev instead of introrev as the check is meant to be
263 263 # *fast*.
264 264 linknode = self.repo.changelog.node(f.linkrev())
265 265 if not strict and lastnode and linknode != lastnode:
266 266 # check if f.node() is in the revmap. note: this loads the
267 267 # revmap and can be slow.
268 268 return self.revmap.hsh2rev(linknode) is not None
269 269 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
270 270 # false negatives are acceptable in this case.
271 271 return linknode == lastnode
272 272 except LookupError:
273 273 # master does not have the file, or the revmap is ahead
274 274 return True
275 275
276 276 def annotate(self, rev, master=None, showpath=False, showlines=False):
277 277 """incrementally update the cache so it includes revisions in the main
278 278 branch till 'master'. and run annotate on 'rev', which may or may not be
279 279 included in the main branch.
280 280
281 281 if master is None, do not update linelog.
282 282
283 283 the first value returned is the annotate result, it is [(node, linenum)]
284 284 by default. [(node, linenum, path)] if showpath is True.
285 285
286 286 if showlines is True, a second value will be returned, it is a list of
287 287 corresponding line contents.
288 288 """
289 289
290 290 # the fast path test requires commit hash, convert rev number to hash,
291 291 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
292 292 # command could give us a revision number even if the user passes a
293 293 # commit hash.
294 294 if isinstance(rev, int):
295 295 rev = node.hex(self.repo.changelog.node(rev))
296 296
297 297 # fast path: if rev is in the main branch already
298 298 directly, revfctx = self.canannotatedirectly(rev)
299 299 if directly:
300 300 if self.ui.debugflag:
301 301 self.ui.debug('fastannotate: %s: using fast path '
302 302 '(resolved fctx: %s)\n'
303 303 % (self.path,
304 304 stringutil.pprint(util.safehasattr(revfctx,
305 305 'node'))))
306 306 return self.annotatedirectly(revfctx, showpath, showlines)
307 307
308 308 # resolve master
309 309 masterfctx = None
310 310 if master:
311 311 try:
312 312 masterfctx = self._resolvefctx(master, resolverev=True,
313 313 adjustctx=True)
314 314 except LookupError: # master does not have the file
315 315 pass
316 316 else:
317 317 if masterfctx in self.revmap: # no need to update linelog
318 318 masterfctx = None
319 319
320 320 # ... - @ <- rev (can be an arbitrary changeset,
321 321 # / not necessarily a descendant
322 322 # master -> o of master)
323 323 # |
324 324 # a merge -> o 'o': new changesets in the main branch
325 325 # |\ '#': revisions in the main branch that
326 326 # o * exist in linelog / revmap
327 327 # | . '*': changesets in side branches, or
328 328 # last master -> # . descendants of master
329 329 # | .
330 330 # # * joint: '#', and is a parent of a '*'
331 331 # |/
332 332 # a joint -> # ^^^^ --- side branches
333 333 # |
334 334 # ^ --- main branch (in linelog)
335 335
336 336 # these DFSes are similar to the traditional annotate algorithm.
337 337 # we cannot really reuse the code for perf reason.
338 338
339 339 # 1st DFS calculates merges, joint points, and needed.
340 340 # "needed" is a simple reference counting dict to free items in
341 341 # "hist", reducing its memory usage, which could otherwise be huge.
342 342 initvisit = [revfctx]
343 343 if masterfctx:
344 344 if masterfctx.rev() is None:
345 345 raise error.Abort(_('cannot update linelog to wdir()'),
346 346 hint=_('set fastannotate.mainbranch'))
347 347 initvisit.append(masterfctx)
348 348 visit = initvisit[:]
349 349 pcache = {}
350 350 needed = {revfctx: 1}
351 351 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
352 352 while visit:
353 353 f = visit.pop()
354 354 if f in pcache or f in hist:
355 355 continue
356 356 if f in self.revmap: # in the old main branch, it's a joint
357 357 llrev = self.revmap.hsh2rev(f.node())
358 358 self.linelog.annotate(llrev)
359 359 result = self.linelog.annotateresult
360 360 hist[f] = (result, f.data())
361 361 continue
362 362 pl = self._parentfunc(f)
363 363 pcache[f] = pl
364 364 for p in pl:
365 365 needed[p] = needed.get(p, 0) + 1
366 366 if p not in pcache:
367 367 visit.append(p)
368 368
369 369 # 2nd (simple) DFS calculates new changesets in the main branch
370 370 # ('o' nodes in the above graph), so we know when to update linelog.
371 371 newmainbranch = set()
372 372 f = masterfctx
373 373 while f and f not in self.revmap:
374 374 newmainbranch.add(f)
375 375 pl = pcache[f]
376 376 if pl:
377 377 f = pl[0]
378 378 else:
379 379 f = None
380 380 break
381 381
382 382 # f, if present, is the position where the last build stopped at, and
383 383 # should be the "master" last time. check to see if we can continue
384 384 # building the linelog incrementally. (we cannot if diverged)
385 385 if masterfctx is not None:
386 386 self._checklastmasterhead(f)
387 387
388 388 if self.ui.debugflag:
389 389 if newmainbranch:
390 390 self.ui.debug('fastannotate: %s: %d new changesets in the main'
391 391 ' branch\n' % (self.path, len(newmainbranch)))
392 392 elif not hist: # no joints, no updates
393 393 self.ui.debug('fastannotate: %s: linelog cannot help in '
394 394 'annotating this revision\n' % self.path)
395 395
396 396 # prepare annotateresult so we can update linelog incrementally
397 397 self.linelog.annotate(self.linelog.maxrev)
398 398
399 399 # 3rd DFS does the actual annotate
400 400 visit = initvisit[:]
401 401 progress = 0
402 402 while visit:
403 403 f = visit[-1]
404 404 if f in hist:
405 405 visit.pop()
406 406 continue
407 407
408 408 ready = True
409 409 pl = pcache[f]
410 410 for p in pl:
411 411 if p not in hist:
412 412 ready = False
413 413 visit.append(p)
414 414 if not ready:
415 415 continue
416 416
417 417 visit.pop()
418 418 blocks = None # mdiff blocks, used for appending linelog
419 419 ismainbranch = (f in newmainbranch)
420 420 # curr is the same as the traditional annotate algorithm,
421 421 # if we only care about linear history (do not follow merge),
422 422 # then curr is not actually used.
423 423 assert f not in hist
424 424 curr = _decorate(f)
425 425 for i, p in enumerate(pl):
426 426 bs = list(self._diffblocks(hist[p][1], curr[1]))
427 427 if i == 0 and ismainbranch:
428 428 blocks = bs
429 429 curr = _pair(hist[p], curr, bs)
430 430 if needed[p] == 1:
431 431 del hist[p]
432 432 del needed[p]
433 433 else:
434 434 needed[p] -= 1
435 435
436 436 hist[f] = curr
437 437 del pcache[f]
438 438
439 439 if ismainbranch: # need to write to linelog
440 440 if not self.ui.quiet:
441 441 progress += 1
442 442 self.ui.progress(_('building cache'), progress,
443 443 total=len(newmainbranch))
444 444 bannotated = None
445 445 if len(pl) == 2 and self.opts.followmerge: # merge
446 446 bannotated = curr[0]
447 447 if blocks is None: # no parents, add an empty one
448 448 blocks = list(self._diffblocks('', curr[1]))
449 449 self._appendrev(f, blocks, bannotated)
450 450 elif showpath: # not append linelog, but we need to record path
451 451 self._node2path[f.node()] = f.path()
452 452
453 453 if progress: # clean progress bar
454 454 self.ui.write()
455 455
456 456 result = [
457 457 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
458 458 for fr, l in hist[revfctx][0]] # [(node, linenumber)]
459 459 return self._refineannotateresult(result, revfctx, showpath, showlines)
460 460
461 461 def canannotatedirectly(self, rev):
462 462 """(str) -> bool, fctx or node.
463 463 return (True, f) if we can annotate without updating the linelog, pass
464 464 f to annotatedirectly.
465 465 return (False, f) if we need extra calculation. f is the fctx resolved
466 466 from rev.
467 467 """
468 468 result = True
469 469 f = None
470 470 if not isinstance(rev, int) and rev is not None:
471 471 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
472 472 if hsh is not None and (hsh, self.path) in self.revmap:
473 473 f = hsh
474 474 if f is None:
475 475 adjustctx = 'linkrev' if self._perfhack else True
476 476 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
477 477 result = f in self.revmap
478 478 if not result and self._perfhack:
479 479 # redo the resolution without perfhack - as we are going to
480 480 # do write operations, we need a correct fctx.
481 481 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
482 482 return result, f
483 483
484 484 def annotatealllines(self, rev, showpath=False, showlines=False):
485 485 """(rev : str) -> [(node : str, linenum : int, path : str)]
486 486
487 487 the result has the same format as annotate, but includes all (including
488 488 deleted) lines up to rev. call this after calling annotate(rev, ...) for
489 489 better performance and accuracy.
490 490 """
491 491 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
492 492
493 493 # find a chain from rev to anything in the mainbranch
494 494 if revfctx not in self.revmap:
495 495 chain = [revfctx]
496 496 a = ''
497 497 while True:
498 498 f = chain[-1]
499 499 pl = self._parentfunc(f)
500 500 if not pl:
501 501 break
502 502 if pl[0] in self.revmap:
503 503 a = pl[0].data()
504 504 break
505 505 chain.append(pl[0])
506 506
507 507 # both self.linelog and self.revmap is backed by filesystem. now
508 508 # we want to modify them but do not want to write changes back to
509 509 # files. so we create in-memory objects and copy them. it's like
510 510 # a "fork".
511 511 linelog = linelogmod.linelog()
512 512 linelog.copyfrom(self.linelog)
513 513 linelog.annotate(linelog.maxrev)
514 514 revmap = revmapmod.revmap()
515 515 revmap.copyfrom(self.revmap)
516 516
517 517 for f in reversed(chain):
518 518 b = f.data()
519 519 blocks = list(self._diffblocks(a, b))
520 520 self._doappendrev(linelog, revmap, f, blocks)
521 521 a = b
522 522 else:
523 523 # fastpath: use existing linelog, revmap as we don't write to them
524 524 linelog = self.linelog
525 525 revmap = self.revmap
526 526
527 527 lines = linelog.getalllines()
528 528 hsh = revfctx.node()
529 529 llrev = revmap.hsh2rev(hsh)
530 530 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
531 531 # cannot use _refineannotateresult since we need custom logic for
532 532 # resolving line contents
533 533 if showpath:
534 534 result = self._addpathtoresult(result, revmap)
535 535 if showlines:
536 536 linecontents = self._resolvelines(result, revmap, linelog)
537 537 result = (result, linecontents)
538 538 return result
539 539
540 540 def _resolvelines(self, annotateresult, revmap, linelog):
541 541 """(annotateresult) -> [line]. designed for annotatealllines.
542 542 this is probably the most inefficient code in the whole fastannotate
543 543 directory. but we have made a decision that the linelog does not
544 544 store line contents. so getting them requires random accesses to
545 545 the revlog data, since they can be many, it can be very slow.
546 546 """
547 547 # [llrev]
548 548 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
549 549 result = [None] * len(annotateresult)
550 550 # {(rev, linenum): [lineindex]}
551 551 key2idxs = collections.defaultdict(list)
552 552 for i in pycompat.xrange(len(result)):
553 553 key2idxs[(revs[i], annotateresult[i][1])].append(i)
554 554 while key2idxs:
555 555 # find an unresolved line and its linelog rev to annotate
556 556 hsh = None
557 557 try:
558 558 for (rev, _linenum), idxs in key2idxs.iteritems():
559 559 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
560 560 continue
561 561 hsh = annotateresult[idxs[0]][0]
562 562 break
563 563 except StopIteration: # no more unresolved lines
564 564 return result
565 565 if hsh is None:
566 566 # the remaining key2idxs are not in main branch, resolving them
567 567 # using the hard way...
568 568 revlines = {}
569 569 for (rev, linenum), idxs in key2idxs.iteritems():
570 570 if rev not in revlines:
571 571 hsh = annotateresult[idxs[0]][0]
572 572 if self.ui.debugflag:
573 573 self.ui.debug('fastannotate: reading %s line #%d '
574 574 'to resolve lines %r\n'
575 575 % (node.short(hsh), linenum, idxs))
576 576 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
577 577 lines = mdiff.splitnewlines(fctx.data())
578 578 revlines[rev] = lines
579 579 for idx in idxs:
580 580 result[idx] = revlines[rev][linenum]
581 581 assert all(x is not None for x in result)
582 582 return result
583 583
584 584 # run the annotate and the lines should match to the file content
585 585 self.ui.debug('fastannotate: annotate %s to resolve lines\n'
586 586 % node.short(hsh))
587 587 linelog.annotate(rev)
588 588 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
589 589 annotated = linelog.annotateresult
590 590 lines = mdiff.splitnewlines(fctx.data())
591 591 if len(lines) != len(annotated):
592 592 raise faerror.CorruptedFileError('unexpected annotated lines')
593 593 # resolve lines from the annotate result
594 594 for i, line in enumerate(lines):
595 595 k = annotated[i]
596 596 if k in key2idxs:
597 597 for idx in key2idxs[k]:
598 598 result[idx] = line
599 599 del key2idxs[k]
600 600 return result
601 601
602 602 def annotatedirectly(self, f, showpath, showlines):
603 603 """like annotate, but when we know that f is in linelog.
604 604 f can be either a 20-char str (node) or a fctx. this is for perf - in
605 605 the best case, the user provides a node and we don't need to read the
606 606 filelog or construct any filecontext.
607 607 """
608 608 if isinstance(f, str):
609 609 hsh = f
610 610 else:
611 611 hsh = f.node()
612 612 llrev = self.revmap.hsh2rev(hsh)
613 613 if not llrev:
614 614 raise faerror.CorruptedFileError('%s is not in revmap'
615 615 % node.hex(hsh))
616 616 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
617 617 raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
618 618 % node.hex(hsh))
619 619 self.linelog.annotate(llrev)
620 620 result = [(self.revmap.rev2hsh(r), l)
621 621 for r, l in self.linelog.annotateresult]
622 622 return self._refineannotateresult(result, f, showpath, showlines)
623 623
624 624 def _refineannotateresult(self, result, f, showpath, showlines):
625 625 """add the missing path or line contents, they can be expensive.
626 626 f could be either node or fctx.
627 627 """
628 628 if showpath:
629 629 result = self._addpathtoresult(result)
630 630 if showlines:
631 631 if isinstance(f, str): # f: node or fctx
632 632 llrev = self.revmap.hsh2rev(f)
633 633 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
634 634 else:
635 635 fctx = f
636 636 lines = mdiff.splitnewlines(fctx.data())
637 637 if len(lines) != len(result): # linelog is probably corrupted
638 638 raise faerror.CorruptedFileError()
639 639 result = (result, lines)
640 640 return result
641 641
642 642 def _appendrev(self, fctx, blocks, bannotated=None):
643 643 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
644 644
645 645 def _diffblocks(self, a, b):
646 646 return mdiff.allblocks(a, b, self.opts.diffopts)
647 647
648 648 @staticmethod
649 649 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
650 650 """append a revision to linelog and revmap"""
651 651
652 652 def getllrev(f):
653 653 """(fctx) -> int"""
654 654 # f should not be a linelog revision
655 655 if isinstance(f, int):
656 656 raise error.ProgrammingError('f should not be an int')
657 657 # f is a fctx, allocate linelog rev on demand
658 658 hsh = f.node()
659 659 rev = revmap.hsh2rev(hsh)
660 660 if rev is None:
661 661 rev = revmap.append(hsh, sidebranch=True, path=f.path())
662 662 return rev
663 663
664 664 # append sidebranch revisions to revmap
665 665 siderevs = []
666 666 siderevmap = {} # node: int
667 667 if bannotated is not None:
668 668 for (a1, a2, b1, b2), op in blocks:
669 669 if op != '=':
670 670 # f could be either a linelog rev or a fctx.
671 671 siderevs += [f for f, l in bannotated[b1:b2]
672 672 if not isinstance(f, int)]
673 673 siderevs = set(siderevs)
674 674 if fctx in siderevs: # mainnode must be appended separately
675 675 siderevs.remove(fctx)
676 676 for f in siderevs:
677 677 siderevmap[f] = getllrev(f)
678 678
679 679 # the changeset in the main branch, could be a merge
680 680 llrev = revmap.append(fctx.node(), path=fctx.path())
681 681 siderevmap[fctx] = llrev
682 682
683 683 for (a1, a2, b1, b2), op in reversed(blocks):
684 684 if op == '=':
685 685 continue
686 686 if bannotated is None:
687 687 linelog.replacelines(llrev, a1, a2, b1, b2)
688 688 else:
689 689 blines = [((r if isinstance(r, int) else siderevmap[r]), l)
690 690 for r, l in bannotated[b1:b2]]
691 691 linelog.replacelines_vec(llrev, a1, a2, blines)
692 692
693 693 def _addpathtoresult(self, annotateresult, revmap=None):
694 694 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
695 695 if revmap is None:
696 696 revmap = self.revmap
697 697
698 698 def _getpath(nodeid):
699 699 path = self._node2path.get(nodeid)
700 700 if path is None:
701 701 path = revmap.rev2path(revmap.hsh2rev(nodeid))
702 702 self._node2path[nodeid] = path
703 703 return path
704 704
705 705 return [(n, l, _getpath(n)) for n, l in annotateresult]
706 706
707 707 def _checklastmasterhead(self, fctx):
708 708 """check if fctx is the master's head last time, raise if not"""
709 709 if fctx is None:
710 710 llrev = 0
711 711 else:
712 712 llrev = self.revmap.hsh2rev(fctx.node())
713 713 if not llrev:
714 714 raise faerror.CannotReuseError()
715 715 if self.linelog.maxrev != llrev:
716 716 raise faerror.CannotReuseError()
717 717
718 718 @util.propertycache
719 719 def _parentfunc(self):
720 720 """-> (fctx) -> [fctx]"""
721 721 followrename = self.opts.followrename
722 722 followmerge = self.opts.followmerge
723 723 def parents(f):
724 724 pl = _parents(f, follow=followrename)
725 725 if not followmerge:
726 726 pl = pl[:1]
727 727 return pl
728 728 return parents
729 729
730 730 @util.propertycache
731 731 def _perfhack(self):
732 732 return self.ui.configbool('fastannotate', 'perfhack')
733 733
734 734 def _resolvefctx(self, rev, path=None, **kwds):
735 735 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
736 736
737 737 def _unlinkpaths(paths):
738 738 """silent, best-effort unlink"""
739 739 for path in paths:
740 740 try:
741 741 util.unlink(path)
742 742 except OSError:
743 743 pass
744 744
745 745 class pathhelper(object):
746 746 """helper for getting paths for lockfile, linelog and revmap"""
747 747
748 748 def __init__(self, repo, path, opts=defaultopts):
749 749 # different options use different directories
750 750 self._vfspath = os.path.join('fastannotate',
751 751 opts.shortstr, encodedir(path))
752 752 self._repo = repo
753 753
754 754 @property
755 755 def dirname(self):
756 756 return os.path.dirname(self._repo.vfs.join(self._vfspath))
757 757
758 758 @property
759 759 def linelogpath(self):
760 760 return self._repo.vfs.join(self._vfspath + '.l')
761 761
762 762 def lock(self):
763 763 return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')
764 764
765 765 @contextlib.contextmanager
766 766 def _lockflock(self):
767 767 """the same as 'lock' but use flock instead of lockmod.lock, to avoid
768 768 creating temporary symlinks."""
769 769 import fcntl
770 770 lockpath = self.linelogpath
771 771 util.makedirs(os.path.dirname(lockpath))
772 772 lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
773 773 fcntl.flock(lockfd, fcntl.LOCK_EX)
774 774 try:
775 775 yield
776 776 finally:
777 777 fcntl.flock(lockfd, fcntl.LOCK_UN)
778 778 os.close(lockfd)
779 779
780 780 @property
781 781 def revmappath(self):
782 782 return self._repo.vfs.join(self._vfspath + '.m')
783 783
784 784 @contextlib.contextmanager
785 785 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
786 786 """context needed to perform (fast) annotate on a file
787 787
788 788 an annotatecontext of a single file consists of two structures: the
789 789 linelog and the revmap. this function takes care of locking. only 1
790 790 process is allowed to write that file's linelog and revmap at a time.
791 791
792 792 when something goes wrong, this function will assume the linelog and the
793 793 revmap are in a bad state, and remove them from disk.
794 794
795 795 use this function in the following way:
796 796
797 797 with annotatecontext(...) as actx:
798 798 actx. ....
799 799 """
800 800 helper = pathhelper(repo, path, opts)
801 801 util.makedirs(helper.dirname)
802 802 revmappath = helper.revmappath
803 803 linelogpath = helper.linelogpath
804 804 actx = None
805 805 try:
806 806 with helper.lock():
807 807 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
808 808 if rebuild:
809 809 actx.rebuild()
810 810 yield actx
811 811 except Exception:
812 812 if actx is not None:
813 813 actx.rebuild()
814 814 repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
815 815 raise
816 816 finally:
817 817 if actx is not None:
818 818 actx.close()
819 819
820 820 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
821 821 """like annotatecontext but get the context from a fctx. convenient when
822 822 used in fctx.annotate
823 823 """
824 824 repo = fctx._repo
825 825 path = fctx._path
826 826 if repo.ui.configbool('fastannotate', 'forcefollow', True):
827 827 follow = True
828 828 aopts = annotateopts(diffopts=diffopts, followrename=follow)
829 829 return annotatecontext(repo, path, aopts, rebuild)
@@ -1,609 +1,609 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 13 import hashlib
14 14 import os
15 15 import shutil
16 16
17 17 from mercurial.i18n import _
18 18
19 19 from mercurial import (
20 20 cmdutil,
21 21 context,
22 22 error,
23 23 hg,
24 24 lock,
25 25 match as matchmod,
26 26 node,
27 27 pycompat,
28 28 registrar,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from ..convert import (
34 34 convcmd,
35 35 filemap,
36 36 )
37 37
38 38 from . import (
39 39 lfutil,
40 40 storefactory
41 41 )
42 42
43 43 release = lock.release
44 44
45 45 # -- Commands ----------------------------------------------------------
46 46
47 47 cmdtable = {}
48 48 command = registrar.command(cmdtable)
49 49
50 50 @command('lfconvert',
51 51 [('s', 'size', '',
52 52 _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
53 53 ('', 'to-normal', False,
54 54 _('convert from a largefiles repo to a normal repo')),
55 55 ],
56 56 _('hg lfconvert SOURCE DEST [FILE ...]'),
57 57 norepo=True,
58 58 inferrepo=True)
59 59 def lfconvert(ui, src, dest, *pats, **opts):
60 60 '''convert a normal repository to a largefiles repository
61 61
62 62 Convert repository SOURCE to a new repository DEST, identical to
63 63 SOURCE except that certain files will be converted as largefiles:
64 64 specifically, any file that matches any PATTERN *or* whose size is
65 65 above the minimum size threshold is converted as a largefile. The
66 66 size used to determine whether or not to track a file as a
67 67 largefile is the size of the first version of the file. The
68 68 minimum size can be specified either with --size or in
69 69 configuration as ``largefiles.size``.
70 70
71 71 After running this command you will need to make sure that
72 72 largefiles is enabled anywhere you intend to push the new
73 73 repository.
74 74
75 75 Use --to-normal to convert largefiles back to normal files; after
76 76 this, the DEST repository can be used without largefiles at all.'''
77 77
78 78 opts = pycompat.byteskwargs(opts)
79 79 if opts['to_normal']:
80 80 tolfile = False
81 81 else:
82 82 tolfile = True
83 83 size = lfutil.getminsize(ui, True, opts.get('size'), default=None)
84 84
85 85 if not hg.islocal(src):
86 86 raise error.Abort(_('%s is not a local Mercurial repo') % src)
87 87 if not hg.islocal(dest):
88 88 raise error.Abort(_('%s is not a local Mercurial repo') % dest)
89 89
90 90 rsrc = hg.repository(ui, src)
91 91 ui.status(_('initializing destination %s\n') % dest)
92 92 rdst = hg.repository(ui, dest, create=True)
93 93
94 94 success = False
95 95 dstwlock = dstlock = None
96 96 try:
97 97 # Get a list of all changesets in the source. The easy way to do this
98 98 # is to simply walk the changelog, using changelog.nodesbetween().
99 99 # Take a look at mercurial/revlog.py:639 for more details.
100 100 # Use a generator instead of a list to decrease memory usage
101 101 ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
102 102 rsrc.heads())[0])
103 103 revmap = {node.nullid: node.nullid}
104 104 if tolfile:
105 105 # Lock destination to prevent modification while it is converted to.
106 106 # Don't need to lock src because we are just reading from its
107 107 # history which can't change.
108 108 dstwlock = rdst.wlock()
109 109 dstlock = rdst.lock()
110 110
111 111 lfiles = set()
112 112 normalfiles = set()
113 113 if not pats:
114 114 pats = ui.configlist(lfutil.longname, 'patterns')
115 115 if pats:
116 116 matcher = matchmod.match(rsrc.root, '', list(pats))
117 117 else:
118 118 matcher = None
119 119
120 120 lfiletohash = {}
121 121 with ui.makeprogress(_('converting revisions'),
122 122 unit=_('revisions'),
123 123 total=rsrc['tip'].rev()) as progress:
124 124 for ctx in ctxs:
125 125 progress.update(ctx.rev())
126 126 _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
127 127 lfiles, normalfiles, matcher, size, lfiletohash)
128 128
129 129 if rdst.wvfs.exists(lfutil.shortname):
130 130 rdst.wvfs.rmtree(lfutil.shortname)
131 131
132 132 for f in lfiletohash.keys():
133 133 if rdst.wvfs.isfile(f):
134 134 rdst.wvfs.unlink(f)
135 135 try:
136 136 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
137 137 except OSError:
138 138 pass
139 139
140 140 # If there were any files converted to largefiles, add largefiles
141 141 # to the destination repository's requirements.
142 142 if lfiles:
143 143 rdst.requirements.add('largefiles')
144 144 rdst._writerequirements()
145 145 else:
146 146 class lfsource(filemap.filemap_source):
147 147 def __init__(self, ui, source):
148 148 super(lfsource, self).__init__(ui, source, None)
149 149 self.filemapper.rename[lfutil.shortname] = '.'
150 150
151 151 def getfile(self, name, rev):
152 152 realname, realrev = rev
153 153 f = super(lfsource, self).getfile(name, rev)
154 154
155 155 if (not realname.startswith(lfutil.shortnameslash)
156 156 or f[0] is None):
157 157 return f
158 158
159 159 # Substitute in the largefile data for the hash
160 160 hash = f[0].strip()
161 161 path = lfutil.findfile(rsrc, hash)
162 162
163 163 if path is None:
164 164 raise error.Abort(_("missing largefile for '%s' in %s")
165 165 % (realname, realrev))
166 166 return util.readfile(path), f[1]
167 167
168 168 class converter(convcmd.converter):
169 169 def __init__(self, ui, source, dest, revmapfile, opts):
170 170 src = lfsource(ui, source)
171 171
172 172 super(converter, self).__init__(ui, src, dest, revmapfile,
173 173 opts)
174 174
175 175 found, missing = downloadlfiles(ui, rsrc)
176 176 if missing != 0:
177 177 raise error.Abort(_("all largefiles must be present locally"))
178 178
179 179 orig = convcmd.converter
180 180 convcmd.converter = converter
181 181
182 182 try:
183 183 convcmd.convert(ui, src, dest, source_type='hg', dest_type='hg')
184 184 finally:
185 185 convcmd.converter = orig
186 186 success = True
187 187 finally:
188 188 if tolfile:
189 189 rdst.dirstate.clear()
190 190 release(dstlock, dstwlock)
191 191 if not success:
192 192 # we failed, remove the new directory
193 193 shutil.rmtree(rdst.root)
194 194
195 195 def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
196 196 matcher, size, lfiletohash):
197 197 # Convert src parents to dst parents
198 198 parents = _convertparents(ctx, revmap)
199 199
200 200 # Generate list of changed files
201 201 files = _getchangedfiles(ctx, parents)
202 202
203 203 dstfiles = []
204 204 for f in files:
205 205 if f not in lfiles and f not in normalfiles:
206 206 islfile = _islfile(f, ctx, matcher, size)
207 207 # If this file was renamed or copied then copy
208 208 # the largefile-ness of its predecessor
209 209 if f in ctx.manifest():
210 210 fctx = ctx.filectx(f)
211 211 renamed = fctx.renamed()
212 212 if renamed is None:
213 213 # the code below assumes renamed to be a boolean or a list
214 214 # and won't quite work with the value None
215 215 renamed = False
216 216 renamedlfile = renamed and renamed[0] in lfiles
217 217 islfile |= renamedlfile
218 218 if 'l' in fctx.flags():
219 219 if renamedlfile:
220 220 raise error.Abort(
221 221 _('renamed/copied largefile %s becomes symlink')
222 222 % f)
223 223 islfile = False
224 224 if islfile:
225 225 lfiles.add(f)
226 226 else:
227 227 normalfiles.add(f)
228 228
229 229 if f in lfiles:
230 230 fstandin = lfutil.standin(f)
231 231 dstfiles.append(fstandin)
232 232 # largefile in manifest if it has not been removed/renamed
233 233 if f in ctx.manifest():
234 234 fctx = ctx.filectx(f)
235 235 if 'l' in fctx.flags():
236 236 renamed = fctx.renamed()
237 237 if renamed and renamed[0] in lfiles:
238 238 raise error.Abort(_('largefile %s becomes symlink') % f)
239 239
240 240 # largefile was modified, update standins
241 241 m = hashlib.sha1('')
242 242 m.update(ctx[f].data())
243 hash = m.hexdigest()
243 hash = node.hex(m.digest())
244 244 if f not in lfiletohash or lfiletohash[f] != hash:
245 245 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
246 246 executable = 'x' in ctx[f].flags()
247 247 lfutil.writestandin(rdst, fstandin, hash,
248 248 executable)
249 249 lfiletohash[f] = hash
250 250 else:
251 251 # normal file
252 252 dstfiles.append(f)
253 253
254 254 def getfilectx(repo, memctx, f):
255 255 srcfname = lfutil.splitstandin(f)
256 256 if srcfname is not None:
257 257 # if the file isn't in the manifest then it was removed
258 258 # or renamed, return None to indicate this
259 259 try:
260 260 fctx = ctx.filectx(srcfname)
261 261 except error.LookupError:
262 262 return None
263 263 renamed = fctx.renamed()
264 264 if renamed:
265 265 # standin is always a largefile because largefile-ness
266 266 # doesn't change after rename or copy
267 267 renamed = lfutil.standin(renamed[0])
268 268
269 269 return context.memfilectx(repo, memctx, f,
270 270 lfiletohash[srcfname] + '\n',
271 271 'l' in fctx.flags(), 'x' in fctx.flags(),
272 272 renamed)
273 273 else:
274 274 return _getnormalcontext(repo, ctx, f, revmap)
275 275
276 276 # Commit
277 277 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
278 278
279 279 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
280 280 mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
281 281 getfilectx, ctx.user(), ctx.date(), ctx.extra())
282 282 ret = rdst.commitctx(mctx)
283 283 lfutil.copyalltostore(rdst, ret)
284 284 rdst.setparents(ret)
285 285 revmap[ctx.node()] = rdst.changelog.tip()
286 286
287 287 # Generate list of changed files
288 288 def _getchangedfiles(ctx, parents):
289 289 files = set(ctx.files())
290 290 if node.nullid not in parents:
291 291 mc = ctx.manifest()
292 292 mp1 = ctx.parents()[0].manifest()
293 293 mp2 = ctx.parents()[1].manifest()
294 294 files |= (set(mp1) | set(mp2)) - set(mc)
295 295 for f in mc:
296 296 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
297 297 files.add(f)
298 298 return files
299 299
300 300 # Convert src parents to dst parents
301 301 def _convertparents(ctx, revmap):
302 302 parents = []
303 303 for p in ctx.parents():
304 304 parents.append(revmap[p.node()])
305 305 while len(parents) < 2:
306 306 parents.append(node.nullid)
307 307 return parents
308 308
309 309 # Get memfilectx for a normal file
310 310 def _getnormalcontext(repo, ctx, f, revmap):
311 311 try:
312 312 fctx = ctx.filectx(f)
313 313 except error.LookupError:
314 314 return None
315 315 renamed = fctx.renamed()
316 316 if renamed:
317 317 renamed = renamed[0]
318 318
319 319 data = fctx.data()
320 320 if f == '.hgtags':
321 321 data = _converttags (repo.ui, revmap, data)
322 322 return context.memfilectx(repo, ctx, f, data, 'l' in fctx.flags(),
323 323 'x' in fctx.flags(), renamed)
324 324
325 325 # Remap tag data using a revision map
326 326 def _converttags(ui, revmap, data):
327 327 newdata = []
328 328 for line in data.splitlines():
329 329 try:
330 330 id, name = line.split(' ', 1)
331 331 except ValueError:
332 332 ui.warn(_('skipping incorrectly formatted tag %s\n')
333 333 % line)
334 334 continue
335 335 try:
336 336 newid = node.bin(id)
337 337 except TypeError:
338 338 ui.warn(_('skipping incorrectly formatted id %s\n')
339 339 % id)
340 340 continue
341 341 try:
342 342 newdata.append('%s %s\n' % (node.hex(revmap[newid]),
343 343 name))
344 344 except KeyError:
345 345 ui.warn(_('no mapping for id %s\n') % id)
346 346 continue
347 347 return ''.join(newdata)
348 348
349 349 def _islfile(file, ctx, matcher, size):
350 350 '''Return true if file should be considered a largefile, i.e.
351 351 matcher matches it or it is larger than size.'''
352 352 # never store special .hg* files as largefiles
353 353 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
354 354 return False
355 355 if matcher and matcher(file):
356 356 return True
357 357 try:
358 358 return ctx.filectx(file).size() >= size * 1024 * 1024
359 359 except error.LookupError:
360 360 return False
361 361
362 362 def uploadlfiles(ui, rsrc, rdst, files):
363 363 '''upload largefiles to the central store'''
364 364
365 365 if not files:
366 366 return
367 367
368 368 store = storefactory.openstore(rsrc, rdst, put=True)
369 369
370 370 at = 0
371 371 ui.debug("sending statlfile command for %d largefiles\n" % len(files))
372 372 retval = store.exists(files)
373 373 files = [h for h in files if not retval[h]]
374 374 ui.debug("%d largefiles need to be uploaded\n" % len(files))
375 375
376 376 with ui.makeprogress(_('uploading largefiles'), unit=_('files'),
377 377 total=len(files)) as progress:
378 378 for hash in files:
379 379 progress.update(at)
380 380 source = lfutil.findfile(rsrc, hash)
381 381 if not source:
382 382 raise error.Abort(_('largefile %s missing from store'
383 383 ' (needs to be uploaded)') % hash)
384 384 # XXX check for errors here
385 385 store.put(source, hash)
386 386 at += 1
387 387
388 388 def verifylfiles(ui, repo, all=False, contents=False):
389 389 '''Verify that every largefile revision in the current changeset
390 390 exists in the central store. With --contents, also verify that
391 391 the contents of each local largefile file revision are correct (SHA-1 hash
392 392 matches the revision ID). With --all, check every changeset in
393 393 this repository.'''
394 394 if all:
395 395 revs = repo.revs('all()')
396 396 else:
397 397 revs = ['.']
398 398
399 399 store = storefactory.openstore(repo)
400 400 return store.verify(revs, contents=contents)
401 401
402 402 def cachelfiles(ui, repo, node, filelist=None):
403 403 '''cachelfiles ensures that all largefiles needed by the specified revision
404 404 are present in the repository's largefile cache.
405 405
406 406 returns a tuple (cached, missing). cached is the list of files downloaded
407 407 by this operation; missing is the list of files that were needed but could
408 408 not be found.'''
409 409 lfiles = lfutil.listlfiles(repo, node)
410 410 if filelist:
411 411 lfiles = set(lfiles) & set(filelist)
412 412 toget = []
413 413
414 414 ctx = repo[node]
415 415 for lfile in lfiles:
416 416 try:
417 417 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
418 418 except IOError as err:
419 419 if err.errno == errno.ENOENT:
420 420 continue # node must be None and standin wasn't found in wctx
421 421 raise
422 422 if not lfutil.findfile(repo, expectedhash):
423 423 toget.append((lfile, expectedhash))
424 424
425 425 if toget:
426 426 store = storefactory.openstore(repo)
427 427 ret = store.get(toget)
428 428 return ret
429 429
430 430 return ([], [])
431 431
432 432 def downloadlfiles(ui, repo, rev=None):
433 433 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
434 434 def prepare(ctx, fns):
435 435 pass
436 436 totalsuccess = 0
437 437 totalmissing = 0
438 438 if rev != []: # walkchangerevs on empty list would return all revs
439 439 for ctx in cmdutil.walkchangerevs(repo, match, {'rev' : rev},
440 440 prepare):
441 441 success, missing = cachelfiles(ui, repo, ctx.node())
442 442 totalsuccess += len(success)
443 443 totalmissing += len(missing)
444 444 ui.status(_("%d additional largefiles cached\n") % totalsuccess)
445 445 if totalmissing > 0:
446 446 ui.status(_("%d largefiles failed to download\n") % totalmissing)
447 447 return totalsuccess, totalmissing
448 448
449 449 def updatelfiles(ui, repo, filelist=None, printmessage=None,
450 450 normallookup=False):
451 451 '''Update largefiles according to standins in the working directory
452 452
453 453 If ``printmessage`` is other than ``None``, it means "print (or
454 454 ignore, for false) message forcibly".
455 455 '''
456 456 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
457 457 with repo.wlock():
458 458 lfdirstate = lfutil.openlfdirstate(ui, repo)
459 459 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
460 460
461 461 if filelist is not None:
462 462 filelist = set(filelist)
463 463 lfiles = [f for f in lfiles if f in filelist]
464 464
465 465 update = {}
466 466 dropped = set()
467 467 updated, removed = 0, 0
468 468 wvfs = repo.wvfs
469 469 wctx = repo[None]
470 470 for lfile in lfiles:
471 471 rellfile = lfile
472 472 rellfileorig = os.path.relpath(
473 473 scmutil.origpath(ui, repo, wvfs.join(rellfile)),
474 474 start=repo.root)
475 475 relstandin = lfutil.standin(lfile)
476 476 relstandinorig = os.path.relpath(
477 477 scmutil.origpath(ui, repo, wvfs.join(relstandin)),
478 478 start=repo.root)
479 479 if wvfs.exists(relstandin):
480 480 if (wvfs.exists(relstandinorig) and
481 481 wvfs.exists(rellfile)):
482 482 shutil.copyfile(wvfs.join(rellfile),
483 483 wvfs.join(rellfileorig))
484 484 wvfs.unlinkpath(relstandinorig)
485 485 expecthash = lfutil.readasstandin(wctx[relstandin])
486 486 if expecthash != '':
487 487 if lfile not in wctx: # not switched to normal file
488 488 if repo.dirstate[relstandin] != '?':
489 489 wvfs.unlinkpath(rellfile, ignoremissing=True)
490 490 else:
491 491 dropped.add(rellfile)
492 492
493 493 # use normallookup() to allocate an entry in largefiles
494 494 # dirstate to prevent lfilesrepo.status() from reporting
495 495 # missing files as removed.
496 496 lfdirstate.normallookup(lfile)
497 497 update[lfile] = expecthash
498 498 else:
499 499 # Remove lfiles for which the standin is deleted, unless the
500 500 # lfile is added to the repository again. This happens when a
501 501 # largefile is converted back to a normal file: the standin
502 502 # disappears, but a new (normal) file appears as the lfile.
503 503 if (wvfs.exists(rellfile) and
504 504 repo.dirstate.normalize(lfile) not in wctx):
505 505 wvfs.unlinkpath(rellfile)
506 506 removed += 1
507 507
508 508 # largefile processing might be slow and be interrupted - be prepared
509 509 lfdirstate.write()
510 510
511 511 if lfiles:
512 512 lfiles = [f for f in lfiles if f not in dropped]
513 513
514 514 for f in dropped:
515 515 repo.wvfs.unlinkpath(lfutil.standin(f))
516 516
517 517 # This needs to happen for dropped files, otherwise they stay in
518 518 # the M state.
519 519 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
520 520
521 521 statuswriter(_('getting changed largefiles\n'))
522 522 cachelfiles(ui, repo, None, lfiles)
523 523
524 524 for lfile in lfiles:
525 525 update1 = 0
526 526
527 527 expecthash = update.get(lfile)
528 528 if expecthash:
529 529 if not lfutil.copyfromcache(repo, expecthash, lfile):
530 530 # failed ... but already removed and set to normallookup
531 531 continue
532 532 # Synchronize largefile dirstate to the last modified
533 533 # time of the file
534 534 lfdirstate.normal(lfile)
535 535 update1 = 1
536 536
537 537 # copy the exec mode of largefile standin from the repository's
538 538 # dirstate to its state in the lfdirstate.
539 539 rellfile = lfile
540 540 relstandin = lfutil.standin(lfile)
541 541 if wvfs.exists(relstandin):
542 542 # exec is decided by the users permissions using mask 0o100
543 543 standinexec = wvfs.stat(relstandin).st_mode & 0o100
544 544 st = wvfs.stat(rellfile)
545 545 mode = st.st_mode
546 546 if standinexec != mode & 0o100:
547 547 # first remove all X bits, then shift all R bits to X
548 548 mode &= ~0o111
549 549 if standinexec:
550 550 mode |= (mode >> 2) & 0o111 & ~util.umask
551 551 wvfs.chmod(rellfile, mode)
552 552 update1 = 1
553 553
554 554 updated += update1
555 555
556 556 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
557 557
558 558 lfdirstate.write()
559 559 if lfiles:
560 560 statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
561 561 removed))
562 562
563 563 @command('lfpull',
564 564 [('r', 'rev', [], _('pull largefiles for these revisions'))
565 565 ] + cmdutil.remoteopts,
566 566 _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
567 567 def lfpull(ui, repo, source="default", **opts):
568 568 """pull largefiles for the specified revisions from the specified source
569 569
570 570 Pull largefiles that are referenced from local changesets but missing
571 571 locally, pulling from a remote repository to the local cache.
572 572
573 573 If SOURCE is omitted, the 'default' path will be used.
574 574 See :hg:`help urls` for more information.
575 575
576 576 .. container:: verbose
577 577
578 578 Some examples:
579 579
580 580 - pull largefiles for all branch heads::
581 581
582 582 hg lfpull -r "head() and not closed()"
583 583
584 584 - pull largefiles on the default branch::
585 585
586 586 hg lfpull -r "branch(default)"
587 587 """
588 588 repo.lfpullsource = source
589 589
590 590 revs = opts.get(r'rev', [])
591 591 if not revs:
592 592 raise error.Abort(_('no revisions specified'))
593 593 revs = scmutil.revrange(repo, revs)
594 594
595 595 numcached = 0
596 596 for rev in revs:
597 597 ui.note(_('pulling largefiles for revision %d\n') % rev)
598 598 (cached, missing) = cachelfiles(ui, repo, rev)
599 599 numcached += len(cached)
600 600 ui.status(_("%d largefiles cached\n") % numcached)
601 601
602 602 @command('debuglfput',
603 603 [] + cmdutil.remoteopts,
604 604 _('FILE'))
605 605 def debuglfput(ui, repo, filepath, **kwargs):
606 606 hash = lfutil.hashfile(filepath)
607 607 storefactory.openstore(repo).put(filepath, hash)
608 608 ui.write('%s\n' % hash)
609 609 return 0
@@ -1,643 +1,644 b''
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import contextlib
11 11 import errno
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import re
16 16 import socket
17 17
18 18 from mercurial.i18n import _
19 19
20 20 from mercurial import (
21 21 encoding,
22 22 error,
23 node,
23 24 pathutil,
24 25 pycompat,
25 26 url as urlmod,
26 27 util,
27 28 vfs as vfsmod,
28 29 worker,
29 30 )
30 31
31 32 from mercurial.utils import (
32 33 stringutil,
33 34 )
34 35
35 36 from ..largefiles import lfutil
36 37
37 38 # 64 bytes for SHA256
38 39 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39 40
40 41 class lfsvfs(vfsmod.vfs):
41 42 def join(self, path):
42 43 """split the path at first two characters, like: XX/XXXXX..."""
43 44 if not _lfsre.match(path):
44 45 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 46 return super(lfsvfs, self).join(path[0:2], path[2:])
46 47
47 48 def walk(self, path=None, onerror=None):
48 49 """Yield (dirpath, [], oids) tuple for blobs under path
49 50
50 51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 52 """
52 53 root = os.path.normpath(self.base)
53 54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 55 # because len(dirpath) < prefixlen.
55 56 prefixlen = len(pathutil.normasprefix(root))
56 57 oids = []
57 58
58 59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 60 onerror=onerror):
60 61 dirpath = dirpath[prefixlen:]
61 62
62 63 # Silently skip unexpected files and directories
63 64 if len(dirpath) == 2:
64 65 oids.extend([dirpath + f for f in files
65 66 if _lfsre.match(dirpath + f)])
66 67
67 68 yield ('', [], oids)
68 69
69 70 class nullvfs(lfsvfs):
70 71 def __init__(self):
71 72 pass
72 73
73 74 def exists(self, oid):
74 75 return False
75 76
76 77 def read(self, oid):
77 78 # store.read() calls into here if the blob doesn't exist in its
78 79 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 80 # file that doesn't exist. The only difference is the full file path
80 81 # isn't available in the error.
81 82 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82 83
83 84 def walk(self, path=None, onerror=None):
84 85 return ('', [], [])
85 86
86 87 def write(self, oid, data):
87 88 pass
88 89
89 90 class filewithprogress(object):
90 91 """a file-like object that supports __len__ and read.
91 92
92 93 Useful to provide progress information for how many bytes are read.
93 94 """
94 95
95 96 def __init__(self, fp, callback):
96 97 self._fp = fp
97 98 self._callback = callback # func(readsize)
98 99 fp.seek(0, os.SEEK_END)
99 100 self._len = fp.tell()
100 101 fp.seek(0)
101 102
102 103 def __len__(self):
103 104 return self._len
104 105
105 106 def read(self, size):
106 107 if self._fp is None:
107 108 return b''
108 109 data = self._fp.read(size)
109 110 if data:
110 111 if self._callback:
111 112 self._callback(len(data))
112 113 else:
113 114 self._fp.close()
114 115 self._fp = None
115 116 return data
116 117
117 118 class local(object):
118 119 """Local blobstore for large file contents.
119 120
120 121 This blobstore is used both as a cache and as a staging area for large blobs
121 122 to be uploaded to the remote blobstore.
122 123 """
123 124
124 125 def __init__(self, repo):
125 126 fullpath = repo.svfs.join('lfs/objects')
126 127 self.vfs = lfsvfs(fullpath)
127 128
128 129 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 130 self.cachevfs = nullvfs()
130 131 else:
131 132 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 133 self.cachevfs = lfsvfs(usercache)
133 134 self.ui = repo.ui
134 135
135 136 def open(self, oid):
136 137 """Open a read-only file descriptor to the named blob, in either the
137 138 usercache or the local store."""
138 139 # The usercache is the most likely place to hold the file. Commit will
139 140 # write to both it and the local store, as will anything that downloads
140 141 # the blobs. However, things like clone without an update won't
141 142 # populate the local store. For an init + push of a local clone,
142 143 # the usercache is the only place it _could_ be. If not present, the
143 144 # missing file msg here will indicate the local repo, not the usercache.
144 145 if self.cachevfs.exists(oid):
145 146 return self.cachevfs(oid, 'rb')
146 147
147 148 return self.vfs(oid, 'rb')
148 149
149 150 def download(self, oid, src):
150 151 """Read the blob from the remote source in chunks, verify the content,
151 152 and write to this local blobstore."""
152 153 sha256 = hashlib.sha256()
153 154
154 155 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 156 for chunk in util.filechunkiter(src, size=1048576):
156 157 fp.write(chunk)
157 158 sha256.update(chunk)
158 159
159 realoid = sha256.hexdigest()
160 realoid = node.hex(sha256.digest())
160 161 if realoid != oid:
161 162 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 163 % oid)
163 164
164 165 self._linktousercache(oid)
165 166
166 167 def write(self, oid, data):
167 168 """Write blob to local blobstore.
168 169
169 170 This should only be called from the filelog during a commit or similar.
170 171 As such, there is no need to verify the data. Imports from a remote
171 172 store must use ``download()`` instead."""
172 173 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 174 fp.write(data)
174 175
175 176 self._linktousercache(oid)
176 177
177 178 def linkfromusercache(self, oid):
178 179 """Link blobs found in the user cache into this store.
179 180
180 181 The server module needs to do this when it lets the client know not to
181 182 upload the blob, to ensure it is always available in this store.
182 183 Normally this is done implicitly when the client reads or writes the
183 184 blob, but that doesn't happen when the server tells the client that it
184 185 already has the blob.
185 186 """
186 187 if (not isinstance(self.cachevfs, nullvfs)
187 188 and not self.vfs.exists(oid)):
188 189 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 190 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190 191
191 192 def _linktousercache(self, oid):
192 193 # XXX: should we verify the content of the cache, and hardlink back to
193 194 # the local store on success, but truncate, write and link on failure?
194 195 if (not self.cachevfs.exists(oid)
195 196 and not isinstance(self.cachevfs, nullvfs)):
196 197 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 198 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198 199
199 200 def read(self, oid, verify=True):
200 201 """Read blob from local blobstore."""
201 202 if not self.vfs.exists(oid):
202 203 blob = self._read(self.cachevfs, oid, verify)
203 204
204 205 # Even though the revlog will verify the content, it needs to be verified
205 206 # now before making the hardlink to avoid propagating corrupt blobs.
206 207 # Don't abort if corruption is detected, because `hg verify` will
207 208 # give more useful info about the corruption; simply don't add the
208 209 # hardlink.
209 if verify or hashlib.sha256(blob).hexdigest() == oid:
210 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
210 211 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 212 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 213 else:
213 214 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 215 blob = self._read(self.vfs, oid, verify)
215 216 return blob
216 217
217 218 def _read(self, vfs, oid, verify):
218 219 """Read blob (after verifying) from the given store"""
219 220 blob = vfs.read(oid)
220 221 if verify:
221 222 _verify(oid, blob)
222 223 return blob
223 224
224 225 def verify(self, oid):
225 226 """Indicate whether or not the hash of the underlying file matches its
226 227 name."""
227 228 sha256 = hashlib.sha256()
228 229
229 230 with self.open(oid) as fp:
230 231 for chunk in util.filechunkiter(fp, size=1048576):
231 232 sha256.update(chunk)
232 233
233 return oid == sha256.hexdigest()
234 return oid == node.hex(sha256.digest())
234 235
235 236 def has(self, oid):
236 237 """Returns True if the local blobstore contains the requested blob,
237 238 False otherwise."""
238 239 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239 240
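The store above is content addressed: every blob is named by the hex SHA-256 of its data, which is why download(), read() and verify() all recompute the hash before trusting a file. The sketch below (it assumes Mercurial is importable) illustrates the equivalence that the hexdigest() -> node.hex(digest()) change in this diff relies on: the same hex characters, but always as bytes on Python 3.

import hashlib
from mercurial import node

blob = b'hello lfs'
m = hashlib.sha256(blob)
# node.hex() is hexlify(), so it yields bytes on both Python 2 and 3,
# matching the bytes oids used throughout this module.
assert node.hex(m.digest()) == m.hexdigest().encode('ascii')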
240 241 def _urlerrorreason(urlerror):
241 242 '''Create a friendly message for the given URLError to be used in an
242 243 LfsRemoteError message.
243 244 '''
244 245 inst = urlerror
245 246
246 247 if isinstance(urlerror.reason, Exception):
247 248 inst = urlerror.reason
248 249
249 250 if util.safehasattr(inst, 'reason'):
250 251 try: # usually it is in the form (errno, strerror)
251 252 reason = inst.reason.args[1]
252 253 except (AttributeError, IndexError):
253 254 # it might be anything, for example a string
254 255 reason = inst.reason
255 256 if isinstance(reason, pycompat.unicode):
256 257 # SSLError of Python 2.7.9 contains a unicode
257 258 reason = encoding.unitolocal(reason)
258 259 return reason
259 260 elif getattr(inst, "strerror", None):
260 261 return encoding.strtolocal(inst.strerror)
261 262 else:
262 263 return stringutil.forcebytestr(urlerror)
263 264
264 265 class _gitlfsremote(object):
265 266
266 267 def __init__(self, repo, url):
267 268 ui = repo.ui
268 269 self.ui = ui
269 270 baseurl, authinfo = url.authinfo()
270 271 self.baseurl = baseurl.rstrip('/')
271 272 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 273 if not useragent:
273 274 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 275 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 276 self.retry = ui.configint('lfs', 'retry')
276 277
277 278 def writebatch(self, pointers, fromstore):
278 279 """Batch upload from local to remote blobstore."""
279 280 self._batch(_deduplicate(pointers), fromstore, 'upload')
280 281
281 282 def readbatch(self, pointers, tostore):
282 283 """Batch download from remote to local blostore."""
283 284 self._batch(_deduplicate(pointers), tostore, 'download')
284 285
285 286 def _batchrequest(self, pointers, action):
286 287 """Get metadata about objects pointed by pointers for given action
287 288
288 289 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
289 290 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
290 291 """
291 292 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
292 293 requestdata = json.dumps({
293 294 'objects': objects,
294 295 'operation': action,
295 296 })
296 297 url = '%s/objects/batch' % self.baseurl
297 298 batchreq = util.urlreq.request(url, data=requestdata)
298 299 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
299 300 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
300 301 try:
301 302 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
302 303 rawjson = rsp.read()
303 304 except util.urlerr.httperror as ex:
304 305 hints = {
305 306 400: _('check that lfs serving is enabled on %s and "%s" is '
306 307 'supported') % (self.baseurl, action),
307 308 404: _('the "lfs.url" config may be used to override %s')
308 309 % self.baseurl,
309 310 }
310 311 hint = hints.get(ex.code, _('api=%s, action=%s') % (url, action))
311 312 raise LfsRemoteError(_('LFS HTTP error: %s') % ex, hint=hint)
312 313 except util.urlerr.urlerror as ex:
313 314 hint = (_('the "lfs.url" config may be used to override %s')
314 315 % self.baseurl)
315 316 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
316 317 hint=hint)
317 318 try:
318 319 response = json.loads(rawjson)
319 320 except ValueError:
320 321 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
321 322 % rawjson)
322 323
323 324 if self.ui.debugflag:
324 325 self.ui.debug('Status: %d\n' % rsp.status)
325 326 # lfs-test-server and hg serve return headers in different order
326 327 self.ui.debug('%s\n'
327 328 % '\n'.join(sorted(str(rsp.info()).splitlines())))
328 329
329 330 if 'objects' in response:
330 331 response['objects'] = sorted(response['objects'],
331 332 key=lambda p: p['oid'])
332 333 self.ui.debug('%s\n'
333 334 % json.dumps(response, indent=2,
334 335 separators=('', ': '), sort_keys=True))
335 336
336 337 return response
337 338
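For reference, a hand-written illustration of the payload _batchrequest() builds and the kind of object _extractobjects() and _checkforservererror() expect back. The oid, size, href and header values are made up; the field names follow the git-lfs batch API document linked above.

import json

request_body = json.dumps({
    'objects': [{'oid': '31' * 32, 'size': 12}],
    'operation': 'download',
})

response = {
    'objects': [{
        'oid': '31' * 32,
        'size': 12,
        'actions': {
            'download': {
                'href': 'https://example.com/objects/' + '31' * 32,
                'header': {'Authorization': 'Basic ...'},
            },
        },
    }],
}

# _extractobjects() keeps only objects whose 'actions' include the requested
# action; an object carrying an 'error' dict (or missing the 'download'
# action) is reported by _checkforservererror() instead.
assert [o for o in response['objects'] if 'download' in o.get('actions', {})]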
338 339 def _checkforservererror(self, pointers, responses, action):
339 340 """Scans errors from objects
340 341
341 342 Raises LfsRemoteError if any objects have an error"""
342 343 for response in responses:
343 344 # The server should return 404 when objects cannot be found. Some
344 345 # server implementations (e.g. lfs-test-server) do not set "error"
345 346 # but just remove "download" from "actions". Treat that case
346 347 # the same as a 404 error.
347 348 if 'error' not in response:
348 349 if (action == 'download'
349 350 and action not in response.get('actions', [])):
350 351 code = 404
351 352 else:
352 353 continue
353 354 else:
354 355 # An error dict without a code doesn't make much sense, so
355 356 # treat as a server error.
356 357 code = response.get('error').get('code', 500)
357 358
358 359 ptrmap = {p.oid(): p for p in pointers}
359 360 p = ptrmap.get(response['oid'], None)
360 361 if p:
361 362 filename = getattr(p, 'filename', 'unknown')
362 363 errors = {
363 364 404: 'The object does not exist',
364 365 410: 'The object was removed by the owner',
365 366 422: 'Validation error',
366 367 500: 'Internal server error',
367 368 }
368 369 msg = errors.get(code, 'status code %d' % code)
369 370 raise LfsRemoteError(_('LFS server error for "%s": %s')
370 371 % (filename, msg))
371 372 else:
372 373 raise LfsRemoteError(
373 374 _('LFS server error. Unsolicited response for oid %s')
374 375 % response['oid'])
375 376
376 377 def _extractobjects(self, response, pointers, action):
377 378 """extract objects from response of the batch API
378 379
379 380 response: parsed JSON object returned by batch API
380 381 return response['objects'] filtered by action
381 382 raise if any object has an error
382 383 """
383 384 # Scan errors from objects - fail early
384 385 objects = response.get('objects', [])
385 386 self._checkforservererror(pointers, objects, action)
386 387
387 388 # Filter objects with given action. Practically, this skips uploading
388 389 # objects which already exist on the server.
389 390 filteredobjects = [o for o in objects if action in o.get('actions', [])]
390 391
391 392 return filteredobjects
392 393
393 394 def _basictransfer(self, obj, action, localstore):
394 395 """Download or upload a single object using basic transfer protocol
395 396
396 397 obj: dict, an object description returned by batch API
397 398 action: string, one of ['upload', 'download']
398 399 localstore: blobstore.local
399 400
400 401 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
401 402 basic-transfers.md
402 403 """
403 404 oid = pycompat.bytestr(obj['oid'])
404 405
405 406 href = pycompat.bytestr(obj['actions'][action].get('href'))
406 407 headers = obj['actions'][action].get('header', {}).items()
407 408
408 409 request = util.urlreq.request(href)
409 410 if action == 'upload':
410 411 # If uploading blobs, read data from local blobstore.
411 412 if not localstore.verify(oid):
412 413 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
413 414 hint=_('run hg verify'))
414 415 request.data = filewithprogress(localstore.open(oid), None)
415 416 request.get_method = lambda: 'PUT'
416 417 request.add_header('Content-Type', 'application/octet-stream')
417 418
418 419 for k, v in headers:
419 420 request.add_header(k, v)
420 421
421 422 response = b''
422 423 try:
423 424 with contextlib.closing(self.urlopener.open(request)) as req:
424 425 ui = self.ui # Shorten debug lines
425 426 if self.ui.debugflag:
426 427 ui.debug('Status: %d\n' % req.status)
427 428 # lfs-test-server and hg serve return headers in different
428 429 # order
429 430 ui.debug('%s\n'
430 431 % '\n'.join(sorted(str(req.info()).splitlines())))
431 432
432 433 if action == 'download':
433 434 # If downloading blobs, store downloaded data to local
434 435 # blobstore
435 436 localstore.download(oid, req)
436 437 else:
437 438 while True:
438 439 data = req.read(1048576)
439 440 if not data:
440 441 break
441 442 response += data
442 443 if response:
443 444 ui.debug('lfs %s response: %s' % (action, response))
444 445 except util.urlerr.httperror as ex:
445 446 if self.ui.debugflag:
446 447 self.ui.debug('%s: %s\n' % (oid, ex.read()))
447 448 raise LfsRemoteError(_('LFS HTTP error: %s (oid=%s, action=%s)')
448 449 % (ex, oid, action))
449 450 except util.urlerr.urlerror as ex:
450 451 hint = (_('attempted connection to %s')
451 452 % util.urllibcompat.getfullurl(request))
452 453 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
453 454 hint=hint)
454 455
455 456 def _batch(self, pointers, localstore, action):
456 457 if action not in ['upload', 'download']:
457 458 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
458 459
459 460 response = self._batchrequest(pointers, action)
460 461 objects = self._extractobjects(response, pointers, action)
461 462 total = sum(x.get('size', 0) for x in objects)
462 463 sizes = {}
463 464 for obj in objects:
464 465 sizes[obj.get('oid')] = obj.get('size', 0)
465 466 topic = {'upload': _('lfs uploading'),
466 467 'download': _('lfs downloading')}[action]
467 468 if len(objects) > 1:
468 469 self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
469 470 % (len(objects), util.bytecount(total)))
470 471
471 472 def transfer(chunk):
472 473 for obj in chunk:
473 474 objsize = obj.get('size', 0)
474 475 if self.ui.verbose:
475 476 if action == 'download':
476 477 msg = _('lfs: downloading %s (%s)\n')
477 478 elif action == 'upload':
478 479 msg = _('lfs: uploading %s (%s)\n')
479 480 self.ui.note(msg % (obj.get('oid'),
480 481 util.bytecount(objsize)))
481 482 retry = self.retry
482 483 while True:
483 484 try:
484 485 self._basictransfer(obj, action, localstore)
485 486 yield 1, obj.get('oid')
486 487 break
487 488 except socket.error as ex:
488 489 if retry > 0:
489 490 self.ui.note(
490 491 _('lfs: failed: %r (remaining retry %d)\n')
491 492 % (ex, retry))
492 493 retry -= 1
493 494 continue
494 495 raise
495 496
496 497 # Until https multiplexing gets sorted out
497 498 if self.ui.configbool('experimental', 'lfs.worker-enable'):
498 499 oids = worker.worker(self.ui, 0.1, transfer, (),
499 500 sorted(objects, key=lambda o: o.get('oid')))
500 501 else:
501 502 oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
502 503
503 504 with self.ui.makeprogress(topic, total=total) as progress:
504 505 progress.update(0)
505 506 processed = 0
506 507 blobs = 0
507 508 for _one, oid in oids:
508 509 processed += sizes[oid]
509 510 blobs += 1
510 511 progress.update(processed)
511 512 self.ui.note(_('lfs: processed: %s\n') % oid)
512 513
513 514 if blobs > 0:
514 515 if action == 'upload':
515 516 self.ui.status(_('lfs: uploaded %d files (%s)\n')
516 517 % (blobs, util.bytecount(processed)))
517 518 elif action == 'download':
518 519 self.ui.status(_('lfs: downloaded %d files (%s)\n')
519 520 % (blobs, util.bytecount(processed)))
520 521
521 522 def __del__(self):
522 523 # copied from mercurial/httppeer.py
523 524 urlopener = getattr(self, 'urlopener', None)
524 525 if urlopener:
525 526 for h in urlopener.handlers:
526 527 h.close()
527 528 getattr(h, "close_all", lambda : None)()
528 529
529 530 class _dummyremote(object):
530 531 """Dummy store storing blobs to temp directory."""
531 532
532 533 def __init__(self, repo, url):
533 534 fullpath = repo.vfs.join('lfs', url.path)
534 535 self.vfs = lfsvfs(fullpath)
535 536
536 537 def writebatch(self, pointers, fromstore):
537 538 for p in _deduplicate(pointers):
538 539 content = fromstore.read(p.oid(), verify=True)
539 540 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
540 541 fp.write(content)
541 542
542 543 def readbatch(self, pointers, tostore):
543 544 for p in _deduplicate(pointers):
544 545 with self.vfs(p.oid(), 'rb') as fp:
545 546 tostore.download(p.oid(), fp)
546 547
547 548 class _nullremote(object):
548 549 """Null store storing blobs to /dev/null."""
549 550
550 551 def __init__(self, repo, url):
551 552 pass
552 553
553 554 def writebatch(self, pointers, fromstore):
554 555 pass
555 556
556 557 def readbatch(self, pointers, tostore):
557 558 pass
558 559
559 560 class _promptremote(object):
560 561 """Prompt user to set lfs.url when accessed."""
561 562
562 563 def __init__(self, repo, url):
563 564 pass
564 565
565 566 def writebatch(self, pointers, fromstore, ui=None):
566 567 self._prompt()
567 568
568 569 def readbatch(self, pointers, tostore, ui=None):
569 570 self._prompt()
570 571
571 572 def _prompt(self):
572 573 raise error.Abort(_('lfs.url needs to be configured'))
573 574
574 575 _storemap = {
575 576 'https': _gitlfsremote,
576 577 'http': _gitlfsremote,
577 578 'file': _dummyremote,
578 579 'null': _nullremote,
579 580 None: _promptremote,
580 581 }
581 582
582 583 def _deduplicate(pointers):
583 584 """Remove any duplicate oids that exist in the list"""
584 585 reduced = util.sortdict()
585 586 for p in pointers:
586 587 reduced[p.oid()] = p
587 588 return reduced.values()
588 589
589 590 def _verify(oid, content):
590 realoid = hashlib.sha256(content).hexdigest()
591 realoid = node.hex(hashlib.sha256(content).digest())
591 592 if realoid != oid:
592 593 raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
593 594 hint=_('run hg verify'))
594 595
595 596 def remote(repo, remote=None):
596 597 """remotestore factory. return a store in _storemap depending on config
597 598
598 599 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
598 599 infer the endpoint based on the remote repository, using the same path
600 601 adjustments as git. As an extension, 'http' is supported as well so that
601 602 ``hg serve`` works out of the box.
602 603
603 604 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
604 605 """
605 606 lfsurl = repo.ui.config('lfs', 'url')
606 607 url = util.url(lfsurl or '')
607 608 if lfsurl is None:
608 609 if remote:
609 610 path = remote
610 611 elif util.safehasattr(repo, '_subtoppath'):
611 612 # The pull command sets this during the optional update phase, which
612 613 # tells exactly where the pull originated, whether 'paths.default'
613 614 # or explicit.
614 615 path = repo._subtoppath
615 616 else:
616 617 # TODO: investigate 'paths.remote:lfsurl' style path customization,
617 618 # and fall back to inferring from 'paths.remote' if unspecified.
618 619 path = repo.ui.config('paths', 'default') or ''
619 620
620 621 defaulturl = util.url(path)
621 622
622 623 # TODO: support local paths as well.
623 624 # TODO: consider the ssh -> https transformation that git applies
624 625 if defaulturl.scheme in (b'http', b'https'):
625 626 if defaulturl.path and defaulturl.path[-1:] != b'/':
626 627 defaulturl.path += b'/'
627 628 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
628 629
629 630 url = util.url(bytes(defaulturl))
630 631 repo.ui.note(_('lfs: assuming remote store: %s\n') % url)
631 632
632 633 scheme = url.scheme
633 634 if scheme not in _storemap:
634 635 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
635 636 return _storemap[scheme](repo, url)
636 637
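A rough standalone sketch of the endpoint inference above, for the common case where lfs.url is unset and paths.default is an http(s) URL. infer_lfs_endpoint is an illustrative helper, not part of the extension, and it glosses over the util.url handling of credentials, query strings and fragments.

def infer_lfs_endpoint(default_path):
    """'https://host/repo' -> 'https://host/repo/.git/info/lfs'"""
    if not default_path.startswith(('http://', 'https://')):
        return None          # remote() would pick a different store instead
    if not default_path.endswith('/'):
        default_path += '/'
    return default_path + '.git/info/lfs'

assert (infer_lfs_endpoint('https://example.com/repo')
        == 'https://example.com/repo/.git/info/lfs')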
637 638 class LfsRemoteError(error.StorageError):
638 639 pass
639 640
640 641 class LfsCorruptionError(error.Abort):
641 642 """Raised when a corrupt blob is detected, aborting an operation
642 643
643 644 It exists to allow specialized handling on the server side."""
@@ -1,539 +1,540 b''
1 1 from __future__ import absolute_import
2 2
3 3 import collections
4 4 import errno
5 5 import hashlib
6 6 import mmap
7 7 import os
8 8 import struct
9 9 import time
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial import (
13 node as nodemod,
13 14 policy,
14 15 pycompat,
15 16 util,
16 17 vfs as vfsmod,
17 18 )
18 19 from . import shallowutil
19 20
20 21 osutil = policy.importmod(r'osutil')
21 22
22 23 # The pack version supported by this implementation. This will need to be
23 24 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
24 25 # changing any of the int sizes, changing the delta algorithm, etc.
25 26 PACKVERSIONSIZE = 1
26 27 INDEXVERSIONSIZE = 2
27 28
28 29 FANOUTSTART = INDEXVERSIONSIZE
29 30
30 31 # Constant that indicates a fanout table entry hasn't been filled in. (This does
31 32 # not get serialized)
32 33 EMPTYFANOUT = -1
33 34
34 35 # The fanout prefix is the number of bytes that can be addressed by the fanout
35 36 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
36 37 # look in the fanout table (which will be 2^8 entries long).
37 38 SMALLFANOUTPREFIX = 1
38 39 LARGEFANOUTPREFIX = 2
39 40
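A standalone sketch of what the fanout prefix means in practice: the first one or two bytes of a node, unpacked as an unsigned integer, index into a 2^8 or 2^16 entry fanout table. The struct formats mirror the ones chosen by indexparams further down.

import struct

nodeid = b'\xab\xcd' + b'\x00' * 18               # a fake 20-byte sha1 node
small_key = struct.unpack('!B', nodeid[:1])[0]    # SMALLFANOUTPREFIX == 1
large_key = struct.unpack('!H', nodeid[:2])[0]    # LARGEFANOUTPREFIX == 2
assert small_key == 0xab and large_key == 0xabcd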
40 41 # The number of entries in the index at which point we switch to a large fanout.
41 42 # It is chosen to balance the linear scan through a sparse fanout, with the
42 43 # size of the bisect in actual index.
43 44 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
44 45 # bisect) with (8 step fanout scan + 1 step bisect)
45 46 # 5 step bisect = log(2^16 / 8 / 255) # fanout
46 47 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
47 48 SMALLFANOUTCUTOFF = 2**16 / 8
48 49
49 50 # The amount of time to wait between checking for new packs. This prevents an
50 51 # exception when data is moved to a new pack after the process has already
51 52 # loaded the pack list.
52 53 REFRESHRATE = 0.1
53 54
54 55 if pycompat.isposix:
55 56 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
56 57 # The 'e' flag will be ignored on older versions of glibc.
57 58 PACKOPENMODE = 'rbe'
58 59 else:
59 60 PACKOPENMODE = 'rb'
60 61
61 62 class _cachebackedpacks(object):
62 63 def __init__(self, packs, cachesize):
63 64 self._packs = set(packs)
64 65 self._lrucache = util.lrucachedict(cachesize)
65 66 self._lastpack = None
66 67
67 68 # Avoid cold start of the cache by populating the most recent packs
68 69 # in the cache.
69 70 for i in reversed(range(min(cachesize, len(packs)))):
70 71 self._movetofront(packs[i])
71 72
72 73 def _movetofront(self, pack):
73 74 # This effectively makes pack the first entry in the cache.
74 75 self._lrucache[pack] = True
75 76
76 77 def _registerlastpackusage(self):
77 78 if self._lastpack is not None:
78 79 self._movetofront(self._lastpack)
79 80 self._lastpack = None
80 81
81 82 def add(self, pack):
82 83 self._registerlastpackusage()
83 84
84 85 # This method will mostly be called when packs are not in cache.
85 86 # Therefore, add the pack to the cache.
86 87 self._movetofront(pack)
87 88 self._packs.add(pack)
88 89
89 90 def __iter__(self):
90 91 self._registerlastpackusage()
91 92
92 93 # Cache iteration is based on LRU.
93 94 for pack in self._lrucache:
94 95 self._lastpack = pack
95 96 yield pack
96 97
97 98 cachedpacks = set(pack for pack in self._lrucache)
98 99 # Yield for paths not in the cache.
99 100 for pack in self._packs - cachedpacks:
100 101 self._lastpack = pack
101 102 yield pack
102 103
103 104 # Data not found in any pack.
104 105 self._lastpack = None
105 106
106 107 class basepackstore(object):
107 108 # Default cache size limit for the pack files.
108 109 DEFAULTCACHESIZE = 100
109 110
110 111 def __init__(self, ui, path):
111 112 self.ui = ui
112 113 self.path = path
113 114
114 115 # lastrefresh is 0 so we'll immediately check for new packs on the first
115 116 # failure.
116 117 self.lastrefresh = 0
117 118
118 119 packs = []
119 120 for filepath, __, __ in self._getavailablepackfilessorted():
120 121 try:
121 122 pack = self.getpack(filepath)
122 123 except Exception as ex:
123 124 # An exception may be thrown if the pack file is corrupted
124 125 # somehow. Log a warning but keep going in this case, just
125 126 # skipping this pack file.
126 127 #
127 128 # If this is an ENOENT error then don't even bother logging.
128 129 # Someone could have removed the file since we retrieved the
129 130 # list of paths.
130 131 if getattr(ex, 'errno', None) != errno.ENOENT:
131 132 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
132 133 continue
133 134 packs.append(pack)
134 135
135 136 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
136 137
137 138 def _getavailablepackfiles(self):
138 139 """For each pack file (a index/data file combo), yields:
139 140 (full path without extension, mtime, size)
140 141
141 142 mtime will be the mtime of the index/data file (whichever is newer)
142 143 size is the combined size of index/data file
143 144 """
144 145 indexsuffixlen = len(self.INDEXSUFFIX)
145 146 packsuffixlen = len(self.PACKSUFFIX)
146 147
147 148 ids = set()
148 149 sizes = collections.defaultdict(lambda: 0)
149 150 mtimes = collections.defaultdict(lambda: [])
150 151 try:
151 152 for filename, type, stat in osutil.listdir(self.path, stat=True):
152 153 id = None
153 154 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
154 155 id = filename[:-indexsuffixlen]
155 156 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
156 157 id = filename[:-packsuffixlen]
157 158
158 159 # Since we expect to have two files corresponding to each ID
159 160 # (the index file and the pack file), we can yield once we see
160 161 # it twice.
161 162 if id:
162 163 sizes[id] += stat.st_size # Sum both files' sizes together
163 164 mtimes[id].append(stat.st_mtime)
164 165 if id in ids:
165 166 yield (os.path.join(self.path, id), max(mtimes[id]),
166 167 sizes[id])
167 168 else:
168 169 ids.add(id)
169 170 except OSError as ex:
170 171 if ex.errno != errno.ENOENT:
171 172 raise
172 173
173 174 def _getavailablepackfilessorted(self):
174 175 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
175 176 yielding newest files first.
176 177
177 178 This is desirable, since it is more likely newer packfiles have more
178 179 desirable data.
179 180 """
180 181 files = []
181 182 for path, mtime, size in self._getavailablepackfiles():
182 183 files.append((mtime, size, path))
183 184 files = sorted(files, reverse=True)
184 185 for mtime, size, path in files:
185 186 yield path, mtime, size
186 187
187 188 def gettotalsizeandcount(self):
188 189 """Returns the total disk size (in bytes) of all the pack files in
189 190 this store, and the count of pack files.
190 191
191 192 (This might be smaller than the total size of the ``self.path``
192 193 directory, since this only considers fuly-writen pack files, and not
193 194 temporary files or other detritus on the directory.)
194 195 """
195 196 totalsize = 0
196 197 count = 0
197 198 for __, __, size in self._getavailablepackfiles():
198 199 totalsize += size
199 200 count += 1
200 201 return totalsize, count
201 202
202 203 def getmetrics(self):
203 204 """Returns metrics on the state of this store."""
204 205 size, count = self.gettotalsizeandcount()
205 206 return {
206 207 'numpacks': count,
207 208 'totalpacksize': size,
208 209 }
209 210
210 211 def getpack(self, path):
211 212 raise NotImplementedError()
212 213
213 214 def getmissing(self, keys):
214 215 missing = keys
215 216 for pack in self.packs:
216 217 missing = pack.getmissing(missing)
217 218
218 219 # Ensures better performance of the cache by keeping the most
219 220 # recently accessed pack at the beginning in subsequent iterations.
220 221 if not missing:
221 222 return missing
222 223
223 224 if missing:
224 225 for pack in self.refresh():
225 226 missing = pack.getmissing(missing)
226 227
227 228 return missing
228 229
229 230 def markledger(self, ledger, options=None):
230 231 for pack in self.packs:
231 232 pack.markledger(ledger)
232 233
233 234 def markforrefresh(self):
234 235 """Tells the store that there may be new pack files, so the next time it
235 236 has a lookup miss it should check for new files."""
236 237 self.lastrefresh = 0
237 238
238 239 def refresh(self):
239 240 """Checks for any new packs on disk, adds them to the main pack list,
240 241 and returns a list of just the new packs."""
241 242 now = time.time()
242 243
243 244 # If we experience a lot of misses (like in the case of getmissing() on
244 245 # new objects), let's only actually check disk for new stuff every once
245 246 # in a while. Generally this code path should only ever matter when a
246 247 # repack is going on in the background, and it should be pretty rare
247 248 # for that to happen twice in quick succession.
248 249 newpacks = []
249 250 if now > self.lastrefresh + REFRESHRATE:
250 251 self.lastrefresh = now
251 252 previous = set(p.path for p in self.packs)
252 253 for filepath, __, __ in self._getavailablepackfilessorted():
253 254 if filepath not in previous:
254 255 newpack = self.getpack(filepath)
255 256 newpacks.append(newpack)
256 257 self.packs.add(newpack)
257 258
258 259 return newpacks
259 260
260 261 class versionmixin(object):
261 262 # Mix-in for classes with multiple supported versions
262 263 VERSION = None
263 264 SUPPORTED_VERSIONS = [2]
264 265
265 266 def _checkversion(self, version):
266 267 if version in self.SUPPORTED_VERSIONS:
267 268 if self.VERSION is None:
268 269 # only affect this instance
269 270 self.VERSION = version
270 271 elif self.VERSION != version:
271 272 raise RuntimeError('inconsistent version: %s' % version)
272 273 else:
273 274 raise RuntimeError('unsupported version: %s' % version)
274 275
275 276 class basepack(versionmixin):
276 277 # The maximum amount we should read via mmap before remapping so the old
277 278 # pages can be released (100MB)
278 279 MAXPAGEDIN = 100 * 1024**2
279 280
280 281 SUPPORTED_VERSIONS = [2]
281 282
282 283 def __init__(self, path):
283 284 self.path = path
284 285 self.packpath = path + self.PACKSUFFIX
285 286 self.indexpath = path + self.INDEXSUFFIX
286 287
287 288 self.indexsize = os.stat(self.indexpath).st_size
288 289 self.datasize = os.stat(self.packpath).st_size
289 290
290 291 self._index = None
291 292 self._data = None
292 293 self.freememory() # initialize the mmap
293 294
294 295 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
295 296 self._checkversion(version)
296 297
297 298 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
298 299 self._checkversion(version)
299 300
300 301 if 0b10000000 & config:
301 302 self.params = indexparams(LARGEFANOUTPREFIX, version)
302 303 else:
303 304 self.params = indexparams(SMALLFANOUTPREFIX, version)
304 305
305 306 @util.propertycache
306 307 def _fanouttable(self):
307 308 params = self.params
308 309 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
309 310 fanouttable = []
310 311 for i in pycompat.xrange(0, params.fanoutcount):
311 312 loc = i * 4
312 313 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
313 314 fanouttable.append(fanoutentry)
314 315 return fanouttable
315 316
316 317 @util.propertycache
317 318 def _indexend(self):
318 319 nodecount = struct.unpack_from('!Q', self._index,
319 320 self.params.indexstart - 8)[0]
320 321 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
321 322
322 323 def freememory(self):
323 324 """Unmap and remap the memory to free it up after known expensive
324 325 operations. Return True if self._data and self._index were reloaded.
325 326 """
326 327 if self._index:
327 328 if self._pagedin < self.MAXPAGEDIN:
328 329 return False
329 330
330 331 self._index.close()
331 332 self._data.close()
332 333
333 334 # TODO: use an opener/vfs to access these paths
334 335 with open(self.indexpath, PACKOPENMODE) as indexfp:
335 336 # memory-map the file, size 0 means whole file
336 337 self._index = mmap.mmap(indexfp.fileno(), 0,
337 338 access=mmap.ACCESS_READ)
338 339 with open(self.packpath, PACKOPENMODE) as datafp:
339 340 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
340 341
341 342 self._pagedin = 0
342 343 return True
343 344
344 345 def getmissing(self, keys):
345 346 raise NotImplementedError()
346 347
347 348 def markledger(self, ledger, options=None):
348 349 raise NotImplementedError()
349 350
350 351 def cleanup(self, ledger):
351 352 raise NotImplementedError()
352 353
353 354 def __iter__(self):
354 355 raise NotImplementedError()
355 356
356 357 def iterentries(self):
357 358 raise NotImplementedError()
358 359
359 360 class mutablebasepack(versionmixin):
360 361
361 362 def __init__(self, ui, packdir, version=2):
362 363 self._checkversion(version)
363 364 # TODO(augie): make this configurable
364 365 self._compressor = 'GZ'
365 366 opener = vfsmod.vfs(packdir)
366 367 opener.createmode = 0o444
367 368 self.opener = opener
368 369
369 370 self.entries = {}
370 371
371 372 shallowutil.mkstickygroupdir(ui, packdir)
372 373 self.packfp, self.packpath = opener.mkstemp(
373 374 suffix=self.PACKSUFFIX + '-tmp')
374 375 self.idxfp, self.idxpath = opener.mkstemp(
375 376 suffix=self.INDEXSUFFIX + '-tmp')
376 377 self.packfp = os.fdopen(self.packfp, r'wb+')
377 378 self.idxfp = os.fdopen(self.idxfp, r'wb+')
378 379 self.sha = hashlib.sha1()
379 380 self._closed = False
380 381
381 382 # The opener provides no way of doing permission fixup on files created
382 383 # via mkstemp, so we must fix it ourselves. We can probably fix this
383 384 # upstream in vfs.mkstemp so we don't need to use the private method.
384 385 opener._fixfilemode(opener.join(self.packpath))
385 386 opener._fixfilemode(opener.join(self.idxpath))
386 387
387 388 # Write header
388 389 # TODO: make it extensible (ex: allow specifying compression algorithm,
389 390 # a flexible key/value header, delta algorithm, fanout size, etc)
390 391 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
391 392 self.writeraw(versionbuf)
392 393
393 394 def __enter__(self):
394 395 return self
395 396
396 397 def __exit__(self, exc_type, exc_value, traceback):
397 398 if exc_type is None:
398 399 self.close()
399 400 else:
400 401 self.abort()
401 402
402 403 def abort(self):
403 404 # Unclean exit
404 405 self._cleantemppacks()
405 406
406 407 def writeraw(self, data):
407 408 self.packfp.write(data)
408 409 self.sha.update(data)
409 410
410 411 def close(self, ledger=None):
411 412 if self._closed:
412 413 return
413 414
414 415 try:
415 sha = self.sha.hexdigest()
416 sha = nodemod.hex(self.sha.digest())
416 417 self.packfp.close()
417 418 self.writeindex()
418 419
419 420 if len(self.entries) == 0:
420 421 # Empty pack
421 422 self._cleantemppacks()
422 423 self._closed = True
423 424 return None
424 425
425 426 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
426 427 try:
427 428 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
428 429 except Exception as ex:
429 430 try:
430 431 self.opener.unlink(sha + self.PACKSUFFIX)
431 432 except Exception:
432 433 pass
433 434 # Throw exception 'ex' explicitly since a normal 'raise' would
434 435 # potentially throw an exception from the unlink cleanup.
435 436 raise ex
436 437 except Exception:
437 438 # Clean up temp packs in all exception cases
438 439 self._cleantemppacks()
439 440 raise
440 441
441 442 self._closed = True
442 443 result = self.opener.join(sha)
443 444 if ledger:
444 445 ledger.addcreated(result)
445 446 return result
446 447
447 448 def _cleantemppacks(self):
448 449 try:
449 450 self.opener.unlink(self.packpath)
450 451 except Exception:
451 452 pass
452 453 try:
453 454 self.opener.unlink(self.idxpath)
454 455 except Exception:
455 456 pass
456 457
457 458 def writeindex(self):
458 459 rawindex = ''
459 460
460 461 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
461 462 if largefanout:
462 463 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
463 464 else:
464 465 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
465 466
466 467 fanouttable = [EMPTYFANOUT] * params.fanoutcount
467 468
468 469 # Precompute the location of each entry
469 470 locations = {}
470 471 count = 0
471 472 for node in sorted(self.entries):
472 473 location = count * self.INDEXENTRYLENGTH
473 474 locations[node] = location
474 475 count += 1
475 476
476 477 # Must use [0] on the unpack result since it's always a tuple.
477 478 fanoutkey = struct.unpack(params.fanoutstruct,
478 479 node[:params.fanoutprefix])[0]
479 480 if fanouttable[fanoutkey] == EMPTYFANOUT:
480 481 fanouttable[fanoutkey] = location
481 482
482 483 rawfanouttable = ''
483 484 last = 0
484 485 for offset in fanouttable:
485 486 offset = offset if offset != EMPTYFANOUT else last
486 487 last = offset
487 488 rawfanouttable += struct.pack('!I', offset)
488 489
489 490 rawentrieslength = struct.pack('!Q', len(self.entries))
490 491
491 492 # The index offset is its location in the file, i.e. after the 2 byte
492 493 # header and the fanouttable.
493 494 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
494 495
495 496 self._writeheader(params)
496 497 self.idxfp.write(rawfanouttable)
497 498 self.idxfp.write(rawentrieslength)
498 499 self.idxfp.write(rawindex)
499 500 self.idxfp.close()
500 501
501 502 def createindex(self, nodelocations):
502 503 raise NotImplementedError()
503 504
504 505 def _writeheader(self, indexparams):
505 506 # Index header
506 507 # <version: 1 byte>
507 508 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
508 509 # <unused: 7 bit> # future use (compression, delta format, etc)
509 510 config = 0
510 511 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
511 512 config = 0b10000000
512 513 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
513 514
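A self-contained sketch of decoding the two-byte index header written above: one version byte followed by a config byte whose high bit selects the large (two-byte) fanout prefix, mirroring the check in basepack.__init__. The constants are restated so the snippet runs on its own.

import struct

INDEXVERSIONSIZE = 2                              # as defined at the top of this file
SMALLFANOUTPREFIX, LARGEFANOUTPREFIX = 1, 2

header = struct.pack('!BB', 2, 0b10000000)        # version 2, large-fanout bit set
version, config = struct.unpack('!BB', header[:INDEXVERSIONSIZE])
prefixsize = LARGEFANOUTPREFIX if config & 0b10000000 else SMALLFANOUTPREFIX
assert (version, prefixsize) == (2, LARGEFANOUTPREFIX)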
514 515 class indexparams(object):
515 516 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
516 517 r'fanoutsize', r'indexstart')
517 518
518 519 def __init__(self, prefixsize, version):
519 520 self.fanoutprefix = prefixsize
520 521
521 522 # The struct pack format for fanout table location (i.e. the format that
522 523 # converts the node prefix into an integer location in the fanout
523 524 # table).
524 525 if prefixsize == SMALLFANOUTPREFIX:
525 526 self.fanoutstruct = '!B'
526 527 elif prefixsize == LARGEFANOUTPREFIX:
527 528 self.fanoutstruct = '!H'
528 529 else:
529 530 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
530 531
531 532 # The number of fanout table entries
532 533 self.fanoutcount = 2**(prefixsize * 8)
533 534
534 535 # The total bytes used by the fanout table
535 536 self.fanoutsize = self.fanoutcount * 4
536 537
537 538 self.indexstart = FANOUTSTART + self.fanoutsize
538 539 # Skip the index length
539 540 self.indexstart += 8