py3: add utility to forward __str__() to __bytes__()...
Yuya Nishihara
r33022:ce96efec default
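This changeset replaces three hand-written __str__() implementations (each decoding short(self.node()) from ASCII on Python 3) with one shared decorator, applied as `__str__ = encoding.strmethod(__bytes__)`. The helper itself is added to mercurial/encoding.py and is not part of this hunk; the sketch below shows the idea, with the version switch and decode step as assumptions rather than the shipped implementation:

    # Hedged sketch of encoding.strmethod(); the real helper lives in
    # mercurial/encoding.py and is outside this diff.
    import sys

    if sys.version_info[0] >= 3:
        def strmethod(bytesfunc):
            # Forward __str__() to __bytes__() by decoding the result to a
            # native str; latin-1 is an assumption that round-trips bytes.
            def strfunc(self):
                return bytesfunc(self).decode('latin-1')
            return strfunc
    else:
        def strmethod(bytesfunc):
            # On Python 2, str is bytes, so __bytes__() works unchanged.
            return bytesfunc

With the helper in place, basectx, basefilectx and committablectx below only define __bytes__() and derive __str__() from it.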
@@ -1,2312 +1,2303 @@
1 1 # context.py - changeset and file context objects for mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import os
12 12 import re
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 addednodeid,
18 18 bin,
19 19 hex,
20 20 modifiednodeid,
21 21 nullid,
22 22 nullrev,
23 23 short,
24 24 wdirid,
25 25 wdirnodes,
26 26 wdirrev,
27 27 )
28 28 from . import (
29 29 encoding,
30 30 error,
31 31 fileset,
32 32 match as matchmod,
33 33 mdiff,
34 34 obsolete as obsmod,
35 35 patch,
36 36 phases,
37 37 pycompat,
38 38 repoview,
39 39 revlog,
40 40 scmutil,
41 41 subrepo,
42 42 util,
43 43 )
44 44
45 45 propertycache = util.propertycache
46 46
47 47 nonascii = re.compile(r'[^\x21-\x7f]').search
48 48
49 49 class basectx(object):
50 50 """A basectx object represents the common logic for its children:
51 51 changectx: read-only context that is already present in the repo,
52 52 workingctx: a context that represents the working directory and can
53 53 be committed,
54 54 memctx: a context that represents changes in-memory and can also
55 55 be committed."""
56 56 def __new__(cls, repo, changeid='', *args, **kwargs):
57 57 if isinstance(changeid, basectx):
58 58 return changeid
59 59
60 60 o = super(basectx, cls).__new__(cls)
61 61
62 62 o._repo = repo
63 63 o._rev = nullrev
64 64 o._node = nullid
65 65
66 66 return o
67 67
68 def __str__(self):
69 r = short(self.node())
70 if pycompat.ispy3:
71 return r.decode('ascii')
72 return r
73
74 68 def __bytes__(self):
75 69 return short(self.node())
76 70
71 __str__ = encoding.strmethod(__bytes__)
72
77 73 def __int__(self):
78 74 return self.rev()
79 75
80 76 def __repr__(self):
81 77 return r"<%s %s>" % (type(self).__name__, str(self))
82 78
83 79 def __eq__(self, other):
84 80 try:
85 81 return type(self) == type(other) and self._rev == other._rev
86 82 except AttributeError:
87 83 return False
88 84
89 85 def __ne__(self, other):
90 86 return not (self == other)
91 87
92 88 def __contains__(self, key):
93 89 return key in self._manifest
94 90
95 91 def __getitem__(self, key):
96 92 return self.filectx(key)
97 93
98 94 def __iter__(self):
99 95 return iter(self._manifest)
100 96
101 97 def _buildstatusmanifest(self, status):
102 98 """Builds a manifest that includes the given status results, if this is
103 99 a working copy context. For non-working copy contexts, it just returns
104 100 the normal manifest."""
105 101 return self.manifest()
106 102
107 103 def _matchstatus(self, other, match):
108 104 """return match.always if match is None
109 105
110 106 This internal method provides a way for child objects to override the
111 107 match operator.
112 108 """
113 109 return match or matchmod.always(self._repo.root, self._repo.getcwd())
114 110
115 111 def _buildstatus(self, other, s, match, listignored, listclean,
116 112 listunknown):
117 113 """build a status with respect to another context"""
118 114 # Load earliest manifest first for caching reasons. More specifically,
119 115 # if you have revisions 1000 and 1001, 1001 is probably stored as a
120 116 # delta against 1000. Thus, if you read 1000 first, we'll reconstruct
121 117 # 1000 and cache it so that when you read 1001, we just need to apply a
122 118 # delta to what's in the cache. So that's one full reconstruction + one
123 119 # delta application.
124 120 mf2 = None
125 121 if self.rev() is not None and self.rev() < other.rev():
126 122 mf2 = self._buildstatusmanifest(s)
127 123 mf1 = other._buildstatusmanifest(s)
128 124 if mf2 is None:
129 125 mf2 = self._buildstatusmanifest(s)
130 126
131 127 modified, added = [], []
132 128 removed = []
133 129 clean = []
134 130 deleted, unknown, ignored = s.deleted, s.unknown, s.ignored
135 131 deletedset = set(deleted)
136 132 d = mf1.diff(mf2, match=match, clean=listclean)
137 133 for fn, value in d.iteritems():
138 134 if fn in deletedset:
139 135 continue
140 136 if value is None:
141 137 clean.append(fn)
142 138 continue
143 139 (node1, flag1), (node2, flag2) = value
144 140 if node1 is None:
145 141 added.append(fn)
146 142 elif node2 is None:
147 143 removed.append(fn)
148 144 elif flag1 != flag2:
149 145 modified.append(fn)
150 146 elif node2 not in wdirnodes:
151 147 # When comparing files between two commits, we save time by
152 148 # not comparing the file contents when the nodeids differ.
153 149 # Note that this means we incorrectly report a reverted change
154 150 # to a file as a modification.
155 151 modified.append(fn)
156 152 elif self[fn].cmp(other[fn]):
157 153 modified.append(fn)
158 154 else:
159 155 clean.append(fn)
160 156
161 157 if removed:
162 158 # need to filter files if they are already reported as removed
163 159 unknown = [fn for fn in unknown if fn not in mf1 and
164 160 (not match or match(fn))]
165 161 ignored = [fn for fn in ignored if fn not in mf1 and
166 162 (not match or match(fn))]
167 163 # if they're deleted, don't report them as removed
168 164 removed = [fn for fn in removed if fn not in deletedset]
169 165
170 166 return scmutil.status(modified, added, removed, deleted, unknown,
171 167 ignored, clean)
172 168
173 169 @propertycache
174 170 def substate(self):
175 171 return subrepo.state(self, self._repo.ui)
176 172
177 173 def subrev(self, subpath):
178 174 return self.substate[subpath][1]
179 175
180 176 def rev(self):
181 177 return self._rev
182 178 def node(self):
183 179 return self._node
184 180 def hex(self):
185 181 return hex(self.node())
186 182 def manifest(self):
187 183 return self._manifest
188 184 def manifestctx(self):
189 185 return self._manifestctx
190 186 def repo(self):
191 187 return self._repo
192 188 def phasestr(self):
193 189 return phases.phasenames[self.phase()]
194 190 def mutable(self):
195 191 return self.phase() > phases.public
196 192
197 193 def getfileset(self, expr):
198 194 return fileset.getfileset(self, expr)
199 195
200 196 def obsolete(self):
201 197 """True if the changeset is obsolete"""
202 198 return self.rev() in obsmod.getrevs(self._repo, 'obsolete')
203 199
204 200 def extinct(self):
205 201 """True if the changeset is extinct"""
206 202 return self.rev() in obsmod.getrevs(self._repo, 'extinct')
207 203
208 204 def unstable(self):
209 205 """True if the changeset is not obsolete but its ancestors are"""
210 206 return self.rev() in obsmod.getrevs(self._repo, 'unstable')
211 207
212 208 def bumped(self):
213 209 """True if the changeset tries to be a successor of a public changeset
214 210
215 211 Only non-public and non-obsolete changesets may be bumped.
216 212 """
217 213 return self.rev() in obsmod.getrevs(self._repo, 'bumped')
218 214
219 215 def divergent(self):
220 216 """Is a successor of a changeset with multiple possible successor sets
221 217
222 218 Only non-public and non-obsolete changesets may be divergent.
223 219 """
224 220 return self.rev() in obsmod.getrevs(self._repo, 'divergent')
225 221
226 222 def troubled(self):
227 223 """True if the changeset is either unstable, bumped or divergent"""
228 224 return self.unstable() or self.bumped() or self.divergent()
229 225
230 226 def troubles(self):
231 227 """return the list of troubles affecting this changeset.
232 228
233 229 Troubles are returned as strings. Possible values are:
234 230 - unstable,
235 231 - bumped,
236 232 - divergent.
237 233 """
238 234 troubles = []
239 235 if self.unstable():
240 236 troubles.append('unstable')
241 237 if self.bumped():
242 238 troubles.append('bumped')
243 239 if self.divergent():
244 240 troubles.append('divergent')
245 241 return troubles
246 242
247 243 def parents(self):
248 244 """return contexts for each parent changeset"""
249 245 return self._parents
250 246
251 247 def p1(self):
252 248 return self._parents[0]
253 249
254 250 def p2(self):
255 251 parents = self._parents
256 252 if len(parents) == 2:
257 253 return parents[1]
258 254 return changectx(self._repo, nullrev)
259 255
260 256 def _fileinfo(self, path):
261 257 if r'_manifest' in self.__dict__:
262 258 try:
263 259 return self._manifest[path], self._manifest.flags(path)
264 260 except KeyError:
265 261 raise error.ManifestLookupError(self._node, path,
266 262 _('not found in manifest'))
267 263 if r'_manifestdelta' in self.__dict__ or path in self.files():
268 264 if path in self._manifestdelta:
269 265 return (self._manifestdelta[path],
270 266 self._manifestdelta.flags(path))
271 267 mfl = self._repo.manifestlog
272 268 try:
273 269 node, flag = mfl[self._changeset.manifest].find(path)
274 270 except KeyError:
275 271 raise error.ManifestLookupError(self._node, path,
276 272 _('not found in manifest'))
277 273
278 274 return node, flag
279 275
280 276 def filenode(self, path):
281 277 return self._fileinfo(path)[0]
282 278
283 279 def flags(self, path):
284 280 try:
285 281 return self._fileinfo(path)[1]
286 282 except error.LookupError:
287 283 return ''
288 284
289 285 def sub(self, path, allowcreate=True):
290 286 '''return a subrepo for the stored revision of path, never wdir()'''
291 287 return subrepo.subrepo(self, path, allowcreate=allowcreate)
292 288
293 289 def nullsub(self, path, pctx):
294 290 return subrepo.nullsubrepo(self, path, pctx)
295 291
296 292 def workingsub(self, path):
297 293 '''return a subrepo for the stored revision, or wdir if this is a wdir
298 294 context.
299 295 '''
300 296 return subrepo.subrepo(self, path, allowwdir=True)
301 297
302 298 def match(self, pats=None, include=None, exclude=None, default='glob',
303 299 listsubrepos=False, badfn=None):
304 300 r = self._repo
305 301 return matchmod.match(r.root, r.getcwd(), pats,
306 302 include, exclude, default,
307 303 auditor=r.nofsauditor, ctx=self,
308 304 listsubrepos=listsubrepos, badfn=badfn)
309 305
310 306 def diff(self, ctx2=None, match=None, **opts):
311 307 """Returns a diff generator for the given contexts and matcher"""
312 308 if ctx2 is None:
313 309 ctx2 = self.p1()
314 310 if ctx2 is not None:
315 311 ctx2 = self._repo[ctx2]
316 312 diffopts = patch.diffopts(self._repo.ui, opts)
317 313 return patch.diff(self._repo, ctx2, self, match=match, opts=diffopts)
318 314
319 315 def dirs(self):
320 316 return self._manifest.dirs()
321 317
322 318 def hasdir(self, dir):
323 319 return self._manifest.hasdir(dir)
324 320
325 321 def status(self, other=None, match=None, listignored=False,
326 322 listclean=False, listunknown=False, listsubrepos=False):
327 323 """return status of files between two nodes or node and working
328 324 directory.
329 325
330 326 If other is None, compare this node with working directory.
331 327
332 328 returns (modified, added, removed, deleted, unknown, ignored, clean)
333 329 """
334 330
335 331 ctx1 = self
336 332 ctx2 = self._repo[other]
337 333
338 334 # This next code block is, admittedly, fragile logic that tests for
339 335 # reversing the contexts and wouldn't need to exist if it weren't for
340 336 # the fast (and common) code path of comparing the working directory
341 337 # with its first parent.
342 338 #
343 339 # What we're aiming for here is the ability to call:
344 340 #
345 341 # workingctx.status(parentctx)
346 342 #
347 343 # If we always built the manifest for each context and compared those,
348 344 # then we'd be done. But the special case of the above call means we
349 345 # just copy the manifest of the parent.
350 346 reversed = False
351 347 if (not isinstance(ctx1, changectx)
352 348 and isinstance(ctx2, changectx)):
353 349 reversed = True
354 350 ctx1, ctx2 = ctx2, ctx1
355 351
356 352 match = ctx2._matchstatus(ctx1, match)
357 353 r = scmutil.status([], [], [], [], [], [], [])
358 354 r = ctx2._buildstatus(ctx1, r, match, listignored, listclean,
359 355 listunknown)
360 356
361 357 if reversed:
362 358 # Reverse added and removed. Clear deleted, unknown and ignored as
363 359 # these make no sense to reverse.
364 360 r = scmutil.status(r.modified, r.removed, r.added, [], [], [],
365 361 r.clean)
366 362
367 363 if listsubrepos:
368 364 for subpath, sub in scmutil.itersubrepos(ctx1, ctx2):
369 365 try:
370 366 rev2 = ctx2.subrev(subpath)
371 367 except KeyError:
372 368 # A subrepo that existed in node1 was deleted between
373 369 # node1 and node2 (inclusive). Thus, ctx2's substate
374 370 # won't contain that subpath. The best we can do is ignore it.
375 371 rev2 = None
376 372 submatch = matchmod.subdirmatcher(subpath, match)
377 373 s = sub.status(rev2, match=submatch, ignored=listignored,
378 374 clean=listclean, unknown=listunknown,
379 375 listsubrepos=True)
380 376 for rfiles, sfiles in zip(r, s):
381 377 rfiles.extend("%s/%s" % (subpath, f) for f in sfiles)
382 378
383 379 for l in r:
384 380 l.sort()
385 381
386 382 return r
387 383
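For orientation, status() is the entry point most callers hit; a hedged usage sketch follows (the repository bootstrapping calls are assumptions that vary across Mercurial versions):

    # Hypothetical driver code, assuming this era's hg.repository()/ui.load().
    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), '.')   # assumes cwd is a repo
    wctx = repo[None]                            # working directory context
    st = wctx.status(listclean=True)             # vs. first parent by default
    for f in st.modified:
        print('M %s' % f)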
388 384 def _filterederror(repo, changeid):
389 385 """build an exception to be raised about a filtered changeid
390 386
391 387 This is extracted in a function to help extensions (eg: evolve) to
392 388 experiment with various message variants."""
393 389 if repo.filtername.startswith('visible'):
394 390 msg = _("hidden revision '%s'") % changeid
395 391 hint = _('use --hidden to access hidden revisions')
396 392 return error.FilteredRepoLookupError(msg, hint=hint)
397 393 msg = _("filtered revision '%s' (not in '%s' subset)")
398 394 msg %= (changeid, repo.filtername)
399 395 return error.FilteredRepoLookupError(msg)
400 396
401 397 class changectx(basectx):
402 398 """A changecontext object makes access to data related to a particular
403 399 changeset convenient. It represents a read-only context already present in
404 400 the repo."""
405 401 def __init__(self, repo, changeid=''):
406 402 """changeid is a revision number, node, or tag"""
407 403
408 404 # since basectx.__new__ already took care of copying the object, we
409 405 # don't need to do anything in __init__, so we just exit here
410 406 if isinstance(changeid, basectx):
411 407 return
412 408
413 409 if changeid == '':
414 410 changeid = '.'
415 411 self._repo = repo
416 412
417 413 try:
418 414 if isinstance(changeid, int):
419 415 self._node = repo.changelog.node(changeid)
420 416 self._rev = changeid
421 417 return
422 418 if not pycompat.ispy3 and isinstance(changeid, long):
423 419 changeid = str(changeid)
424 420 if changeid == 'null':
425 421 self._node = nullid
426 422 self._rev = nullrev
427 423 return
428 424 if changeid == 'tip':
429 425 self._node = repo.changelog.tip()
430 426 self._rev = repo.changelog.rev(self._node)
431 427 return
432 428 if changeid == '.' or changeid == repo.dirstate.p1():
433 429 # this is a hack to delay/avoid loading obsmarkers
434 430 # when we know that '.' won't be hidden
435 431 self._node = repo.dirstate.p1()
436 432 self._rev = repo.unfiltered().changelog.rev(self._node)
437 433 return
438 434 if len(changeid) == 20:
439 435 try:
440 436 self._node = changeid
441 437 self._rev = repo.changelog.rev(changeid)
442 438 return
443 439 except error.FilteredRepoLookupError:
444 440 raise
445 441 except LookupError:
446 442 pass
447 443
448 444 try:
449 445 r = int(changeid)
450 446 if '%d' % r != changeid:
451 447 raise ValueError
452 448 l = len(repo.changelog)
453 449 if r < 0:
454 450 r += l
455 451 if r < 0 or r >= l and r != wdirrev:
456 452 raise ValueError
457 453 self._rev = r
458 454 self._node = repo.changelog.node(r)
459 455 return
460 456 except error.FilteredIndexError:
461 457 raise
462 458 except (ValueError, OverflowError, IndexError):
463 459 pass
464 460
465 461 if len(changeid) == 40:
466 462 try:
467 463 self._node = bin(changeid)
468 464 self._rev = repo.changelog.rev(self._node)
469 465 return
470 466 except error.FilteredLookupError:
471 467 raise
472 468 except (TypeError, LookupError):
473 469 pass
474 470
475 471 # lookup bookmarks through the name interface
476 472 try:
477 473 self._node = repo.names.singlenode(repo, changeid)
478 474 self._rev = repo.changelog.rev(self._node)
479 475 return
480 476 except KeyError:
481 477 pass
482 478 except error.FilteredRepoLookupError:
483 479 raise
484 480 except error.RepoLookupError:
485 481 pass
486 482
487 483 self._node = repo.unfiltered().changelog._partialmatch(changeid)
488 484 if self._node is not None:
489 485 self._rev = repo.changelog.rev(self._node)
490 486 return
491 487
492 488 # lookup failed
493 489 # check if it might have come from damaged dirstate
494 490 #
495 491 # XXX we could avoid the unfiltered if we had a recognizable
496 492 # exception for filtered changeset access
497 493 if changeid in repo.unfiltered().dirstate.parents():
498 494 msg = _("working directory has unknown parent '%s'!")
499 495 raise error.Abort(msg % short(changeid))
500 496 try:
501 497 if len(changeid) == 20 and nonascii(changeid):
502 498 changeid = hex(changeid)
503 499 except TypeError:
504 500 pass
505 501 except (error.FilteredIndexError, error.FilteredLookupError,
506 502 error.FilteredRepoLookupError):
507 503 raise _filterederror(repo, changeid)
508 504 except IndexError:
509 505 pass
510 506 raise error.RepoLookupError(
511 507 _("unknown revision '%s'") % changeid)
512 508
513 509 def __hash__(self):
514 510 try:
515 511 return hash(self._rev)
516 512 except AttributeError:
517 513 return id(self)
518 514
519 515 def __nonzero__(self):
520 516 return self._rev != nullrev
521 517
522 518 __bool__ = __nonzero__
523 519
524 520 @propertycache
525 521 def _changeset(self):
526 522 return self._repo.changelog.changelogrevision(self.rev())
527 523
528 524 @propertycache
529 525 def _manifest(self):
530 526 return self._manifestctx.read()
531 527
532 528 @property
533 529 def _manifestctx(self):
534 530 return self._repo.manifestlog[self._changeset.manifest]
535 531
536 532 @propertycache
537 533 def _manifestdelta(self):
538 534 return self._manifestctx.readdelta()
539 535
540 536 @propertycache
541 537 def _parents(self):
542 538 repo = self._repo
543 539 p1, p2 = repo.changelog.parentrevs(self._rev)
544 540 if p2 == nullrev:
545 541 return [changectx(repo, p1)]
546 542 return [changectx(repo, p1), changectx(repo, p2)]
547 543
548 544 def changeset(self):
549 545 c = self._changeset
550 546 return (
551 547 c.manifest,
552 548 c.user,
553 549 c.date,
554 550 c.files,
555 551 c.description,
556 552 c.extra,
557 553 )
558 554 def manifestnode(self):
559 555 return self._changeset.manifest
560 556
561 557 def user(self):
562 558 return self._changeset.user
563 559 def date(self):
564 560 return self._changeset.date
565 561 def files(self):
566 562 return self._changeset.files
567 563 def description(self):
568 564 return self._changeset.description
569 565 def branch(self):
570 566 return encoding.tolocal(self._changeset.extra.get("branch"))
571 567 def closesbranch(self):
572 568 return 'close' in self._changeset.extra
573 569 def extra(self):
574 570 return self._changeset.extra
575 571 def tags(self):
576 572 return self._repo.nodetags(self._node)
577 573 def bookmarks(self):
578 574 return self._repo.nodebookmarks(self._node)
579 575 def phase(self):
580 576 return self._repo._phasecache.phase(self._repo, self._rev)
581 577 def hidden(self):
582 578 return self._rev in repoview.filterrevs(self._repo, 'visible')
583 579
584 580 def children(self):
585 581 """return contexts for each child changeset"""
586 582 c = self._repo.changelog.children(self._node)
587 583 return [changectx(self._repo, x) for x in c]
588 584
589 585 def ancestors(self):
590 586 for a in self._repo.changelog.ancestors([self._rev]):
591 587 yield changectx(self._repo, a)
592 588
593 589 def descendants(self):
594 590 for d in self._repo.changelog.descendants([self._rev]):
595 591 yield changectx(self._repo, d)
596 592
597 593 def filectx(self, path, fileid=None, filelog=None):
598 594 """get a file context from this changeset"""
599 595 if fileid is None:
600 596 fileid = self.filenode(path)
601 597 return filectx(self._repo, path, fileid=fileid,
602 598 changectx=self, filelog=filelog)
603 599
604 600 def ancestor(self, c2, warn=False):
605 601 """return the "best" ancestor context of self and c2
606 602
607 603 If there are multiple candidates, it will show a message and check
608 604 merge.preferancestor configuration before falling back to the
609 605 revlog ancestor."""
610 606 # deal with workingctxs
611 607 n2 = c2._node
612 608 if n2 is None:
613 609 n2 = c2._parents[0]._node
614 610 cahs = self._repo.changelog.commonancestorsheads(self._node, n2)
615 611 if not cahs:
616 612 anc = nullid
617 613 elif len(cahs) == 1:
618 614 anc = cahs[0]
619 615 else:
620 616 # experimental config: merge.preferancestor
621 617 for r in self._repo.ui.configlist('merge', 'preferancestor', ['*']):
622 618 try:
623 619 ctx = changectx(self._repo, r)
624 620 except error.RepoLookupError:
625 621 continue
626 622 anc = ctx.node()
627 623 if anc in cahs:
628 624 break
629 625 else:
630 626 anc = self._repo.changelog.ancestor(self._node, n2)
631 627 if warn:
632 628 self._repo.ui.status(
633 629 (_("note: using %s as ancestor of %s and %s\n") %
634 630 (short(anc), short(self._node), short(n2))) +
635 631 ''.join(_(" alternatively, use --config "
636 632 "merge.preferancestor=%s\n") %
637 633 short(n) for n in sorted(cahs) if n != anc))
638 634 return changectx(self._repo, anc)
639 635
640 636 def descendant(self, other):
641 637 """True if other is a descendant of this changeset"""
642 638 return self._repo.changelog.descendant(self._rev, other._rev)
643 639
644 640 def walk(self, match):
645 641 '''Generates matching file names.'''
646 642
647 643 # Wrap match.bad method to have message with nodeid
648 644 def bad(fn, msg):
649 645 # The manifest doesn't know about subrepos, so don't complain about
650 646 # paths into valid subrepos.
651 647 if any(fn == s or fn.startswith(s + '/')
652 648 for s in self.substate):
653 649 return
654 650 match.bad(fn, _('no such file in rev %s') % self)
655 651
656 652 m = matchmod.badmatch(match, bad)
657 653 return self._manifest.walk(m)
658 654
659 655 def matches(self, match):
660 656 return self.walk(match)
661 657
662 658 class basefilectx(object):
663 659 """A filecontext object represents the common logic for its children:
664 660 filectx: read-only access to a filerevision that is already present
665 661 in the repo,
666 662 workingfilectx: a filecontext that represents files from the working
667 663 directory,
668 664 memfilectx: a filecontext that represents files in-memory,
669 665 overlayfilectx: duplicate another filecontext with some fields overridden.
670 666 """
671 667 @propertycache
672 668 def _filelog(self):
673 669 return self._repo.file(self._path)
674 670
675 671 @propertycache
676 672 def _changeid(self):
677 673 if r'_changeid' in self.__dict__:
678 674 return self._changeid
679 675 elif r'_changectx' in self.__dict__:
680 676 return self._changectx.rev()
681 677 elif r'_descendantrev' in self.__dict__:
682 678 # this file context was created from a revision with a known
683 679 # descendant, we can (lazily) correct for linkrev aliases
684 680 return self._adjustlinkrev(self._descendantrev)
685 681 else:
686 682 return self._filelog.linkrev(self._filerev)
687 683
688 684 @propertycache
689 685 def _filenode(self):
690 686 if r'_fileid' in self.__dict__:
691 687 return self._filelog.lookup(self._fileid)
692 688 else:
693 689 return self._changectx.filenode(self._path)
694 690
695 691 @propertycache
696 692 def _filerev(self):
697 693 return self._filelog.rev(self._filenode)
698 694
699 695 @propertycache
700 696 def _repopath(self):
701 697 return self._path
702 698
703 699 def __nonzero__(self):
704 700 try:
705 701 self._filenode
706 702 return True
707 703 except error.LookupError:
708 704 # file is missing
709 705 return False
710 706
711 707 __bool__ = __nonzero__
712 708
713 def __str__(self):
709 def __bytes__(self):
714 710 try:
715 711 return "%s@%s" % (self.path(), self._changectx)
716 712 except error.LookupError:
717 713 return "%s@???" % self.path()
718 714
719 def __bytes__(self):
720 try:
721 return "%s@%s" % (self.path(), self._changectx)
722 except error.LookupError:
723 return "%s@???" % self.path()
715 __str__ = encoding.strmethod(__bytes__)
724 716
725 717 def __repr__(self):
726 718 return "<%s %s>" % (type(self).__name__, str(self))
727 719
728 720 def __hash__(self):
729 721 try:
730 722 return hash((self._path, self._filenode))
731 723 except AttributeError:
732 724 return id(self)
733 725
734 726 def __eq__(self, other):
735 727 try:
736 728 return (type(self) == type(other) and self._path == other._path
737 729 and self._filenode == other._filenode)
738 730 except AttributeError:
739 731 return False
740 732
741 733 def __ne__(self, other):
742 734 return not (self == other)
743 735
744 736 def filerev(self):
745 737 return self._filerev
746 738 def filenode(self):
747 739 return self._filenode
748 740 @propertycache
749 741 def _flags(self):
750 742 return self._changectx.flags(self._path)
751 743 def flags(self):
752 744 return self._flags
753 745 def filelog(self):
754 746 return self._filelog
755 747 def rev(self):
756 748 return self._changeid
757 749 def linkrev(self):
758 750 return self._filelog.linkrev(self._filerev)
759 751 def node(self):
760 752 return self._changectx.node()
761 753 def hex(self):
762 754 return self._changectx.hex()
763 755 def user(self):
764 756 return self._changectx.user()
765 757 def date(self):
766 758 return self._changectx.date()
767 759 def files(self):
768 760 return self._changectx.files()
769 761 def description(self):
770 762 return self._changectx.description()
771 763 def branch(self):
772 764 return self._changectx.branch()
773 765 def extra(self):
774 766 return self._changectx.extra()
775 767 def phase(self):
776 768 return self._changectx.phase()
777 769 def phasestr(self):
778 770 return self._changectx.phasestr()
779 771 def manifest(self):
780 772 return self._changectx.manifest()
781 773 def changectx(self):
782 774 return self._changectx
783 775 def renamed(self):
784 776 return self._copied
785 777 def repo(self):
786 778 return self._repo
787 779 def size(self):
788 780 return len(self.data())
789 781
790 782 def path(self):
791 783 return self._path
792 784
793 785 def isbinary(self):
794 786 try:
795 787 return util.binary(self.data())
796 788 except IOError:
797 789 return False
798 790 def isexec(self):
799 791 return 'x' in self.flags()
800 792 def islink(self):
801 793 return 'l' in self.flags()
802 794
803 795 def isabsent(self):
804 796 """whether this filectx represents a file not in self._changectx
805 797
806 798 This is mainly for merge code to detect change/delete conflicts. This is
807 799 expected to be True for all subclasses of basectx."""
808 800 return False
809 801
810 802 _customcmp = False
811 803 def cmp(self, fctx):
812 804 """compare with other file context
813 805
814 806 returns True if different than fctx.
815 807 """
816 808 if fctx._customcmp:
817 809 return fctx.cmp(self)
818 810
819 811 if (fctx._filenode is None
820 812 and (self._repo._encodefilterpats
821 813 # if file data starts with '\1\n', empty metadata block is
822 814 # prepended, which adds 4 bytes to filelog.size().
823 815 or self.size() - 4 == fctx.size())
824 816 or self.size() == fctx.size()):
825 817 return self._filelog.cmp(self._filenode, fctx.data())
826 818
827 819 return True
828 820
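Concretely: filelog data that begins with '\1\n' gets an empty 4-byte metadata block ('\1\n\1\n') prepended at storage time, so filelog.size() reports 4 bytes more than the working-copy size for such files; the `self.size() - 4 == fctx.size()` clause above keeps the cheap size comparison usable in that case.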
829 821 def _adjustlinkrev(self, srcrev, inclusive=False):
830 822 """return the first ancestor of <srcrev> introducing <fnode>
831 823
832 824 If the linkrev of the file revision does not point to an ancestor of
833 825 srcrev, we'll walk down the ancestors until we find one introducing
834 826 this file revision.
835 827
836 828 :srcrev: the changeset revision we search ancestors from
837 829 :inclusive: if true, the src revision will also be checked
838 830 """
839 831 repo = self._repo
840 832 cl = repo.unfiltered().changelog
841 833 mfl = repo.manifestlog
842 834 # fetch the linkrev
843 835 lkr = self.linkrev()
844 836 # hack to reuse ancestor computation when searching for renames
845 837 memberanc = getattr(self, '_ancestrycontext', None)
846 838 iteranc = None
847 839 if srcrev is None:
848 840 # wctx case, used by workingfilectx during mergecopy
849 841 revs = [p.rev() for p in self._repo[None].parents()]
850 842 inclusive = True # we skipped the real (revless) source
851 843 else:
852 844 revs = [srcrev]
853 845 if memberanc is None:
854 846 memberanc = iteranc = cl.ancestors(revs, lkr,
855 847 inclusive=inclusive)
856 848 # check if this linkrev is an ancestor of srcrev
857 849 if lkr not in memberanc:
858 850 if iteranc is None:
859 851 iteranc = cl.ancestors(revs, lkr, inclusive=inclusive)
860 852 fnode = self._filenode
861 853 path = self._path
862 854 for a in iteranc:
863 855 ac = cl.read(a) # get changeset data (we avoid object creation)
864 856 if path in ac[3]: # checking the 'files' field.
865 857 # The file has been touched, check if the content is
866 858 # similar to the one we search for.
867 859 if fnode == mfl[ac[0]].readfast().get(path):
868 860 return a
869 861 # In theory, we should never get out of that loop without a result.
870 862 # But if the manifest uses a buggy file revision (not a child of the
871 863 # one it replaces) we could. Such a buggy situation will likely
872 864 # result in a crash somewhere else at some point.
873 865 return lkr
874 866
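Concretely: if an identical revision of a file is introduced twice (say at rev 5 and again by a later rev 12), the filelog stores a single file node whose linkrev points at rev 5, the first introduction. A filectx reached through rev 12's history must not report rev 5 when rev 5 is not among rev 12's ancestors, so the loop above walks the ancestors of srcrev and returns the first one whose manifest actually records this file node.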
875 867 def introrev(self):
876 868 """return the rev of the changeset which introduced this file revision
877 869
878 870 This method is different from linkrev because it takes into account the
879 871 changeset the filectx was created from. It ensures the returned
880 872 revision is one of its ancestors. This prevents bugs from
881 873 'linkrev-shadowing' when a file revision is used by multiple
882 874 changesets.
883 875 """
884 876 lkr = self.linkrev()
885 877 attrs = vars(self)
886 878 noctx = not ('_changeid' in attrs or '_changectx' in attrs)
887 879 if noctx or self.rev() == lkr:
888 880 return self.linkrev()
889 881 return self._adjustlinkrev(self.rev(), inclusive=True)
890 882
891 883 def _parentfilectx(self, path, fileid, filelog):
892 884 """create parent filectx keeping ancestry info for _adjustlinkrev()"""
893 885 fctx = filectx(self._repo, path, fileid=fileid, filelog=filelog)
894 886 if '_changeid' in vars(self) or '_changectx' in vars(self):
895 887 # If self is associated with a changeset (probably explicitly
896 888 # fed), ensure the created filectx is associated with a
897 889 # changeset that is an ancestor of self.changectx.
898 890 # This lets us later use _adjustlinkrev to get a correct link.
899 891 fctx._descendantrev = self.rev()
900 892 fctx._ancestrycontext = getattr(self, '_ancestrycontext', None)
901 893 elif '_descendantrev' in vars(self):
902 894 # Otherwise propagate _descendantrev if we have one associated.
903 895 fctx._descendantrev = self._descendantrev
904 896 fctx._ancestrycontext = getattr(self, '_ancestrycontext', None)
905 897 return fctx
906 898
907 899 def parents(self):
908 900 _path = self._path
909 901 fl = self._filelog
910 902 parents = self._filelog.parents(self._filenode)
911 903 pl = [(_path, node, fl) for node in parents if node != nullid]
912 904
913 905 r = fl.renamed(self._filenode)
914 906 if r:
915 907 # - In the simple rename case, both parents are nullid, pl is empty.
916 908 # - In case of merge, only one of the parents is nullid and should
917 909 # be replaced with the rename information. This parent is -always-
918 910 # the first one.
919 911 #
920 912 # As nullid parents have always been filtered out of the previous
921 913 # list comprehension, inserting at index 0 will always result in
922 914 # replacing the first nullid parent with the rename information.
923 915 pl.insert(0, (r[0], r[1], self._repo.file(r[0])))
924 916
925 917 return [self._parentfilectx(path, fnode, l) for path, fnode, l in pl]
926 918
927 919 def p1(self):
928 920 return self.parents()[0]
929 921
930 922 def p2(self):
931 923 p = self.parents()
932 924 if len(p) == 2:
933 925 return p[1]
934 926 return filectx(self._repo, self._path, fileid=-1, filelog=self._filelog)
935 927
936 928 def annotate(self, follow=False, linenumber=False, skiprevs=None,
937 929 diffopts=None):
938 930 '''returns a list of tuples of ((ctx, number), line) for each line
939 931 in the file, where ctx is the filectx of the node where
940 932 that line was last changed; if linenumber parameter is true, number is
941 933 that line was last changed; if the linenumber parameter is true, number is
942 934 number has a fixed value of False.
943 935 '''
944 936
945 937 def lines(text):
946 938 if text.endswith("\n"):
947 939 return text.count("\n")
948 940 return text.count("\n") + int(bool(text))
949 941
950 942 if linenumber:
951 943 def decorate(text, rev):
952 944 return ([(rev, i) for i in xrange(1, lines(text) + 1)], text)
953 945 else:
954 946 def decorate(text, rev):
955 947 return ([(rev, False)] * lines(text), text)
956 948
957 949 getlog = util.lrucachefunc(lambda x: self._repo.file(x))
958 950
959 951 def parents(f):
960 952 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
961 953 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
962 954 # from the topmost introrev (= srcrev) down to p.linkrev() if it
963 955 # isn't an ancestor of the srcrev.
964 956 f._changeid
965 957 pl = f.parents()
966 958
967 959 # Don't return renamed parents if we aren't following.
968 960 if not follow:
969 961 pl = [p for p in pl if p.path() == f.path()]
970 962
971 963 # renamed filectx won't have a filelog yet, so set it
972 964 # from the cache to save time
973 965 for p in pl:
974 966 if not '_filelog' in p.__dict__:
975 967 p._filelog = getlog(p.path())
976 968
977 969 return pl
978 970
979 971 # use linkrev to find the first changeset where self appeared
980 972 base = self
981 973 introrev = self.introrev()
982 974 if self.rev() != introrev:
983 975 base = self.filectx(self.filenode(), changeid=introrev)
984 976 if getattr(base, '_ancestrycontext', None) is None:
985 977 cl = self._repo.changelog
986 978 if introrev is None:
987 979 # wctx is not inclusive, but works because _ancestrycontext
988 980 # is used to test filelog revisions
989 981 ac = cl.ancestors([p.rev() for p in base.parents()],
990 982 inclusive=True)
991 983 else:
992 984 ac = cl.ancestors([introrev], inclusive=True)
993 985 base._ancestrycontext = ac
994 986
995 987 # This algorithm would prefer to be recursive, but Python is a
996 988 # bit recursion-hostile. Instead we do an iterative
997 989 # depth-first search.
998 990
999 991 # 1st DFS pre-calculates pcache and needed
1000 992 visit = [base]
1001 993 pcache = {}
1002 994 needed = {base: 1}
1003 995 while visit:
1004 996 f = visit.pop()
1005 997 if f in pcache:
1006 998 continue
1007 999 pl = parents(f)
1008 1000 pcache[f] = pl
1009 1001 for p in pl:
1010 1002 needed[p] = needed.get(p, 0) + 1
1011 1003 if p not in pcache:
1012 1004 visit.append(p)
1013 1005
1014 1006 # 2nd DFS does the actual annotate
1015 1007 visit[:] = [base]
1016 1008 hist = {}
1017 1009 while visit:
1018 1010 f = visit[-1]
1019 1011 if f in hist:
1020 1012 visit.pop()
1021 1013 continue
1022 1014
1023 1015 ready = True
1024 1016 pl = pcache[f]
1025 1017 for p in pl:
1026 1018 if p not in hist:
1027 1019 ready = False
1028 1020 visit.append(p)
1029 1021 if ready:
1030 1022 visit.pop()
1031 1023 curr = decorate(f.data(), f)
1032 1024 skipchild = False
1033 1025 if skiprevs is not None:
1034 1026 skipchild = f._changeid in skiprevs
1035 1027 curr = _annotatepair([hist[p] for p in pl], f, curr, skipchild,
1036 1028 diffopts)
1037 1029 for p in pl:
1038 1030 if needed[p] == 1:
1039 1031 del hist[p]
1040 1032 del needed[p]
1041 1033 else:
1042 1034 needed[p] -= 1
1043 1035
1044 1036 hist[f] = curr
1045 1037 del pcache[f]
1046 1038
1047 1039 return zip(hist[base][0], hist[base][1].splitlines(True))
1048 1040
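The two passes above are a general pattern for evaluating a DAG without recursion: first count how many children will consume each node, then compute results bottom-up, freeing each intermediate as soon as its last consumer is done. Stripped of the annotate specifics, a standalone sketch of the same pattern (illustrative code, not Mercurial's):

    def evaluate(base, parents, compute):
        # Pass 1: cache parent lists and count future consumers per node.
        pcache, needed, visit = {}, {base: 1}, [base]
        while visit:
            f = visit.pop()
            if f in pcache:
                continue
            pl = parents(f)
            pcache[f] = pl
            for p in pl:
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)
        # Pass 2: compute a node once all its parents are done; drop parent
        # results after their last use to bound memory.
        hist, visit = {}, [base]
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue
            missing = [p for p in pcache[f] if p not in hist]
            if missing:
                visit.extend(missing)
                continue
            visit.pop()
            hist[f] = compute(f, [hist[p] for p in pcache[f]])
            for p in pcache[f]:
                needed[p] -= 1
                if not needed[p]:
                    del hist[p]
            del pcache[f]
        return hist[base]

annotate() instantiates this shape with the parents() helper defined above and a compute step built from decorate() and _annotatepair().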
1049 1041 def ancestors(self, followfirst=False):
1050 1042 visit = {}
1051 1043 c = self
1052 1044 if followfirst:
1053 1045 cut = 1
1054 1046 else:
1055 1047 cut = None
1056 1048
1057 1049 while True:
1058 1050 for parent in c.parents()[:cut]:
1059 1051 visit[(parent.linkrev(), parent.filenode())] = parent
1060 1052 if not visit:
1061 1053 break
1062 1054 c = visit.pop(max(visit))
1063 1055 yield c
1064 1056
1065 1057 def _annotatepair(parents, childfctx, child, skipchild, diffopts):
1066 1058 r'''
1067 1059 Given parent and child fctxes and annotate data for parents, for all lines
1068 1060 in either parent that match the child, annotate the child with the parent's
1069 1061 data.
1070 1062
1071 1063 Additionally, if `skipchild` is True, replace all other lines with parent
1072 1064 annotate data as well such that child is never blamed for any lines.
1073 1065
1074 1066 >>> oldfctx = 'old'
1075 1067 >>> p1fctx, p2fctx, childfctx = 'p1', 'p2', 'c'
1076 1068 >>> olddata = 'a\nb\n'
1077 1069 >>> p1data = 'a\nb\nc\n'
1078 1070 >>> p2data = 'a\nc\nd\n'
1079 1071 >>> childdata = 'a\nb2\nc\nc2\nd\n'
1080 1072 >>> diffopts = mdiff.diffopts()
1081 1073
1082 1074 >>> def decorate(text, rev):
1083 1075 ... return ([(rev, i) for i in xrange(1, text.count('\n') + 1)], text)
1084 1076
1085 1077 Basic usage:
1086 1078
1087 1079 >>> oldann = decorate(olddata, oldfctx)
1088 1080 >>> p1ann = decorate(p1data, p1fctx)
1089 1081 >>> p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts)
1090 1082 >>> p1ann[0]
1091 1083 [('old', 1), ('old', 2), ('p1', 3)]
1092 1084 >>> p2ann = decorate(p2data, p2fctx)
1093 1085 >>> p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts)
1094 1086 >>> p2ann[0]
1095 1087 [('old', 1), ('p2', 2), ('p2', 3)]
1096 1088
1097 1089 Test with multiple parents (note the difference caused by ordering):
1098 1090
1099 1091 >>> childann = decorate(childdata, childfctx)
1100 1092 >>> childann = _annotatepair([p1ann, p2ann], childfctx, childann, False,
1101 1093 ... diffopts)
1102 1094 >>> childann[0]
1103 1095 [('old', 1), ('c', 2), ('p2', 2), ('c', 4), ('p2', 3)]
1104 1096
1105 1097 >>> childann = decorate(childdata, childfctx)
1106 1098 >>> childann = _annotatepair([p2ann, p1ann], childfctx, childann, False,
1107 1099 ... diffopts)
1108 1100 >>> childann[0]
1109 1101 [('old', 1), ('c', 2), ('p1', 3), ('c', 4), ('p2', 3)]
1110 1102
1111 1103 Test with skipchild (note the difference caused by ordering):
1112 1104
1113 1105 >>> childann = decorate(childdata, childfctx)
1114 1106 >>> childann = _annotatepair([p1ann, p2ann], childfctx, childann, True,
1115 1107 ... diffopts)
1116 1108 >>> childann[0]
1117 1109 [('old', 1), ('old', 2), ('p2', 2), ('p2', 2), ('p2', 3)]
1118 1110
1119 1111 >>> childann = decorate(childdata, childfctx)
1120 1112 >>> childann = _annotatepair([p2ann, p1ann], childfctx, childann, True,
1121 1113 ... diffopts)
1122 1114 >>> childann[0]
1123 1115 [('old', 1), ('old', 2), ('p1', 3), ('p1', 3), ('p2', 3)]
1124 1116 '''
1125 1117 pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts))
1126 1118 for parent in parents]
1127 1119
1128 1120 if skipchild:
1129 1121 # Need to iterate over the blocks twice -- make it a list
1130 1122 pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
1131 1123 # Mercurial currently prefers p2 over p1 for annotate.
1132 1124 # TODO: change this?
1133 1125 for parent, blocks in pblocks:
1134 1126 for (a1, a2, b1, b2), t in blocks:
1135 1127 # Changed blocks ('!') or blocks made only of blank lines ('~')
1136 1128 # belong to the child.
1137 1129 if t == '=':
1138 1130 child[0][b1:b2] = parent[0][a1:a2]
1139 1131
1140 1132 if skipchild:
1141 1133 # Now try and match up anything that couldn't be matched,
1142 1134 # Reversing pblocks maintains bias towards p2, matching above
1143 1135 # behavior.
1144 1136 pblocks.reverse()
1145 1137
1146 1138 # The heuristics are:
1147 1139 # * Work on blocks of changed lines (effectively diff hunks with -U0).
1148 1140 # This could potentially be smarter but works well enough.
1149 1141 # * For a non-matching section, do a best-effort fit. Match lines in
1150 1142 # diff hunks 1:1, dropping lines as necessary.
1151 1143 # * Repeat the last line as a last resort.
1152 1144
1153 1145 # First, replace as much as possible without repeating the last line.
1154 1146 remaining = [(parent, []) for parent, _blocks in pblocks]
1155 1147 for idx, (parent, blocks) in enumerate(pblocks):
1156 1148 for (a1, a2, b1, b2), _t in blocks:
1157 1149 if a2 - a1 >= b2 - b1:
1158 1150 for bk in xrange(b1, b2):
1159 1151 if child[0][bk][0] == childfctx:
1160 1152 ak = min(a1 + (bk - b1), a2 - 1)
1161 1153 child[0][bk] = parent[0][ak]
1162 1154 else:
1163 1155 remaining[idx][1].append((a1, a2, b1, b2))
1164 1156
1165 1157 # Then, look at anything left, which might involve repeating the last
1166 1158 # line.
1167 1159 for parent, blocks in remaining:
1168 1160 for a1, a2, b1, b2 in blocks:
1169 1161 for bk in xrange(b1, b2):
1170 1162 if child[0][bk][0] == childfctx:
1171 1163 ak = min(a1 + (bk - b1), a2 - 1)
1172 1164 child[0][bk] = parent[0][ak]
1173 1165 return child
1174 1166
1175 1167 class filectx(basefilectx):
1176 1168 """A filecontext object makes access to data related to a particular
1177 1169 filerevision convenient."""
1178 1170 def __init__(self, repo, path, changeid=None, fileid=None,
1179 1171 filelog=None, changectx=None):
1180 1172 """changeid can be a changeset revision, node, or tag.
1181 1173 fileid can be a file revision or node."""
1182 1174 self._repo = repo
1183 1175 self._path = path
1184 1176
1185 1177 assert (changeid is not None
1186 1178 or fileid is not None
1187 1179 or changectx is not None), \
1188 1180 ("bad args: changeid=%r, fileid=%r, changectx=%r"
1189 1181 % (changeid, fileid, changectx))
1190 1182
1191 1183 if filelog is not None:
1192 1184 self._filelog = filelog
1193 1185
1194 1186 if changeid is not None:
1195 1187 self._changeid = changeid
1196 1188 if changectx is not None:
1197 1189 self._changectx = changectx
1198 1190 if fileid is not None:
1199 1191 self._fileid = fileid
1200 1192
1201 1193 @propertycache
1202 1194 def _changectx(self):
1203 1195 try:
1204 1196 return changectx(self._repo, self._changeid)
1205 1197 except error.FilteredRepoLookupError:
1206 1198 # Linkrev may point to any revision in the repository. When the
1207 1199 # repository is filtered this may lead to `filectx` trying to build
1208 1200 # `changectx` for a filtered revision. In such a case we fall back
1209 1201 # to creating `changectx` on the unfiltered version of the repository.
1210 1202 # This fallback should not be an issue because `changectx` from
1211 1203 # `filectx` are not used in complex operations that care about
1212 1204 # filtering.
1213 1205 #
1214 1206 # This fallback is a cheap and dirty fix that prevents several
1215 1207 # crashes. It does not ensure the behavior is correct. However the
1216 1208 # behavior was not correct before filtering either, and "incorrect
1217 1209 # behavior" is seen as better than "crash"
1218 1210 #
1219 1211 # Linkrevs have several serious troubles with filtering that are
1220 1212 # complicated to solve. Proper handling of the issue here should be
1221 1213 # considered when solving the linkrev issues is on the table.
1222 1214 return changectx(self._repo.unfiltered(), self._changeid)
1223 1215
1224 1216 def filectx(self, fileid, changeid=None):
1225 1217 '''opens an arbitrary revision of the file without
1226 1218 opening a new filelog'''
1227 1219 return filectx(self._repo, self._path, fileid=fileid,
1228 1220 filelog=self._filelog, changeid=changeid)
1229 1221
1230 1222 def rawdata(self):
1231 1223 return self._filelog.revision(self._filenode, raw=True)
1232 1224
1233 1225 def rawflags(self):
1234 1226 """low-level revlog flags"""
1235 1227 return self._filelog.flags(self._filerev)
1236 1228
1237 1229 def data(self):
1238 1230 try:
1239 1231 return self._filelog.read(self._filenode)
1240 1232 except error.CensoredNodeError:
1241 1233 if self._repo.ui.config("censor", "policy", "abort") == "ignore":
1242 1234 return ""
1243 1235 raise error.Abort(_("censored node: %s") % short(self._filenode),
1244 1236 hint=_("set censor.policy to ignore errors"))
1245 1237
1246 1238 def size(self):
1247 1239 return self._filelog.size(self._filerev)
1248 1240
1249 1241 @propertycache
1250 1242 def _copied(self):
1251 1243 """check if file was actually renamed in this changeset revision
1252 1244
1253 1245 If a rename is logged in the file revision, we report a copy for the
1254 1246 changeset only if the file revision's linkrev points back to the
1255 1247 changeset in question or both changeset parents contain different file revisions.
1256 1248 """
1257 1249
1258 1250 renamed = self._filelog.renamed(self._filenode)
1259 1251 if not renamed:
1260 1252 return renamed
1261 1253
1262 1254 if self.rev() == self.linkrev():
1263 1255 return renamed
1264 1256
1265 1257 name = self.path()
1266 1258 fnode = self._filenode
1267 1259 for p in self._changectx.parents():
1268 1260 try:
1269 1261 if fnode == p.filenode(name):
1270 1262 return None
1271 1263 except error.LookupError:
1272 1264 pass
1273 1265 return renamed
1274 1266
1275 1267 def children(self):
1276 1268 # hard for renames
1277 1269 c = self._filelog.children(self._filenode)
1278 1270 return [filectx(self._repo, self._path, fileid=x,
1279 1271 filelog=self._filelog) for x in c]
1280 1272
1281 1273 class committablectx(basectx):
1282 1274 """A committablectx object provides common functionality for a context that
1283 1275 wants the ability to commit, e.g. workingctx or memctx."""
1284 1276 def __init__(self, repo, text="", user=None, date=None, extra=None,
1285 1277 changes=None):
1286 1278 self._repo = repo
1287 1279 self._rev = None
1288 1280 self._node = None
1289 1281 self._text = text
1290 1282 if date:
1291 1283 self._date = util.parsedate(date)
1292 1284 if user:
1293 1285 self._user = user
1294 1286 if changes:
1295 1287 self._status = changes
1296 1288
1297 1289 self._extra = {}
1298 1290 if extra:
1299 1291 self._extra = extra.copy()
1300 1292 if 'branch' not in self._extra:
1301 1293 try:
1302 1294 branch = encoding.fromlocal(self._repo.dirstate.branch())
1303 1295 except UnicodeDecodeError:
1304 1296 raise error.Abort(_('branch name not in UTF-8!'))
1305 1297 self._extra['branch'] = branch
1306 1298 if self._extra['branch'] == '':
1307 1299 self._extra['branch'] = 'default'
1308 1300
1309 def __str__(self):
1310 return str(self._parents[0]) + r"+"
1311
1312 1301 def __bytes__(self):
1313 1302 return bytes(self._parents[0]) + "+"
1314 1303
1304 __str__ = encoding.strmethod(__bytes__)
1305
1315 1306 def __nonzero__(self):
1316 1307 return True
1317 1308
1318 1309 __bool__ = __nonzero__
1319 1310
1320 1311 def _buildflagfunc(self):
1321 1312 # Create a fallback function for getting file flags when the
1322 1313 # filesystem doesn't support them
1323 1314
1324 1315 copiesget = self._repo.dirstate.copies().get
1325 1316 parents = self.parents()
1326 1317 if len(parents) < 2:
1327 1318 # when we have one parent, it's easy: copy from parent
1328 1319 man = parents[0].manifest()
1329 1320 def func(f):
1330 1321 f = copiesget(f, f)
1331 1322 return man.flags(f)
1332 1323 else:
1333 1324 # merges are tricky: we try to reconstruct the unstored
1334 1325 # result from the merge (issue1802)
1335 1326 p1, p2 = parents
1336 1327 pa = p1.ancestor(p2)
1337 1328 m1, m2, ma = p1.manifest(), p2.manifest(), pa.manifest()
1338 1329
1339 1330 def func(f):
1340 1331 f = copiesget(f, f) # may be wrong for merges with copies
1341 1332 fl1, fl2, fla = m1.flags(f), m2.flags(f), ma.flags(f)
1342 1333 if fl1 == fl2:
1343 1334 return fl1
1344 1335 if fl1 == fla:
1345 1336 return fl2
1346 1337 if fl2 == fla:
1347 1338 return fl1
1348 1339 return '' # punt for conflicts
1349 1340
1350 1341 return func
1351 1342
1352 1343 @propertycache
1353 1344 def _flagfunc(self):
1354 1345 return self._repo.dirstate.flagfunc(self._buildflagfunc)
1355 1346
1356 1347 @propertycache
1357 1348 def _status(self):
1358 1349 return self._repo.status()
1359 1350
1360 1351 @propertycache
1361 1352 def _user(self):
1362 1353 return self._repo.ui.username()
1363 1354
1364 1355 @propertycache
1365 1356 def _date(self):
1366 1357 ui = self._repo.ui
1367 1358 date = ui.configdate('devel', 'default-date')
1368 1359 if date is None:
1369 1360 date = util.makedate()
1370 1361 return date
1371 1362
1372 1363 def subrev(self, subpath):
1373 1364 return None
1374 1365
1375 1366 def manifestnode(self):
1376 1367 return None
1377 1368 def user(self):
1378 1369 return self._user or self._repo.ui.username()
1379 1370 def date(self):
1380 1371 return self._date
1381 1372 def description(self):
1382 1373 return self._text
1383 1374 def files(self):
1384 1375 return sorted(self._status.modified + self._status.added +
1385 1376 self._status.removed)
1386 1377
1387 1378 def modified(self):
1388 1379 return self._status.modified
1389 1380 def added(self):
1390 1381 return self._status.added
1391 1382 def removed(self):
1392 1383 return self._status.removed
1393 1384 def deleted(self):
1394 1385 return self._status.deleted
1395 1386 def branch(self):
1396 1387 return encoding.tolocal(self._extra['branch'])
1397 1388 def closesbranch(self):
1398 1389 return 'close' in self._extra
1399 1390 def extra(self):
1400 1391 return self._extra
1401 1392
1402 1393 def tags(self):
1403 1394 return []
1404 1395
1405 1396 def bookmarks(self):
1406 1397 b = []
1407 1398 for p in self.parents():
1408 1399 b.extend(p.bookmarks())
1409 1400 return b
1410 1401
1411 1402 def phase(self):
1412 1403 phase = phases.draft # default phase to draft
1413 1404 for p in self.parents():
1414 1405 phase = max(phase, p.phase())
1415 1406 return phase
1416 1407
1417 1408 def hidden(self):
1418 1409 return False
1419 1410
1420 1411 def children(self):
1421 1412 return []
1422 1413
1423 1414 def flags(self, path):
1424 1415 if r'_manifest' in self.__dict__:
1425 1416 try:
1426 1417 return self._manifest.flags(path)
1427 1418 except KeyError:
1428 1419 return ''
1429 1420
1430 1421 try:
1431 1422 return self._flagfunc(path)
1432 1423 except OSError:
1433 1424 return ''
1434 1425
1435 1426 def ancestor(self, c2):
1436 1427 """return the "best" ancestor context of self and c2"""
1437 1428 return self._parents[0].ancestor(c2) # punt on two parents for now
1438 1429
1439 1430 def walk(self, match):
1440 1431 '''Generates matching file names.'''
1441 1432 return sorted(self._repo.dirstate.walk(match, sorted(self.substate),
1442 1433 True, False))
1443 1434
1444 1435 def matches(self, match):
1445 1436 return sorted(self._repo.dirstate.matches(match))
1446 1437
1447 1438 def ancestors(self):
1448 1439 for p in self._parents:
1449 1440 yield p
1450 1441 for a in self._repo.changelog.ancestors(
1451 1442 [p.rev() for p in self._parents]):
1452 1443 yield changectx(self._repo, a)
1453 1444
1454 1445 def markcommitted(self, node):
1455 1446 """Perform post-commit cleanup necessary after committing this ctx
1456 1447
1457 1448 Specifically, this updates backing stores this working context
1458 1449 wraps to reflect the fact that the changes reflected by this
1459 1450 workingctx have been committed. For example, it marks
1460 1451 modified and added files as normal in the dirstate.
1461 1452
1462 1453 """
1463 1454
1464 1455 with self._repo.dirstate.parentchange():
1465 1456 for f in self.modified() + self.added():
1466 1457 self._repo.dirstate.normal(f)
1467 1458 for f in self.removed():
1468 1459 self._repo.dirstate.drop(f)
1469 1460 self._repo.dirstate.setparents(node)
1470 1461
1471 1462 # write changes out explicitly, because nesting wlock at
1472 1463 # runtime may prevent 'wlock.release()' in 'repo.commit()'
1473 1464 # from immediately doing so for subsequent changing files
1474 1465 self._repo.dirstate.write(self._repo.currenttransaction())
1475 1466
1476 1467 def dirty(self, missing=False, merge=True, branch=True):
1477 1468 return False
1478 1469
1479 1470 class workingctx(committablectx):
1480 1471 """A workingctx object makes access to data related to
1481 1472 the current working directory convenient.
1482 1473 date - any valid date string or (unixtime, offset), or None.
1483 1474 user - username string, or None.
1484 1475 extra - a dictionary of extra values, or None.
1485 1476 changes - a list of file lists as returned by localrepo.status()
1486 1477 or None to use the repository status.
1487 1478 """
1488 1479 def __init__(self, repo, text="", user=None, date=None, extra=None,
1489 1480 changes=None):
1490 1481 super(workingctx, self).__init__(repo, text, user, date, extra, changes)
1491 1482
1492 1483 def __iter__(self):
1493 1484 d = self._repo.dirstate
1494 1485 for f in d:
1495 1486 if d[f] != 'r':
1496 1487 yield f
1497 1488
1498 1489 def __contains__(self, key):
1499 1490 return self._repo.dirstate[key] not in "?r"
1500 1491
1501 1492 def hex(self):
1502 1493 return hex(wdirid)
1503 1494
1504 1495 @propertycache
1505 1496 def _parents(self):
1506 1497 p = self._repo.dirstate.parents()
1507 1498 if p[1] == nullid:
1508 1499 p = p[:-1]
1509 1500 return [changectx(self._repo, x) for x in p]
1510 1501
1511 1502 def filectx(self, path, filelog=None):
1512 1503 """get a file context from the working directory"""
1513 1504 return workingfilectx(self._repo, path, workingctx=self,
1514 1505 filelog=filelog)
1515 1506
1516 1507 def dirty(self, missing=False, merge=True, branch=True):
1517 1508 "check whether a working directory is modified"
1518 1509 # check subrepos first
1519 1510 for s in sorted(self.substate):
1520 1511 if self.sub(s).dirty():
1521 1512 return True
1522 1513 # check current working dir
1523 1514 return ((merge and self.p2()) or
1524 1515 (branch and self.branch() != self.p1().branch()) or
1525 1516 self.modified() or self.added() or self.removed() or
1526 1517 (missing and self.deleted()))
1527 1518
1528 1519 def add(self, list, prefix=""):
1529 1520 join = lambda f: os.path.join(prefix, f)
1530 1521 with self._repo.wlock():
1531 1522 ui, ds = self._repo.ui, self._repo.dirstate
1532 1523 rejected = []
1533 1524 lstat = self._repo.wvfs.lstat
1534 1525 for f in list:
1535 1526 scmutil.checkportable(ui, join(f))
1536 1527 try:
1537 1528 st = lstat(f)
1538 1529 except OSError:
1539 1530 ui.warn(_("%s does not exist!\n") % join(f))
1540 1531 rejected.append(f)
1541 1532 continue
1542 1533 if st.st_size > 10000000:
1543 1534 ui.warn(_("%s: up to %d MB of RAM may be required "
1544 1535 "to manage this file\n"
1545 1536 "(use 'hg revert %s' to cancel the "
1546 1537 "pending addition)\n")
1547 1538 % (f, 3 * st.st_size // 1000000, join(f)))
1548 1539 if not (stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode)):
1549 1540 ui.warn(_("%s not added: only files and symlinks "
1550 1541 "supported currently\n") % join(f))
1551 1542 rejected.append(f)
1552 1543 elif ds[f] in 'amn':
1553 1544 ui.warn(_("%s already tracked!\n") % join(f))
1554 1545 elif ds[f] == 'r':
1555 1546 ds.normallookup(f)
1556 1547 else:
1557 1548 ds.add(f)
1558 1549 return rejected
1559 1550
1560 1551 def forget(self, files, prefix=""):
1561 1552 join = lambda f: os.path.join(prefix, f)
1562 1553 with self._repo.wlock():
1563 1554 rejected = []
1564 1555 for f in files:
1565 1556 if f not in self._repo.dirstate:
1566 1557 self._repo.ui.warn(_("%s not tracked!\n") % join(f))
1567 1558 rejected.append(f)
1568 1559 elif self._repo.dirstate[f] != 'a':
1569 1560 self._repo.dirstate.remove(f)
1570 1561 else:
1571 1562 self._repo.dirstate.drop(f)
1572 1563 return rejected
1573 1564
1574 1565 def undelete(self, list):
1575 1566 pctxs = self.parents()
1576 1567 with self._repo.wlock():
1577 1568 for f in list:
1578 1569 if self._repo.dirstate[f] != 'r':
1579 1570 self._repo.ui.warn(_("%s not removed!\n") % f)
1580 1571 else:
1581 1572 fctx = f in pctxs[0] and pctxs[0][f] or pctxs[1][f]
1582 1573 t = fctx.data()
1583 1574 self._repo.wwrite(f, t, fctx.flags())
1584 1575 self._repo.dirstate.normal(f)
1585 1576
1586 1577 def copy(self, source, dest):
1587 1578 try:
1588 1579 st = self._repo.wvfs.lstat(dest)
1589 1580 except OSError as err:
1590 1581 if err.errno != errno.ENOENT:
1591 1582 raise
1592 1583 self._repo.ui.warn(_("%s does not exist!\n") % dest)
1593 1584 return
1594 1585 if not (stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode)):
1595 1586 self._repo.ui.warn(_("copy failed: %s is not a file or a "
1596 1587 "symbolic link\n") % dest)
1597 1588 else:
1598 1589 with self._repo.wlock():
1599 1590 if self._repo.dirstate[dest] in '?':
1600 1591 self._repo.dirstate.add(dest)
1601 1592 elif self._repo.dirstate[dest] in 'r':
1602 1593 self._repo.dirstate.normallookup(dest)
1603 1594 self._repo.dirstate.copy(source, dest)
1604 1595
1605 1596 def match(self, pats=None, include=None, exclude=None, default='glob',
1606 1597 listsubrepos=False, badfn=None):
1607 1598 r = self._repo
1608 1599
1609 1600 # Only a case insensitive filesystem needs magic to translate user input
1610 1601 # to actual case in the filesystem.
1611 1602 icasefs = not util.fscasesensitive(r.root)
1612 1603 return matchmod.match(r.root, r.getcwd(), pats, include, exclude,
1613 1604 default, auditor=r.auditor, ctx=self,
1614 1605 listsubrepos=listsubrepos, badfn=badfn,
1615 1606 icasefs=icasefs)
1616 1607
1617 1608 def _filtersuspectsymlink(self, files):
1618 1609 if not files or self._repo.dirstate._checklink:
1619 1610 return files
1620 1611
1621 1612 # Symlink placeholders may get non-symlink-like contents
1622 1613 # via user error or dereferencing by NFS or Samba servers,
1623 1614 # so we filter out any placeholders that don't look like a
1624 1615 # symlink
1625 1616 sane = []
1626 1617 for f in files:
1627 1618 if self.flags(f) == 'l':
1628 1619 d = self[f].data()
1629 1620 if d == '' or len(d) >= 1024 or '\n' in d or util.binary(d):
1630 1621 self._repo.ui.debug('ignoring suspect symlink placeholder'
1631 1622 ' "%s"\n' % f)
1632 1623 continue
1633 1624 sane.append(f)
1634 1625 return sane
1635 1626
1636 1627 def _checklookup(self, files):
1637 1628 # check for any possibly clean files
1638 1629 if not files:
1639 1630 return [], [], []
1640 1631
1641 1632 modified = []
1642 1633 deleted = []
1643 1634 fixup = []
1644 1635 pctx = self._parents[0]
1645 1636 # do a full compare of any files that might have changed
1646 1637 for f in sorted(files):
1647 1638 try:
1648 1639 # This will return True for a file that got replaced by a
1649 1640 # directory in the interim, but fixing that is pretty hard.
1650 1641 if (f not in pctx or self.flags(f) != pctx.flags(f)
1651 1642 or pctx[f].cmp(self[f])):
1652 1643 modified.append(f)
1653 1644 else:
1654 1645 fixup.append(f)
1655 1646 except (IOError, OSError):
1656 1647 # A file became inaccessible in between? Mark it as deleted,
1657 1648 # matching dirstate behavior (issue5584).
1658 1649 # The dirstate has more complex behavior around whether a
1659 1650 # missing file matches a directory, etc, but we don't need to
1660 1651 # bother with that: if f has made it to this point, we're sure
1661 1652 # it's in the dirstate.
1662 1653 deleted.append(f)
1663 1654
1664 1655 return modified, deleted, fixup
1665 1656
1666 1657 def _poststatusfixup(self, status, fixup):
1667 1658 """update dirstate for files that are actually clean"""
1668 1659 poststatus = self._repo.postdsstatus()
1669 1660 if fixup or poststatus:
1670 1661 try:
1671 1662 oldid = self._repo.dirstate.identity()
1672 1663
1673 1664 # updating the dirstate is optional
1674 1665 # so we don't wait on the lock
1675 1666 # wlock can invalidate the dirstate, so cache normal _after_
1676 1667 # taking the lock
1677 1668 with self._repo.wlock(False):
1678 1669 if self._repo.dirstate.identity() == oldid:
1679 1670 if fixup:
1680 1671 normal = self._repo.dirstate.normal
1681 1672 for f in fixup:
1682 1673 normal(f)
1683 1674 # write changes out explicitly, because nesting
1684 1675 # wlock at runtime may prevent 'wlock.release()'
1685 1676 # after this block from doing so for subsequent
1686 1677 # changing files
1687 1678 tr = self._repo.currenttransaction()
1688 1679 self._repo.dirstate.write(tr)
1689 1680
1690 1681 if poststatus:
1691 1682 for ps in poststatus:
1692 1683 ps(self, status)
1693 1684 else:
1694 1685 # in this case, writing changes out breaks
1695 1686 # consistency, because .hg/dirstate was
1696 1687 # already changed simultaneously since it was
1697 1688 # last cached (see also issue5584 for detail)
1698 1689 self._repo.ui.debug('skip updating dirstate: '
1699 1690 'identity mismatch\n')
1700 1691 except error.LockError:
1701 1692 pass
1702 1693 finally:
1703 1694 # Even if the wlock couldn't be grabbed, clear out the list.
1704 1695 self._repo.clearpostdsstatus()
1705 1696
1706 1697 def _dirstatestatus(self, match=None, ignored=False, clean=False,
1707 1698 unknown=False):
1708 1699 '''Gets the status from the dirstate -- internal use only.'''
1709 1700 listignored, listclean, listunknown = ignored, clean, unknown
1710 1701 match = match or matchmod.always(self._repo.root, self._repo.getcwd())
1711 1702 subrepos = []
1712 1703 if '.hgsub' in self:
1713 1704 subrepos = sorted(self.substate)
1714 1705 cmp, s = self._repo.dirstate.status(match, subrepos, listignored,
1715 1706 listclean, listunknown)
1716 1707
1717 1708 # check for any possibly clean files
1718 1709 fixup = []
1719 1710 if cmp:
1720 1711 modified2, deleted2, fixup = self._checklookup(cmp)
1721 1712 s.modified.extend(modified2)
1722 1713 s.deleted.extend(deleted2)
1723 1714
1724 1715 if fixup and listclean:
1725 1716 s.clean.extend(fixup)
1726 1717
1727 1718 self._poststatusfixup(s, fixup)
1728 1719
1729 1720 if match.always():
1730 1721 # cache for performance
1731 1722 if s.unknown or s.ignored or s.clean:
1732 1723 # "_status" is cached with list*=False in the normal route
1733 1724 self._status = scmutil.status(s.modified, s.added, s.removed,
1734 1725 s.deleted, [], [], [])
1735 1726 else:
1736 1727 self._status = s
1737 1728
1738 1729 return s
1739 1730
1740 1731 @propertycache
1741 1732 def _manifest(self):
1742 1733 """generate a manifest corresponding to the values in self._status
1743 1734
1744 1735 This reuses the file nodeids from the parents, but we use special node
1745 1736 identifiers for added and modified files. This is used by the manifest
1746 1737 merge to see that files are different and by the update logic to avoid
1747 1738 deleting newly added files.
1748 1739 """
1749 1740 return self._buildstatusmanifest(self._status)
1750 1741
1751 1742 def _buildstatusmanifest(self, status):
1752 1743 """Builds a manifest that includes the given status results."""
1753 1744 parents = self.parents()
1754 1745
1755 1746 man = parents[0].manifest().copy()
1756 1747
1757 1748 ff = self._flagfunc
1758 1749 for i, l in ((addednodeid, status.added),
1759 1750 (modifiednodeid, status.modified)):
1760 1751 for f in l:
1761 1752 man[f] = i
1762 1753 try:
1763 1754 man.setflag(f, ff(f))
1764 1755 except OSError:
1765 1756 pass
1766 1757
1767 1758 for f in status.deleted + status.removed:
1768 1759 if f in man:
1769 1760 del man[f]
1770 1761
1771 1762 return man
1772 1763
1773 1764 def _buildstatus(self, other, s, match, listignored, listclean,
1774 1765 listunknown):
1775 1766 """build a status with respect to another context
1776 1767
1777 1768 This includes logic for maintaining the fast path of status when
1778 1769 comparing the working directory against its parent: building a new
1779 1770 manifest is skipped unless self (the working directory) is being
1780 1771 compared against something other than its parent (repo['.']).
1781 1772 """
1782 1773 s = self._dirstatestatus(match, listignored, listclean, listunknown)
1783 1774 # Filter out symlinks that, in the case of FAT32 and NTFS filesystems,
1784 1775 # might have accidentally ended up with the entire contents of the file
1785 1776 # they are supposed to be linking to.
1786 1777 s.modified[:] = self._filtersuspectsymlink(s.modified)
1787 1778 if other != self._repo['.']:
1788 1779 s = super(workingctx, self)._buildstatus(other, s, match,
1789 1780 listignored, listclean,
1790 1781 listunknown)
1791 1782 return s
1792 1783
1793 1784 def _matchstatus(self, other, match):
1794 1785 """override the match method with a filter for directory patterns
1795 1786
1796 1787 We use inheritance to customize the match.bad method only in the case
1797 1788 of workingctx, since it applies only to the working directory when
1798 1789 comparing against the parent changeset.
1799 1790
1800 1791 If we aren't comparing against the working directory's parent, then we
1801 1792 just use the default match object sent to us.
1802 1793 """
1803 1794 superself = super(workingctx, self)
1804 1795 match = superself._matchstatus(other, match)
1805 1796 if other != self._repo['.']:
1806 1797 def bad(f, msg):
1807 1798 # 'f' may be a directory pattern from 'match.files()',
1808 1799 # so 'f not in other' is not enough
1809 1800 if f not in other and not other.hasdir(f):
1810 1801 self._repo.ui.warn('%s: %s\n' %
1811 1802 (self._repo.dirstate.pathto(f), msg))
1812 1803 match.bad = bad
1813 1804 return match
1814 1805
1815 1806 class committablefilectx(basefilectx):
1816 1807 """A committablefilectx provides common functionality for a file context
1817 1808 that wants the ability to commit, e.g. workingfilectx or memfilectx."""
1818 1809 def __init__(self, repo, path, filelog=None, ctx=None):
1819 1810 self._repo = repo
1820 1811 self._path = path
1821 1812 self._changeid = None
1822 1813 self._filerev = self._filenode = None
1823 1814
1824 1815 if filelog is not None:
1825 1816 self._filelog = filelog
1826 1817 if ctx:
1827 1818 self._changectx = ctx
1828 1819
1829 1820 def __nonzero__(self):
1830 1821 return True
1831 1822
1832 1823 __bool__ = __nonzero__
1833 1824
1834 1825 def linkrev(self):
1835 1826 # linked to self._changectx no matter if file is modified or not
1836 1827 return self.rev()
1837 1828
1838 1829 def parents(self):
1839 1830 '''return parent filectxs, following copies if necessary'''
1840 1831 def filenode(ctx, path):
1841 1832 return ctx._manifest.get(path, nullid)
1842 1833
1843 1834 path = self._path
1844 1835 fl = self._filelog
1845 1836 pcl = self._changectx._parents
1846 1837 renamed = self.renamed()
1847 1838
1848 1839 if renamed:
1849 1840 pl = [renamed + (None,)]
1850 1841 else:
1851 1842 pl = [(path, filenode(pcl[0], path), fl)]
1852 1843
1853 1844 for pc in pcl[1:]:
1854 1845 pl.append((path, filenode(pc, path), fl))
1855 1846
1856 1847 return [self._parentfilectx(p, fileid=n, filelog=l)
1857 1848 for p, n, l in pl if n != nullid]
1858 1849
1859 1850 def children(self):
1860 1851 return []
1861 1852
1862 1853 class workingfilectx(committablefilectx):
1863 1854 """A workingfilectx object makes access to data related to a particular
1864 1855 file in the working directory convenient."""
1865 1856 def __init__(self, repo, path, filelog=None, workingctx=None):
1866 1857 super(workingfilectx, self).__init__(repo, path, filelog, workingctx)
1867 1858
1868 1859 @propertycache
1869 1860 def _changectx(self):
1870 1861 return workingctx(self._repo)
1871 1862
1872 1863 def data(self):
1873 1864 return self._repo.wread(self._path)
1874 1865 def renamed(self):
1875 1866 rp = self._repo.dirstate.copied(self._path)
1876 1867 if not rp:
1877 1868 return None
1878 1869 return rp, self._changectx._parents[0]._manifest.get(rp, nullid)
1879 1870
1880 1871 def size(self):
1881 1872 return self._repo.wvfs.lstat(self._path).st_size
1882 1873 def date(self):
1883 1874 t, tz = self._changectx.date()
1884 1875 try:
1885 1876 return (self._repo.wvfs.lstat(self._path).st_mtime, tz)
1886 1877 except OSError as err:
1887 1878 if err.errno != errno.ENOENT:
1888 1879 raise
1889 1880 return (t, tz)
1890 1881
1891 1882 def cmp(self, fctx):
1892 1883 """compare with other file context
1893 1884
1894 1885 returns True if different than fctx.
1895 1886 """
1896 1887 # fctx should be a filectx (not a workingfilectx)
1897 1888 # invert comparison to reuse the same code path
1898 1889 return fctx.cmp(self)
1899 1890
1900 1891 def remove(self, ignoremissing=False):
1901 1892 """wraps unlink for a repo's working directory"""
1902 1893 self._repo.wvfs.unlinkpath(self._path, ignoremissing=ignoremissing)
1903 1894
1904 1895 def write(self, data, flags):
1905 1896 """wraps repo.wwrite"""
1906 1897 self._repo.wwrite(self._path, data, flags)
1907 1898
1908 1899 class workingcommitctx(workingctx):
1909 1900 """A workingcommitctx object makes access to data related to
1910 1901 the revision being committed convenient.
1911 1902
1912 1903 This hides changes in the working directory, if they aren't
1913 1904 committed in this context.
1914 1905 """
1915 1906 def __init__(self, repo, changes,
1916 1907 text="", user=None, date=None, extra=None):
1917 1908 super(workingctx, self).__init__(repo, text, user, date, extra,
1918 1909 changes)
1919 1910
1920 1911 def _dirstatestatus(self, match=None, ignored=False, clean=False,
1921 1912 unknown=False):
1922 1913 """Return matched files only in ``self._status``
1923 1914
1924 1915 Uncommitted files appear "clean" via this context, even if
1925 1916 they aren't actually so in the working directory.
1926 1917 """
1927 1918 match = match or matchmod.always(self._repo.root, self._repo.getcwd())
1928 1919 if clean:
1929 1920 clean = [f for f in self._manifest if f not in self._changedset]
1930 1921 else:
1931 1922 clean = []
1932 1923 return scmutil.status([f for f in self._status.modified if match(f)],
1933 1924 [f for f in self._status.added if match(f)],
1934 1925 [f for f in self._status.removed if match(f)],
1935 1926 [], [], [], clean)
1936 1927
1937 1928 @propertycache
1938 1929 def _changedset(self):
1939 1930 """Return the set of files changed in this context
1940 1931 """
1941 1932 changed = set(self._status.modified)
1942 1933 changed.update(self._status.added)
1943 1934 changed.update(self._status.removed)
1944 1935 return changed
1945 1936
1946 1937 def makecachingfilectxfn(func):
1947 1938 """Create a filectxfn that caches based on the path.
1948 1939
1949 1940 We can't use util.cachefunc because it uses all arguments as the cache
1950 1941 key and this creates a cycle since the arguments include the repo and
1951 1942 memctx.
1952 1943 """
1953 1944 cache = {}
1954 1945
1955 1946 def getfilectx(repo, memctx, path):
1956 1947 if path not in cache:
1957 1948 cache[path] = func(repo, memctx, path)
1958 1949 return cache[path]
1959 1950
1960 1951 return getfilectx
1961 1952
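# Illustrative sketch (not part of this changeset): the per-path cache
# means an expensive filectxfn (e.g. one that applies a patch hunk) runs
# at most once per file, no matter how often the commit machinery asks;
# 'expensivefn' and 'mctx' below are hypothetical names:
#
#   fn = makecachingfilectxfn(expensivefn)
#   fn(repo, mctx, 'a.txt')   # computed and cached
#   fn(repo, mctx, 'a.txt')   # served from the cache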
1962 1953 def memfilefromctx(ctx):
1963 1954 """Given a context return a memfilectx for ctx[path]
1964 1955
1965 1956 This is a convenience method for building a memctx based on another
1966 1957 context.
1967 1958 """
1968 1959 def getfilectx(repo, memctx, path):
1969 1960 fctx = ctx[path]
1970 1961 # this is weird but apparently we only keep track of one parent
1971 1962 # (why not only store that instead of a tuple?)
1972 1963 copied = fctx.renamed()
1973 1964 if copied:
1974 1965 copied = copied[0]
1975 1966 return memfilectx(repo, path, fctx.data(),
1976 1967 islink=fctx.islink(), isexec=fctx.isexec(),
1977 1968 copied=copied, memctx=memctx)
1978 1969
1979 1970 return getfilectx
1980 1971
1981 1972 def memfilefrompatch(patchstore):
1982 1973 """Given a patch (e.g. patchstore object) return a memfilectx
1983 1974
1984 1975 This is a convenience method for building a memctx based on a patchstore.
1985 1976 """
1986 1977 def getfilectx(repo, memctx, path):
1987 1978 data, mode, copied = patchstore.getfile(path)
1988 1979 if data is None:
1989 1980 return None
1990 1981 islink, isexec = mode
1991 1982 return memfilectx(repo, path, data, islink=islink,
1992 1983 isexec=isexec, copied=copied,
1993 1984 memctx=memctx)
1994 1985
1995 1986 return getfilectx
1996 1987
1997 1988 class memctx(committablectx):
1998 1989 """Use memctx to perform in-memory commits via localrepo.commitctx().
1999 1990
2000 1991 Revision information is supplied at initialization time, while
2001 1992 related file data is made available through a callback
2002 1993 mechanism. 'repo' is the current localrepo, 'parents' is a
2003 1994 sequence of two parent revision identifiers (pass None for every
2004 1995 missing parent), 'text' is the commit message and 'files' lists
2005 1996 names of files touched by the revision (normalized and relative to
2006 1997 repository root).
2007 1998
2008 1999 filectxfn(repo, memctx, path) is a callable receiving the
2009 2000 repository, the current memctx object and the normalized path of
2010 2001 requested file, relative to repository root. It is fired by the
2011 2002 commit function for every file in 'files', but calls order is
2012 2003 undefined. If the file is available in the revision being
2013 2004 committed (updated or added), filectxfn returns a memfilectx
2014 2005 object. If the file was removed, filectxfn return None for recent
2015 2006 Mercurial. Moved files are represented by marking the source file
2016 2007 removed and the new file added with copy information (see
2017 2008 memfilectx).
2018 2009
2019 2010 user is the committer name and defaults to the current
2020 2011 repository username, date is the commit date in any format
2021 2012 supported by util.parsedate() and defaults to current date, extra
2022 2013 is a dictionary of metadata or is left empty.
2023 2014 """
2024 2015
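# Illustrative sketch (not part of this changeset): a minimal in-memory
# commit touching one file might look like this, assuming 'repo' is an
# open localrepo and p1/p2 are parent nodes (hypothetical names):
#
#   def getfilectx(repo, memctx, path):
#       return memfilectx(repo, path, 'new content', memctx=memctx)
#
#   ctx = memctx(repo, (p1, p2), 'commit message', ['a.txt'], getfilectx)
#   node = ctx.commit()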
2025 2016 # Mercurial <= 3.1 expects the filectxfn to raise IOError for missing files.
2026 2017 # Extensions that need to retain compatibility across Mercurial 3.1 can use
2027 2018 # this field to determine what to do in filectxfn.
2028 2019 _returnnoneformissingfiles = True
2029 2020
2030 2021 def __init__(self, repo, parents, text, files, filectxfn, user=None,
2031 2022 date=None, extra=None, branch=None, editor=False):
2032 2023 super(memctx, self).__init__(repo, text, user, date, extra)
2033 2024 self._rev = None
2034 2025 self._node = None
2035 2026 parents = [(p or nullid) for p in parents]
2036 2027 p1, p2 = parents
2037 2028 self._parents = [changectx(self._repo, p) for p in (p1, p2)]
2038 2029 files = sorted(set(files))
2039 2030 self._files = files
2040 2031 if branch is not None:
2041 2032 self._extra['branch'] = encoding.fromlocal(branch)
2042 2033 self.substate = {}
2043 2034
2044 2035 if isinstance(filectxfn, patch.filestore):
2045 2036 filectxfn = memfilefrompatch(filectxfn)
2046 2037 elif not callable(filectxfn):
2047 2038 # if store is not callable, wrap it in a function
2048 2039 filectxfn = memfilefromctx(filectxfn)
2049 2040
2050 2041 # memoizing increases performance for e.g. vcs convert scenarios.
2051 2042 self._filectxfn = makecachingfilectxfn(filectxfn)
2052 2043
2053 2044 if editor:
2054 2045 self._text = editor(self._repo, self, [])
2055 2046 self._repo.savecommitmessage(self._text)
2056 2047
2057 2048 def filectx(self, path, filelog=None):
2058 2049 """get a file context from the working directory
2059 2050
2060 2051 Returns None if file doesn't exist and should be removed."""
2061 2052 return self._filectxfn(self._repo, self, path)
2062 2053
2063 2054 def commit(self):
2064 2055 """commit context to the repo"""
2065 2056 return self._repo.commitctx(self)
2066 2057
2067 2058 @propertycache
2068 2059 def _manifest(self):
2069 2060 """generate a manifest based on the return values of filectxfn"""
2070 2061
2071 2062 # keep this simple for now; just worry about p1
2072 2063 pctx = self._parents[0]
2073 2064 man = pctx.manifest().copy()
2074 2065
2075 2066 for f in self._status.modified:
2076 2067 p1node = nullid
2077 2068 p2node = nullid
2078 2069 p = pctx[f].parents() # if file isn't in pctx, check p2?
2079 2070 if len(p) > 0:
2080 2071 p1node = p[0].filenode()
2081 2072 if len(p) > 1:
2082 2073 p2node = p[1].filenode()
2083 2074 man[f] = revlog.hash(self[f].data(), p1node, p2node)
2084 2075
2085 2076 for f in self._status.added:
2086 2077 man[f] = revlog.hash(self[f].data(), nullid, nullid)
2087 2078
2088 2079 for f in self._status.removed:
2089 2080 if f in man:
2090 2081 del man[f]
2091 2082
2092 2083 return man
2093 2084
2094 2085 @propertycache
2095 2086 def _status(self):
2096 2087 """Calculate exact status from ``files`` specified at construction
2097 2088 """
2098 2089 man1 = self.p1().manifest()
2099 2090 p2 = self._parents[1]
2100 2091 # "1 < len(self._parents)" can't be used for checking
2101 2092 # existence of the 2nd parent, because "memctx._parents" is
2102 2093 # explicitly initialized with a list whose length is 2.
2103 2094 if p2.node() != nullid:
2104 2095 man2 = p2.manifest()
2105 2096 managing = lambda f: f in man1 or f in man2
2106 2097 else:
2107 2098 managing = lambda f: f in man1
2108 2099
2109 2100 modified, added, removed = [], [], []
2110 2101 for f in self._files:
2111 2102 if not managing(f):
2112 2103 added.append(f)
2113 2104 elif self[f]:
2114 2105 modified.append(f)
2115 2106 else:
2116 2107 removed.append(f)
2117 2108
2118 2109 return scmutil.status(modified, added, removed, [], [], [], [])
2119 2110
2120 2111 class memfilectx(committablefilectx):
2121 2112 """memfilectx represents an in-memory file to commit.
2122 2113
2123 2114 See memctx and committablefilectx for more details.
2124 2115 """
2125 2116 def __init__(self, repo, path, data, islink=False,
2126 2117 isexec=False, copied=None, memctx=None):
2127 2118 """
2128 2119 path is the normalized file path relative to repository root.
2129 2120 data is the file content as a string.
2130 2121 islink is True if the file is a symbolic link.
2131 2122 isexec is True if the file is executable.
2132 2123 copied is the source file path if current file was copied in the
2133 2124 revision being committed, or None."""
2134 2125 super(memfilectx, self).__init__(repo, path, None, memctx)
2135 2126 self._data = data
2136 2127 self._flags = (islink and 'l' or '') + (isexec and 'x' or '')
2137 2128 self._copied = None
2138 2129 if copied:
2139 2130 self._copied = (copied, nullid)
2140 2131
2141 2132 def data(self):
2142 2133 return self._data
2143 2134
2144 2135 def remove(self, ignoremissing=False):
2145 2136 """wraps unlink for a repo's working directory"""
2146 2137 # need to figure out what to do here
2147 2138 del self._changectx[self._path]
2148 2139
2149 2140 def write(self, data, flags):
2150 2141 """wraps repo.wwrite"""
2151 2142 self._data = data
2152 2143
2153 2144 class overlayfilectx(committablefilectx):
2154 2145 """Like memfilectx but take an original filectx and optional parameters to
2155 2146 override parts of it. This is useful when fctx.data() is expensive (e.g.
2156 2147 the flag processor is expensive) and raw data, flags, and filenode could be
2157 2148 reused (e.g. rebase or a mode-only amend of a REVIDX_EXTSTORED file).
2158 2149 """
2159 2150
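# Illustrative sketch (not part of this changeset): a mode-only change
# can be expressed without re-reading (or re-running flag processors on)
# the file data, since datafunc defaults to originalfctx.data:
#
#   ofctx = overlayfilectx(fctx, flags='x')   # 'fctx' is hypothetical
#   ofctx.data()                              # lazily reuses fctx.data()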
2160 2151 def __init__(self, originalfctx, datafunc=None, path=None, flags=None,
2161 2152 copied=None, ctx=None):
2162 2153 """originalfctx: filecontext to duplicate
2163 2154
2164 2155 datafunc: None or a function to override data (file content). It is a
2165 2156 function so it can be lazy. path, flags, copied, ctx: None or overridden value
2166 2157
2167 2158 copied could be (path, rev), or False. copied could also be just path,
2168 2159 and will be converted to (path, nullid). This simplifies some callers.
2169 2160 """
2170 2161
2171 2162 if path is None:
2172 2163 path = originalfctx.path()
2173 2164 if ctx is None:
2174 2165 ctx = originalfctx.changectx()
2175 2166 ctxmatch = lambda: True
2176 2167 else:
2177 2168 ctxmatch = lambda: ctx == originalfctx.changectx()
2178 2169
2179 2170 repo = originalfctx.repo()
2180 2171 flog = originalfctx.filelog()
2181 2172 super(overlayfilectx, self).__init__(repo, path, flog, ctx)
2182 2173
2183 2174 if copied is None:
2184 2175 copied = originalfctx.renamed()
2185 2176 copiedmatch = lambda: True
2186 2177 else:
2187 2178 if copied and not isinstance(copied, tuple):
2188 2179 # repo._filecommit will recalculate copyrev so nullid is okay
2189 2180 copied = (copied, nullid)
2190 2181 copiedmatch = lambda: copied == originalfctx.renamed()
2191 2182
2192 2183 # When data, copied (could affect data), ctx (could affect filelog
2193 2184 # parents) are not overridden, rawdata, rawflags, and filenode may be
2194 2185 # reused (repo._filecommit should double check filelog parents).
2195 2186 #
2196 2187 # path, flags are not hashed in filelog (but in manifestlog) so they do
2197 2188 # not affect reusability here.
2198 2189 #
2199 2190 # If ctx or copied is overridden to the same value as originalfctx's,
2200 2191 # still consider it reusable. originalfctx.renamed() may be a bit
2201 2192 # expensive so it's not called unless necessary. Assuming datafunc is
2202 2193 # always expensive, do not call it for this "reusable" test.
2203 2194 reusable = datafunc is None and ctxmatch() and copiedmatch()
2204 2195
2205 2196 if datafunc is None:
2206 2197 datafunc = originalfctx.data
2207 2198 if flags is None:
2208 2199 flags = originalfctx.flags()
2209 2200
2210 2201 self._datafunc = datafunc
2211 2202 self._flags = flags
2212 2203 self._copied = copied
2213 2204
2214 2205 if reusable:
2215 2206 # copy extra fields from originalfctx
2216 2207 attrs = ['rawdata', 'rawflags', '_filenode', '_filerev']
2217 2208 for attr in attrs:
2218 2209 if util.safehasattr(originalfctx, attr):
2219 2210 setattr(self, attr, getattr(originalfctx, attr))
2220 2211
2221 2212 def data(self):
2222 2213 return self._datafunc()
2223 2214
2224 2215 class metadataonlyctx(committablectx):
2225 2216 """Like memctx but it's reusing the manifest of different commit.
2226 2217 Intended to be used by lightweight operations that are creating
2227 2218 metadata-only changes.
2228 2219
2229 2220 Revision information is supplied at initialization time. 'repo' is the
2230 2221 current localrepo, 'ctx' is the original revision whose manifest we're
2231 2222 reusing, 'parents' is a sequence of two parent revision identifiers (pass
2232 2223 None for every missing parent), 'text' is the commit message.
2233 2224
2234 2225 user is the committer name and defaults to the current repository
2235 2226 username, date is the commit date in any format supported by
2236 2227 util.parsedate() and defaults to current date, extra is a dictionary of
2237 2228 metadata or is left empty.
2238 2229 """
2239 2230 def __new__(cls, repo, originalctx, *args, **kwargs):
2240 2231 return super(metadataonlyctx, cls).__new__(cls, repo)
2241 2232
2242 2233 def __init__(self, repo, originalctx, parents, text, user=None, date=None,
2243 2234 extra=None, editor=False):
2244 2235 super(metadataonlyctx, self).__init__(repo, text, user, date, extra)
2245 2236 self._rev = None
2246 2237 self._node = None
2247 2238 self._originalctx = originalctx
2248 2239 self._manifestnode = originalctx.manifestnode()
2249 2240 parents = [(p or nullid) for p in parents]
2250 2241 p1, p2 = self._parents = [changectx(self._repo, p) for p in parents]
2251 2242
2252 2243 # sanity check to ensure that the reused manifest parents are
2253 2244 # manifests of our commit parents
2254 2245 mp1, mp2 = self.manifestctx().parents
2255 2246 if p1.node() != nullid and p1.manifestnode() != mp1:
2256 2247 raise RuntimeError('can\'t reuse the manifest: '
2257 2248 'its p1 doesn\'t match the new ctx p1')
2258 2249 if p2.node() != nullid and p2.manifestnode() != mp2:
2259 2250 raise RuntimeError('can\'t reuse the manifest: '
2260 2251 'its p2 doesn\'t match the new ctx p2')
2261 2252
2262 2253 self._files = originalctx.files()
2263 2254 self.substate = {}
2264 2255
2265 2256 if editor:
2266 2257 self._text = editor(self._repo, self, [])
2267 2258 self._repo.savecommitmessage(self._text)
2268 2259
2269 2260 def manifestnode(self):
2270 2261 return self._manifestnode
2271 2262
2272 2263 @property
2273 2264 def _manifestctx(self):
2274 2265 return self._repo.manifestlog[self._manifestnode]
2275 2266
2276 2267 def filectx(self, path, filelog=None):
2277 2268 return self._originalctx.filectx(path, filelog=filelog)
2278 2269
2279 2270 def commit(self):
2280 2271 """commit context to the repo"""
2281 2272 return self._repo.commitctx(self)
2282 2273
2283 2274 @property
2284 2275 def _manifest(self):
2285 2276 return self._originalctx.manifest()
2286 2277
2287 2278 @propertycache
2288 2279 def _status(self):
2289 2280 """Calculate exact status from ``files`` specified in the ``origctx``
2290 2281 and parents manifests.
2291 2282 """
2292 2283 man1 = self.p1().manifest()
2293 2284 p2 = self._parents[1]
2294 2285 # "1 < len(self._parents)" can't be used for checking
2295 2286 # existence of the 2nd parent, because "metadataonlyctx._parents" is
2296 2287 # explicitly initialized with a list whose length is 2.
2297 2288 if p2.node() != nullid:
2298 2289 man2 = p2.manifest()
2299 2290 managing = lambda f: f in man1 or f in man2
2300 2291 else:
2301 2292 managing = lambda f: f in man1
2302 2293
2303 2294 modified, added, removed = [], [], []
2304 2295 for f in self._files:
2305 2296 if not managing(f):
2306 2297 added.append(f)
2307 2298 elif self[f]:
2308 2299 modified.append(f)
2309 2300 else:
2310 2301 removed.append(f)
2311 2302
2312 2303 return scmutil.status(modified, added, removed, [], [], [], [])
@@ -1,593 +1,602 b''
1 1 # encoding.py - character transcoding support for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import locale
12 12 import os
13 13 import unicodedata
14 14
15 15 from . import (
16 16 error,
17 17 policy,
18 18 pycompat,
19 19 )
20 20
21 21 _sysstr = pycompat.sysstr
22 22
23 23 if pycompat.ispy3:
24 24 unichr = chr
25 25
26 26 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
27 27 # "Unicode Subtleties"), so we need to ignore them in some places for
28 28 # sanity.
29 29 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in
30 30 "200c 200d 200e 200f 202a 202b 202c 202d 202e "
31 31 "206a 206b 206c 206d 206e 206f feff".split()]
32 32 # verify the next function will work
33 33 assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)
34 34
35 35 def hfsignoreclean(s):
36 36 """Remove codepoints ignored by HFS+ from s.
37 37
38 38 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
39 39 '.hg'
40 40 >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
41 41 '.hg'
42 42 """
43 43 if "\xe2" in s or "\xef" in s:
44 44 for c in _ignore:
45 45 s = s.replace(c, '')
46 46 return s
47 47
48 48 # encoding.environ is provided read-only, and may not be used to modify
49 49 # the process environment
50 50 _nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
51 51 if not pycompat.ispy3:
52 52 environ = os.environ # re-exports
53 53 elif _nativeenviron:
54 54 environ = os.environb # re-exports
55 55 else:
56 56 # preferred encoding isn't known yet; use utf-8 to avoid unicode error
57 57 # and recreate it once encoding is settled
58 58 environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
59 59 for k, v in os.environ.items()) # re-exports
60 60
61 61 _encodingfixers = {
62 62 '646': lambda: 'ascii',
63 63 'ANSI_X3.4-1968': lambda: 'ascii',
64 64 }
65 65
66 66 try:
67 67 encoding = environ.get("HGENCODING")
68 68 if not encoding:
69 69 encoding = locale.getpreferredencoding().encode('ascii') or 'ascii'
70 70 encoding = _encodingfixers.get(encoding, lambda: encoding)()
71 71 except locale.Error:
72 72 encoding = 'ascii'
73 73 encodingmode = environ.get("HGENCODINGMODE", "strict")
74 74 fallbackencoding = 'ISO-8859-1'
75 75
76 76 class localstr(str):
77 77 '''This class allows strings that are unmodified to be
78 78 round-tripped to the local encoding and back'''
79 79 def __new__(cls, u, l):
80 80 s = str.__new__(cls, l)
81 81 s._utf8 = u
82 82 return s
83 83 def __hash__(self):
84 84 return hash(self._utf8) # avoid collisions in local string space
85 85
86 86 def tolocal(s):
87 87 """
88 88 Convert a string from internal UTF-8 to local encoding
89 89
90 90 All internal strings should be UTF-8 but some repos before the
91 91 implementation of locale support may contain latin1 or possibly
92 92 other character sets. We attempt to decode everything strictly
93 93 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
94 94 replace unknown characters.
95 95
96 96 The localstr class is used to cache the known UTF-8 encoding of
97 97 strings next to their local representation to allow lossless
98 98 round-trip conversion back to UTF-8.
99 99
100 100 >>> u = 'foo: \\xc3\\xa4' # utf-8
101 101 >>> l = tolocal(u)
102 102 >>> l
103 103 'foo: ?'
104 104 >>> fromlocal(l)
105 105 'foo: \\xc3\\xa4'
106 106 >>> u2 = 'foo: \\xc3\\xa1'
107 107 >>> d = { l: 1, tolocal(u2): 2 }
108 108 >>> len(d) # no collision
109 109 2
110 110 >>> 'foo: ?' in d
111 111 False
112 112 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
113 113 >>> l = tolocal(l1)
114 114 >>> l
115 115 'foo: ?'
116 116 >>> fromlocal(l) # magically in utf-8
117 117 'foo: \\xc3\\xa4'
118 118 """
119 119
120 120 try:
121 121 try:
122 122 # make sure string is actually stored in UTF-8
123 123 u = s.decode('UTF-8')
124 124 if encoding == 'UTF-8':
125 125 # fast path
126 126 return s
127 127 r = u.encode(_sysstr(encoding), u"replace")
128 128 if u == r.decode(_sysstr(encoding)):
129 129 # r is a safe, non-lossy encoding of s
130 130 return r
131 131 return localstr(s, r)
132 132 except UnicodeDecodeError:
133 133 # we should only get here if we're looking at an ancient changeset
134 134 try:
135 135 u = s.decode(_sysstr(fallbackencoding))
136 136 r = u.encode(_sysstr(encoding), u"replace")
137 137 if u == r.decode(_sysstr(encoding)):
138 138 # r is a safe, non-lossy encoding of s
139 139 return r
140 140 return localstr(u.encode('UTF-8'), r)
141 141 except UnicodeDecodeError:
142 142 u = s.decode("utf-8", "replace") # last ditch
143 143 # can't round-trip
144 144 return u.encode(_sysstr(encoding), u"replace")
145 145 except LookupError as k:
146 146 raise error.Abort(k, hint="please check your locale settings")
147 147
148 148 def fromlocal(s):
149 149 """
150 150 Convert a string from the local character encoding to UTF-8
151 151
152 152 We attempt to decode strings using the encoding mode set by
153 153 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
154 154 characters will cause an error message. Other modes include
155 155 'replace', which replaces unknown characters with a special
156 156 Unicode character, and 'ignore', which drops the character.
157 157 """
158 158
159 159 # can we do a lossless round-trip?
160 160 if isinstance(s, localstr):
161 161 return s._utf8
162 162
163 163 try:
164 164 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
165 165 return u.encode("utf-8")
166 166 except UnicodeDecodeError as inst:
167 167 sub = s[max(0, inst.start - 10):inst.start + 10]
168 168 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
169 169 except LookupError as k:
170 170 raise error.Abort(k, hint="please check your locale settings")
171 171
172 172 def unitolocal(u):
173 173 """Convert a unicode string to a byte string of local encoding"""
174 174 return tolocal(u.encode('utf-8'))
175 175
176 176 def unifromlocal(s):
177 177 """Convert a byte string of local encoding to a unicode string"""
178 178 return fromlocal(s).decode('utf-8')
179 179
180 def unimethod(bytesfunc):
181 """Create a proxy method that forwards __unicode__() and __str__() of
182 Python 3 to __bytes__()"""
183 def unifunc(obj):
184 return unifromlocal(bytesfunc(obj))
185 return unifunc
186
180 187 # converter functions between native str and byte string. use these if the
181 188 # character encoding is unknown (e.g. exception messages) or is known to
182 189 # be locale dependent (e.g. date formatting.)
183 190 if pycompat.ispy3:
184 191 strtolocal = unitolocal
185 192 strfromlocal = unifromlocal
193 strmethod = unimethod
186 194 else:
187 195 strtolocal = pycompat.identity
188 196 strfromlocal = pycompat.identity
197 strmethod = pycompat.identity
189 198
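# Illustrative sketch (not part of this changeset): with strmethod, a
# class defines __bytes__() once and gets a working native str() on both
# Python versions ('somenode' is a hypothetical example class):
#
#   class somenode(object):
#       def __bytes__(self):
#           return b'cafe1234'
#       __str__ = strmethod(__bytes__)
#
# On Python 3, str(somenode()) decodes the bytes via unifromlocal(); on
# Python 2, strmethod is the identity and __str__ returns bytes as-is.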
190 199 if not _nativeenviron:
191 200 # now encoding and helper functions are available, recreate the environ
192 201 # dict to be exported to other modules
193 202 environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
194 203 for k, v in os.environ.items()) # re-exports
195 204
196 205 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
197 206 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
198 207 and "WFA" or "WF")
199 208
200 209 def colwidth(s):
201 210 "Find the column width of a string for display in the local encoding"
202 211 return ucolwidth(s.decode(_sysstr(encoding), u'replace'))
203 212
204 213 def ucolwidth(d):
205 214 "Find the column width of a Unicode string for display"
206 215 eaw = getattr(unicodedata, 'east_asian_width', None)
207 216 if eaw is not None:
208 217 return sum([eaw(c) in _wide and 2 or 1 for c in d])
209 218 return len(d)
210 219
211 220 def getcols(s, start, c):
212 221 '''Use colwidth to find a c-column substring of s starting at byte
213 222 index start'''
214 223 for x in xrange(start + c, len(s)):
215 224 t = s[start:x]
216 225 if colwidth(t) == c:
217 226 return t
218 227
219 228 def trim(s, width, ellipsis='', leftside=False):
220 229 """Trim string 's' to at most 'width' columns (including 'ellipsis').
221 230
222 231 If 'leftside' is True, left side of string 's' is trimmed.
223 232 'ellipsis' is always placed at trimmed side.
224 233
225 234 >>> ellipsis = '+++'
226 235 >>> from . import encoding
227 236 >>> encoding.encoding = 'utf-8'
228 237 >>> t= '1234567890'
229 238 >>> print trim(t, 12, ellipsis=ellipsis)
230 239 1234567890
231 240 >>> print trim(t, 10, ellipsis=ellipsis)
232 241 1234567890
233 242 >>> print trim(t, 8, ellipsis=ellipsis)
234 243 12345+++
235 244 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
236 245 +++67890
237 246 >>> print trim(t, 8)
238 247 12345678
239 248 >>> print trim(t, 8, leftside=True)
240 249 34567890
241 250 >>> print trim(t, 3, ellipsis=ellipsis)
242 251 +++
243 252 >>> print trim(t, 1, ellipsis=ellipsis)
244 253 +
245 254 >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
246 255 >>> t = u.encode(encoding.encoding)
247 256 >>> print trim(t, 12, ellipsis=ellipsis)
248 257 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
249 258 >>> print trim(t, 10, ellipsis=ellipsis)
250 259 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
251 260 >>> print trim(t, 8, ellipsis=ellipsis)
252 261 \xe3\x81\x82\xe3\x81\x84+++
253 262 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
254 263 +++\xe3\x81\x88\xe3\x81\x8a
255 264 >>> print trim(t, 5)
256 265 \xe3\x81\x82\xe3\x81\x84
257 266 >>> print trim(t, 5, leftside=True)
258 267 \xe3\x81\x88\xe3\x81\x8a
259 268 >>> print trim(t, 4, ellipsis=ellipsis)
260 269 +++
261 270 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
262 271 +++
263 272 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
264 273 >>> print trim(t, 12, ellipsis=ellipsis)
265 274 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
266 275 >>> print trim(t, 10, ellipsis=ellipsis)
267 276 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
268 277 >>> print trim(t, 8, ellipsis=ellipsis)
269 278 \x11\x22\x33\x44\x55+++
270 279 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
271 280 +++\x66\x77\x88\x99\xaa
272 281 >>> print trim(t, 8)
273 282 \x11\x22\x33\x44\x55\x66\x77\x88
274 283 >>> print trim(t, 8, leftside=True)
275 284 \x33\x44\x55\x66\x77\x88\x99\xaa
276 285 >>> print trim(t, 3, ellipsis=ellipsis)
277 286 +++
278 287 >>> print trim(t, 1, ellipsis=ellipsis)
279 288 +
280 289 """
281 290 try:
282 291 u = s.decode(_sysstr(encoding))
283 292 except UnicodeDecodeError:
284 293 if len(s) <= width: # trimming is not needed
285 294 return s
286 295 width -= len(ellipsis)
287 296 if width <= 0: # not enough room even for ellipsis
288 297 return ellipsis[:width + len(ellipsis)]
289 298 if leftside:
290 299 return ellipsis + s[-width:]
291 300 return s[:width] + ellipsis
292 301
293 302 if ucolwidth(u) <= width: # trimming is not needed
294 303 return s
295 304
296 305 width -= len(ellipsis)
297 306 if width <= 0: # not enough room even for ellipsis
298 307 return ellipsis[:width + len(ellipsis)]
299 308
300 309 if leftside:
301 310 uslice = lambda i: u[i:]
302 311 concat = lambda s: ellipsis + s
303 312 else:
304 313 uslice = lambda i: u[:-i]
305 314 concat = lambda s: s + ellipsis
306 315 for i in xrange(1, len(u)):
307 316 usub = uslice(i)
308 317 if ucolwidth(usub) <= width:
309 318 return concat(usub.encode(_sysstr(encoding)))
310 319 return ellipsis # not enough room for multi-column characters
311 320
312 321 def _asciilower(s):
313 322 '''convert a string to lowercase if ASCII
314 323
315 324 Raises UnicodeDecodeError if non-ASCII characters are found.'''
316 325 s.decode('ascii')
317 326 return s.lower()
318 327
319 328 def asciilower(s):
320 329 # delay importing avoids cyclic dependency around "parsers" in
321 330 # pure Python build (util => i18n => encoding => parsers => util)
322 331 parsers = policy.importmod(r'parsers')
323 332 impl = getattr(parsers, 'asciilower', _asciilower)
324 333 global asciilower
325 334 asciilower = impl
326 335 return impl(s)
327 336
328 337 def _asciiupper(s):
329 338 '''convert a string to uppercase if ASCII
330 339
331 340 Raises UnicodeDecodeError if non-ASCII characters are found.'''
332 341 s.decode('ascii')
333 342 return s.upper()
334 343
335 344 def asciiupper(s):
336 345 # delay importing avoids cyclic dependency around "parsers" in
337 346 # pure Python build (util => i18n => encoding => parsers => util)
338 347 parsers = policy.importmod(r'parsers')
339 348 impl = getattr(parsers, 'asciiupper', _asciiupper)
340 349 global asciiupper
341 350 asciiupper = impl
342 351 return impl(s)
343 352
344 353 def lower(s):
345 354 "best-effort encoding-aware case-folding of local string s"
346 355 try:
347 356 return asciilower(s)
348 357 except UnicodeDecodeError:
349 358 pass
350 359 try:
351 360 if isinstance(s, localstr):
352 361 u = s._utf8.decode("utf-8")
353 362 else:
354 363 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
355 364
356 365 lu = u.lower()
357 366 if u == lu:
358 367 return s # preserve localstring
359 368 return lu.encode(_sysstr(encoding))
360 369 except UnicodeError:
361 370 return s.lower() # we don't know how to fold this except in ASCII
362 371 except LookupError as k:
363 372 raise error.Abort(k, hint="please check your locale settings")
364 373
365 374 def upper(s):
366 375 "best-effort encoding-aware case-folding of local string s"
367 376 try:
368 377 return asciiupper(s)
369 378 except UnicodeDecodeError:
370 379 return upperfallback(s)
371 380
372 381 def upperfallback(s):
373 382 try:
374 383 if isinstance(s, localstr):
375 384 u = s._utf8.decode("utf-8")
376 385 else:
377 386 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
378 387
379 388 uu = u.upper()
380 389 if u == uu:
381 390 return s # preserve localstring
382 391 return uu.encode(_sysstr(encoding))
383 392 except UnicodeError:
384 393 return s.upper() # we don't know how to fold this except in ASCII
385 394 except LookupError as k:
386 395 raise error.Abort(k, hint="please check your locale settings")
387 396
388 397 class normcasespecs(object):
389 398 '''what a platform's normcase does to ASCII strings
390 399
391 400 This is specified per platform, and should be consistent with what normcase
392 401 on that platform actually does.
393 402
394 403 lower: normcase lowercases ASCII strings
395 404 upper: normcase uppercases ASCII strings
396 405 other: the fallback function should always be called
397 406
398 407 This should be kept in sync with normcase_spec in util.h.'''
399 408 lower = -1
400 409 upper = 1
401 410 other = 0
402 411
403 412 _jsonmap = []
404 413 _jsonmap.extend("\\u%04x" % x for x in range(32))
405 414 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
406 415 _jsonmap.append('\\u007f')
407 416 _jsonmap[0x09] = '\\t'
408 417 _jsonmap[0x0a] = '\\n'
409 418 _jsonmap[0x22] = '\\"'
410 419 _jsonmap[0x5c] = '\\\\'
411 420 _jsonmap[0x08] = '\\b'
412 421 _jsonmap[0x0c] = '\\f'
413 422 _jsonmap[0x0d] = '\\r'
414 423 _paranoidjsonmap = _jsonmap[:]
415 424 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
416 425 _paranoidjsonmap[0x3e] = '\\u003e' # '>'
417 426 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
418 427
419 428 def jsonescape(s, paranoid=False):
420 429 '''returns a string suitable for JSON
421 430
422 431 JSON is problematic for us because it doesn't support non-Unicode
423 432 bytes. To deal with this, we take the following approach:
424 433
425 434 - localstr objects are converted back to UTF-8
426 435 - valid UTF-8/ASCII strings are passed as-is
427 436 - other strings are converted to UTF-8b surrogate encoding
428 437 - apply JSON-specified string escaping
429 438
430 439 (escapes are doubled in these tests)
431 440
432 441 >>> jsonescape('this is a test')
433 442 'this is a test'
434 443 >>> jsonescape('escape characters: \\0 \\x0b \\x7f')
435 444 'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
436 445 >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
437 446 'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
438 447 >>> jsonescape('a weird byte: \\xdd')
439 448 'a weird byte: \\xed\\xb3\\x9d'
440 449 >>> jsonescape('utf-8: caf\\xc3\\xa9')
441 450 'utf-8: caf\\xc3\\xa9'
442 451 >>> jsonescape('')
443 452 ''
444 453
445 454 If paranoid, non-ascii and common troublesome characters are also escaped.
446 455 This is suitable for web output.
447 456
448 457 >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
449 458 'escape boundary: ~ \\\\u007f \\\\u0080'
450 459 >>> jsonescape('a weird byte: \\xdd', paranoid=True)
451 460 'a weird byte: \\\\udcdd'
452 461 >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True)
453 462 'utf-8: caf\\\\u00e9'
454 463 >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
455 464 'non-BMP: \\\\ud834\\\\udd1e'
456 465 >>> jsonescape('<foo@example.org>', paranoid=True)
457 466 '\\\\u003cfoo@example.org\\\\u003e'
458 467 '''
459 468
460 469 if paranoid:
461 470 jm = _paranoidjsonmap
462 471 else:
463 472 jm = _jsonmap
464 473
465 474 u8chars = toutf8b(s)
466 475 try:
467 476 return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
468 477 except IndexError:
469 478 pass
470 479 # non-BMP char is represented as UTF-16 surrogate pair
471 480 u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
472 481 u16codes.pop(0) # drop BOM
473 482 return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
474 483
475 484 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
476 485
477 486 def getutf8char(s, pos):
478 487 '''get the next full utf-8 character in the given string, starting at pos
479 488
480 489 Raises a UnicodeError if the given location does not start a valid
481 490 utf-8 character.
482 491 '''
483 492
484 493 # find how many bytes to attempt decoding from first nibble
485 494 l = _utf8len[ord(s[pos]) >> 4]
486 495 if not l: # ascii
487 496 return s[pos]
488 497
489 498 c = s[pos:pos + l]
490 499 # validate with attempted decode
491 500 c.decode("utf-8")
492 501 return c
493 502
494 503 def toutf8b(s):
495 504 '''convert a local, possibly-binary string into UTF-8b
496 505
497 506 This is intended as a generic method to preserve data when working
498 507 with schemes like JSON and XML that have no provision for
499 508 arbitrary byte strings. As Mercurial often doesn't know
500 509 what encoding data is in, we use so-called UTF-8b.
501 510
502 511 If a string is already valid UTF-8 (or ASCII), it passes unmodified.
503 512 Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
504 513 uDC00-uDCFF.
505 514
506 515 Principles of operation:
507 516
508 517 - ASCII and UTF-8 data successfully round-trips and is understood
509 518 by Unicode-oriented clients
510 519 - filenames and file contents in arbitrary other encodings can
511 520 be round-tripped or recovered by clueful clients
512 521 - local strings that have a cached known UTF-8 encoding (aka
513 522 localstr) get sent as UTF-8 so Unicode-oriented clients get the
514 523 Unicode data they want
515 524 - because we must preserve UTF-8 bytestring in places such as
516 525 filenames, metadata can't be roundtripped without help
517 526
518 527 (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
519 528 arbitrary bytes into an internal Unicode format that can be
520 529 re-encoded back into the original. Here we are exposing the
521 530 internal surrogate encoding as a UTF-8 string.)
522 531 '''
523 532
524 533 if "\xed" not in s:
525 534 if isinstance(s, localstr):
526 535 return s._utf8
527 536 try:
528 537 s.decode('utf-8')
529 538 return s
530 539 except UnicodeDecodeError:
531 540 pass
532 541
533 542 r = ""
534 543 pos = 0
535 544 l = len(s)
536 545 while pos < l:
537 546 try:
538 547 c = getutf8char(s, pos)
539 548 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
540 549 # have to re-escape existing U+DCxx characters
541 550 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
542 551 pos += 1
543 552 else:
544 553 pos += len(c)
545 554 except UnicodeDecodeError:
546 555 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
547 556 pos += 1
548 557 r += c
549 558 return r
550 559
551 560 def fromutf8b(s):
552 561 '''Given a UTF-8b string, return a local, possibly-binary string.
553 562
554 563 This reverses toutf8b to return the original binary string. It
555 564 is a round-trip process for strings like filenames, but metadata
556 565 that was passed through tolocal will remain in UTF-8.
557 566
558 567 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
559 568 >>> m = "\\xc3\\xa9\\x99abcd"
560 569 >>> toutf8b(m)
561 570 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
562 571 >>> roundtrip(m)
563 572 True
564 573 >>> roundtrip("\\xc2\\xc2\\x80")
565 574 True
566 575 >>> roundtrip("\\xef\\xbf\\xbd")
567 576 True
568 577 >>> roundtrip("\\xef\\xef\\xbf\\xbd")
569 578 True
570 579 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80")
571 580 True
572 581 '''
573 582
574 583 # fast path - look for uDxxx prefixes in s
575 584 if "\xed" not in s:
576 585 return s
577 586
578 587 # We could do this with the unicode type but some Python builds
579 588 # use UTF-16 internally (issue5031) which causes non-BMP code
580 589 # points to be escaped. Instead, we use our handy getutf8char
581 590 # helper again to walk the string without "decoding" it.
582 591
583 592 r = ""
584 593 pos = 0
585 594 l = len(s)
586 595 while pos < l:
587 596 c = getutf8char(s, pos)
588 597 pos += len(c)
589 598 # unescape U+DCxx characters
590 599 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
591 600 c = chr(ord(c.decode("utf-8")) & 0xff)
592 601 r += c
593 602 return r
@@ -1,3744 +1,3743 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import warnings
42 42 import zlib
43 43
44 44 from . import (
45 45 encoding,
46 46 error,
47 47 i18n,
48 48 policy,
49 49 pycompat,
50 50 )
51 51
52 52 base85 = policy.importmod(r'base85')
53 53 osutil = policy.importmod(r'osutil')
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 b85decode = base85.b85decode
57 57 b85encode = base85.b85encode
58 58
59 59 cookielib = pycompat.cookielib
60 60 empty = pycompat.empty
61 61 httplib = pycompat.httplib
62 62 httpserver = pycompat.httpserver
63 63 pickle = pycompat.pickle
64 64 queue = pycompat.queue
65 65 socketserver = pycompat.socketserver
66 66 stderr = pycompat.stderr
67 67 stdin = pycompat.stdin
68 68 stdout = pycompat.stdout
69 69 stringio = pycompat.stringio
70 70 urlerr = pycompat.urlerr
71 71 urlreq = pycompat.urlreq
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 # workaround for win32mbcs
75 75 _filenamebytestr = pycompat.bytestr
76 76
77 77 def isatty(fp):
78 78 try:
79 79 return fp.isatty()
80 80 except AttributeError:
81 81 return False
82 82
83 83 # glibc determines buffering on first write to stdout - if we replace a TTY
84 84 # destined stdout with a pipe destined stdout (e.g. pager), we want line
85 85 # buffering
86 86 if isatty(stdout):
87 87 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
88 88
89 89 if pycompat.osname == 'nt':
90 90 from . import windows as platform
91 91 stdout = platform.winstdout(stdout)
92 92 else:
93 93 from . import posix as platform
94 94
95 95 _ = i18n._
96 96
97 97 bindunixsocket = platform.bindunixsocket
98 98 cachestat = platform.cachestat
99 99 checkexec = platform.checkexec
100 100 checklink = platform.checklink
101 101 copymode = platform.copymode
102 102 executablepath = platform.executablepath
103 103 expandglobs = platform.expandglobs
104 104 explainexit = platform.explainexit
105 105 findexe = platform.findexe
106 106 gethgcmd = platform.gethgcmd
107 107 getuser = platform.getuser
108 108 getpid = os.getpid
109 109 groupmembers = platform.groupmembers
110 110 groupname = platform.groupname
111 111 hidewindow = platform.hidewindow
112 112 isexec = platform.isexec
113 113 isowner = platform.isowner
114 114 listdir = osutil.listdir
115 115 localpath = platform.localpath
116 116 lookupreg = platform.lookupreg
117 117 makedir = platform.makedir
118 118 nlinks = platform.nlinks
119 119 normpath = platform.normpath
120 120 normcase = platform.normcase
121 121 normcasespec = platform.normcasespec
122 122 normcasefallback = platform.normcasefallback
123 123 openhardlinks = platform.openhardlinks
124 124 oslink = platform.oslink
125 125 parsepatchoutput = platform.parsepatchoutput
126 126 pconvert = platform.pconvert
127 127 poll = platform.poll
128 128 popen = platform.popen
129 129 posixfile = platform.posixfile
130 130 quotecommand = platform.quotecommand
131 131 readpipe = platform.readpipe
132 132 rename = platform.rename
133 133 removedirs = platform.removedirs
134 134 samedevice = platform.samedevice
135 135 samefile = platform.samefile
136 136 samestat = platform.samestat
137 137 setbinary = platform.setbinary
138 138 setflags = platform.setflags
139 139 setsignalhandler = platform.setsignalhandler
140 140 shellquote = platform.shellquote
141 141 spawndetached = platform.spawndetached
142 142 split = platform.split
143 143 sshargs = platform.sshargs
144 144 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
145 145 statisexec = platform.statisexec
146 146 statislink = platform.statislink
147 147 testpid = platform.testpid
148 148 umask = platform.umask
149 149 unlink = platform.unlink
150 150 username = platform.username
151 151
152 152 try:
153 153 recvfds = osutil.recvfds
154 154 except AttributeError:
155 155 pass
156 156 try:
157 157 setprocname = osutil.setprocname
158 158 except AttributeError:
159 159 pass
160 160
161 161 # Python compatibility
162 162
163 163 _notset = object()
164 164
165 165 # disable Python's problematic floating point timestamps (issue4836)
166 166 # (Python hypocritically says you shouldn't change this behavior in
167 167 # libraries, and sure enough Mercurial is not a library.)
168 168 os.stat_float_times(False)
169 169
170 170 def safehasattr(thing, attr):
171 171 return getattr(thing, attr, _notset) is not _notset
172 172
173 173 def bitsfrom(container):
174 174 bits = 0
175 175 for bit in container:
176 176 bits |= bit
177 177 return bits
178 178
179 179 # python 2.6 still has deprecation warnings enabled by default. We do not want
180 180 # to display anything to standard users, so detect if we are running tests and
181 181 # only use python deprecation warnings in this case.
182 182 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
183 183 if _dowarn:
184 184 # explicitly unfilter our warning for python 2.7
185 185 #
186 186 # The option of setting PYTHONWARNINGS in the test runner was investigated.
187 187 # However, the module name set through PYTHONWARNINGS is matched exactly, so
188 188 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
189 189 # makes the whole PYTHONWARNINGS thing useless for our usecase.
190 190 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
191 191 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
192 192 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
193 193
194 194 def nouideprecwarn(msg, version, stacklevel=1):
195 195 """Issue an python native deprecation warning
196 196
197 197 This is a no-op outside of tests; use 'ui.deprecwarn' when possible.
198 198 """
199 199 if _dowarn:
200 200 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
201 201 " update your code.)") % version
202 202 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
203 203
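# A minimal usage sketch for the helper above (the message and version
# strings below are hypothetical):
#
#   nouideprecwarn('ctx._repo is deprecated, use ctx.repo()', '4.2')
#
# Outside of tests (i.e. when HGEMITWARNINGS is unset) this is a no-op.
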
204 204 DIGESTS = {
205 205 'md5': hashlib.md5,
206 206 'sha1': hashlib.sha1,
207 207 'sha512': hashlib.sha512,
208 208 }
209 209 # List of digest types from strongest to weakest
210 210 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
211 211
212 212 for k in DIGESTS_BY_STRENGTH:
213 213 assert k in DIGESTS
214 214
215 215 class digester(object):
216 216 """helper to compute digests.
217 217
218 218 This helper can be used to compute one or more digests given their name.
219 219
220 220 >>> d = digester(['md5', 'sha1'])
221 221 >>> d.update('foo')
222 222 >>> [k for k in sorted(d)]
223 223 ['md5', 'sha1']
224 224 >>> d['md5']
225 225 'acbd18db4cc2f85cedef654fccc4a4d8'
226 226 >>> d['sha1']
227 227 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
228 228 >>> digester.preferred(['md5', 'sha1'])
229 229 'sha1'
230 230 """
231 231
232 232 def __init__(self, digests, s=''):
233 233 self._hashes = {}
234 234 for k in digests:
235 235 if k not in DIGESTS:
236 236 raise Abort(_('unknown digest type: %s') % k)
237 237 self._hashes[k] = DIGESTS[k]()
238 238 if s:
239 239 self.update(s)
240 240
241 241 def update(self, data):
242 242 for h in self._hashes.values():
243 243 h.update(data)
244 244
245 245 def __getitem__(self, key):
246 246 if key not in DIGESTS:
247 247 raise Abort(_('unknown digest type: %s') % key)
248 248 return self._hashes[key].hexdigest()
249 249
250 250 def __iter__(self):
251 251 return iter(self._hashes)
252 252
253 253 @staticmethod
254 254 def preferred(supported):
255 255 """returns the strongest digest type in both supported and DIGESTS."""
256 256
257 257 for k in DIGESTS_BY_STRENGTH:
258 258 if k in supported:
259 259 return k
260 260 return None
261 261
262 262 class digestchecker(object):
263 263 """file handle wrapper that additionally checks content against a given
264 264 size and digests.
265 265
266 266 d = digestchecker(fh, size, {'md5': '...'})
267 267
268 268 When multiple digests are given, all of them are validated.
269 269 """
270 270
271 271 def __init__(self, fh, size, digests):
272 272 self._fh = fh
273 273 self._size = size
274 274 self._got = 0
275 275 self._digests = dict(digests)
276 276 self._digester = digester(self._digests.keys())
277 277
278 278 def read(self, length=-1):
279 279 content = self._fh.read(length)
280 280 self._digester.update(content)
281 281 self._got += len(content)
282 282 return content
283 283
284 284 def validate(self):
285 285 if self._size != self._got:
286 286 raise Abort(_('size mismatch: expected %d, got %d') %
287 287 (self._size, self._got))
288 288 for k, v in self._digests.items():
289 289 if v != self._digester[k]:
290 290 # i18n: first parameter is a digest name
291 291 raise Abort(_('%s mismatch: expected %s, got %s') %
292 292 (k, v, self._digester[k]))
293 293
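# A hedged usage sketch for digestchecker; 'fh', 'size' and 'expected' are
# hypothetical placeholders:
#
#   checked = digestchecker(fh, size, {'sha1': expected})
#   while checked.read(2 ** 16):
#       pass
#   checked.validate()  # raises Abort on a size or digest mismatch
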
294 294 try:
295 295 buffer = buffer
296 296 except NameError:
297 297 if not pycompat.ispy3:
298 298 def buffer(sliceable, offset=0, length=None):
299 299 if length is not None:
300 300 return sliceable[offset:offset + length]
301 301 return sliceable[offset:]
302 302 else:
303 303 def buffer(sliceable, offset=0, length=None):
304 304 if length is not None:
305 305 return memoryview(sliceable)[offset:offset + length]
306 306 return memoryview(sliceable)[offset:]
307 307
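# Illustrative sketch of the buffer() shim above: on Python 3 it slices
# through a memoryview, so no bytes are copied.
#
#   view = buffer(b'0123456789', 2, 4)
#   bytes(view)  # -> b'2345' (a plain slice on Python 2, a view on Python 3)
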
308 308 closefds = pycompat.osname == 'posix'
309 309
310 310 _chunksize = 4096
311 311
312 312 class bufferedinputpipe(object):
313 313 """a manually buffered input pipe
314 314
315 315 Python will not let us use buffered IO and lazy reading with 'polling' at
316 316 the same time. We cannot probe the buffer state and select will not detect
317 317 that data are ready to read if they are already buffered.
318 318
319 319 This class lets us work around that by implementing its own buffering
320 320 (allowing efficient readline) while offering a way to know if the buffer is
321 321 empty from the output (allowing collaboration of the buffer with polling).
322 322
323 323 This class lives in the 'util' module because it makes use of the 'os'
324 324 module from the python stdlib.
325 325 """
326 326
327 327 def __init__(self, input):
328 328 self._input = input
329 329 self._buffer = []
330 330 self._eof = False
331 331 self._lenbuf = 0
332 332
333 333 @property
334 334 def hasbuffer(self):
335 335 """True is any data is currently buffered
336 336
337 337 This will be used externally as a pre-step for polling IO. If there is
338 338 already buffered data, then no polling should be put in place.
339 339 return bool(self._buffer)
340 340
341 341 @property
342 342 def closed(self):
343 343 return self._input.closed
344 344
345 345 def fileno(self):
346 346 return self._input.fileno()
347 347
348 348 def close(self):
349 349 return self._input.close()
350 350
351 351 def read(self, size):
352 352 while (not self._eof) and (self._lenbuf < size):
353 353 self._fillbuffer()
354 354 return self._frombuffer(size)
355 355
356 356 def readline(self, *args, **kwargs):
357 357 if 1 < len(self._buffer):
358 358 # this should not happen because both read and readline end with a
359 359 # _frombuffer call that collapses it.
360 360 self._buffer = [''.join(self._buffer)]
361 361 self._lenbuf = len(self._buffer[0])
362 362 lfi = -1
363 363 if self._buffer:
364 364 lfi = self._buffer[-1].find('\n')
365 365 while (not self._eof) and lfi < 0:
366 366 self._fillbuffer()
367 367 if self._buffer:
368 368 lfi = self._buffer[-1].find('\n')
369 369 size = lfi + 1
370 370 if lfi < 0: # end of file
371 371 size = self._lenbuf
372 372 elif 1 < len(self._buffer):
373 373 # we need to take previous chunks into account
374 374 size += self._lenbuf - len(self._buffer[-1])
375 375 return self._frombuffer(size)
376 376
377 377 def _frombuffer(self, size):
378 378 """return at most 'size' data from the buffer
379 379
380 380 The data are removed from the buffer."""
381 381 if size == 0 or not self._buffer:
382 382 return ''
383 383 buf = self._buffer[0]
384 384 if 1 < len(self._buffer):
385 385 buf = ''.join(self._buffer)
386 386
387 387 data = buf[:size]
388 388 buf = buf[len(data):]
389 389 if buf:
390 390 self._buffer = [buf]
391 391 self._lenbuf = len(buf)
392 392 else:
393 393 self._buffer = []
394 394 self._lenbuf = 0
395 395 return data
396 396
397 397 def _fillbuffer(self):
398 398 """read data to the buffer"""
399 399 data = os.read(self._input.fileno(), _chunksize)
400 400 if not data:
401 401 self._eof = True
402 402 else:
403 403 self._lenbuf += len(data)
404 404 self._buffer.append(data)
405 405
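# A hedged sketch of the intended use: wrap the read end of a pipe so that
# polling and buffered readline() can coexist ('proc' and 'timeout' are
# hypothetical):
#
#   pipe = bufferedinputpipe(proc.stdout)
#   if pipe.hasbuffer or select.select([pipe], [], [], timeout)[0]:
#       line = pipe.readline()
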
406 406 def popen2(cmd, env=None, newlines=False):
407 407 # Setting bufsize to -1 lets the system decide the buffer size.
408 408 # The default for bufsize is 0, meaning unbuffered. This leads to
409 409 # poor performance on Mac OS X: http://bugs.python.org/issue4194
410 410 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
411 411 close_fds=closefds,
412 412 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
413 413 universal_newlines=newlines,
414 414 env=env)
415 415 return p.stdin, p.stdout
416 416
417 417 def popen3(cmd, env=None, newlines=False):
418 418 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
419 419 return stdin, stdout, stderr
420 420
421 421 def popen4(cmd, env=None, newlines=False, bufsize=-1):
422 422 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
423 423 close_fds=closefds,
424 424 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
425 425 stderr=subprocess.PIPE,
426 426 universal_newlines=newlines,
427 427 env=env)
428 428 return p.stdin, p.stdout, p.stderr, p
429 429
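# Illustrative usage of the popen helpers above (the command is only an
# example):
#
#   stdin, stdout = popen2('sort')
#   stdin.write('b\na\n')
#   stdin.close()
#   stdout.read()  # -> 'a\nb\n'
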
430 430 def version():
431 431 """Return version information if available."""
432 432 try:
433 433 from . import __version__
434 434 return __version__.version
435 435 except ImportError:
436 436 return 'unknown'
437 437
438 438 def versiontuple(v=None, n=4):
439 439 """Parses a Mercurial version string into an N-tuple.
440 440
441 441 The version string to be parsed is specified with the ``v`` argument.
442 442 If it isn't defined, the current Mercurial version string will be parsed.
443 443
444 444 ``n`` can be 2, 3, or 4. Here is how some version strings map to
445 445 returned values:
446 446
447 447 >>> v = '3.6.1+190-df9b73d2d444'
448 448 >>> versiontuple(v, 2)
449 449 (3, 6)
450 450 >>> versiontuple(v, 3)
451 451 (3, 6, 1)
452 452 >>> versiontuple(v, 4)
453 453 (3, 6, 1, '190-df9b73d2d444')
454 454
455 455 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
456 456 (3, 6, 1, '190-df9b73d2d444+20151118')
457 457
458 458 >>> v = '3.6'
459 459 >>> versiontuple(v, 2)
460 460 (3, 6)
461 461 >>> versiontuple(v, 3)
462 462 (3, 6, None)
463 463 >>> versiontuple(v, 4)
464 464 (3, 6, None, None)
465 465
466 466 >>> v = '3.9-rc'
467 467 >>> versiontuple(v, 2)
468 468 (3, 9)
469 469 >>> versiontuple(v, 3)
470 470 (3, 9, None)
471 471 >>> versiontuple(v, 4)
472 472 (3, 9, None, 'rc')
473 473
474 474 >>> v = '3.9-rc+2-02a8fea4289b'
475 475 >>> versiontuple(v, 2)
476 476 (3, 9)
477 477 >>> versiontuple(v, 3)
478 478 (3, 9, None)
479 479 >>> versiontuple(v, 4)
480 480 (3, 9, None, 'rc+2-02a8fea4289b')
481 481 """
482 482 if not v:
483 483 v = version()
484 484 parts = remod.split(br'[\+-]', v, 1)
485 485 if len(parts) == 1:
486 486 vparts, extra = parts[0], None
487 487 else:
488 488 vparts, extra = parts
489 489
490 490 vints = []
491 491 for i in vparts.split('.'):
492 492 try:
493 493 vints.append(int(i))
494 494 except ValueError:
495 495 break
496 496 # (3, 6) -> (3, 6, None)
497 497 while len(vints) < 3:
498 498 vints.append(None)
499 499
500 500 if n == 2:
501 501 return (vints[0], vints[1])
502 502 if n == 3:
503 503 return (vints[0], vints[1], vints[2])
504 504 if n == 4:
505 505 return (vints[0], vints[1], vints[2], extra)
506 506
507 507 # used by parsedate
508 508 defaultdateformats = (
509 509 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
510 510 '%Y-%m-%dT%H:%M', # without seconds
511 511 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
512 512 '%Y-%m-%dT%H%M', # without seconds
513 513 '%Y-%m-%d %H:%M:%S', # our common legal variant
514 514 '%Y-%m-%d %H:%M', # without seconds
515 515 '%Y-%m-%d %H%M%S', # without :
516 516 '%Y-%m-%d %H%M', # without seconds
517 517 '%Y-%m-%d %I:%M:%S%p',
518 518 '%Y-%m-%d %H:%M',
519 519 '%Y-%m-%d %I:%M%p',
520 520 '%Y-%m-%d',
521 521 '%m-%d',
522 522 '%m/%d',
523 523 '%m/%d/%y',
524 524 '%m/%d/%Y',
525 525 '%a %b %d %H:%M:%S %Y',
526 526 '%a %b %d %I:%M:%S%p %Y',
527 527 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
528 528 '%b %d %H:%M:%S %Y',
529 529 '%b %d %I:%M:%S%p %Y',
530 530 '%b %d %H:%M:%S',
531 531 '%b %d %I:%M:%S%p',
532 532 '%b %d %H:%M',
533 533 '%b %d %I:%M%p',
534 534 '%b %d %Y',
535 535 '%b %d',
536 536 '%H:%M:%S',
537 537 '%I:%M:%S%p',
538 538 '%H:%M',
539 539 '%I:%M%p',
540 540 )
541 541
542 542 extendeddateformats = defaultdateformats + (
543 543 "%Y",
544 544 "%Y-%m",
545 545 "%b",
546 546 "%b %Y",
547 547 )
548 548
549 549 def cachefunc(func):
550 550 '''cache the result of function calls'''
551 551 # XXX doesn't handle keyword args
552 552 if func.__code__.co_argcount == 0:
553 553 cache = []
554 554 def f():
555 555 if len(cache) == 0:
556 556 cache.append(func())
557 557 return cache[0]
558 558 return f
559 559 cache = {}
560 560 if func.__code__.co_argcount == 1:
561 561 # we gain a small amount of time because
562 562 # we don't need to pack/unpack the list
563 563 def f(arg):
564 564 if arg not in cache:
565 565 cache[arg] = func(arg)
566 566 return cache[arg]
567 567 else:
568 568 def f(*args):
569 569 if args not in cache:
570 570 cache[args] = func(*args)
571 571 return cache[args]
572 572
573 573 return f
574 574
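# A minimal sketch: cachefunc() memoizes a pure function, here used as a
# decorator. Note the cache is unbounded; see lrucachefunc below for a
# size-capped variant.
#
#   @cachefunc
#   def fib(n):
#       return n if n < 2 else fib(n - 1) + fib(n - 2)
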
575 575 class sortdict(collections.OrderedDict):
576 576 '''a simple sorted dictionary
577 577
578 578 >>> d1 = sortdict([('a', 0), ('b', 1)])
579 579 >>> d2 = d1.copy()
580 580 >>> d2
581 581 sortdict([('a', 0), ('b', 1)])
582 582 >>> d2.update([('a', 2)])
583 583 >>> d2.keys() # should still be in last-set order
584 584 ['b', 'a']
585 585 '''
586 586
587 587 def __setitem__(self, key, value):
588 588 if key in self:
589 589 del self[key]
590 590 super(sortdict, self).__setitem__(key, value)
591 591
592 592 class _lrucachenode(object):
593 593 """A node in a doubly linked list.
594 594
595 595 Holds a reference to nodes on either side as well as a key-value
596 596 pair for the dictionary entry.
597 597 """
598 598 __slots__ = (u'next', u'prev', u'key', u'value')
599 599
600 600 def __init__(self):
601 601 self.next = None
602 602 self.prev = None
603 603
604 604 self.key = _notset
605 605 self.value = None
606 606
607 607 def markempty(self):
608 608 """Mark the node as emptied."""
609 609 self.key = _notset
610 610
611 611 class lrucachedict(object):
612 612 """Dict that caches most recent accesses and sets.
613 613
614 614 The dict consists of an actual backing dict - indexed by original
615 615 key - and a doubly linked circular list defining the order of entries in
616 616 the cache.
617 617
618 618 The head node is the newest entry in the cache. If the cache is full,
619 619 we recycle head.prev and make it the new head. Cache accesses result in
620 620 the node being moved to before the existing head and being marked as the
621 621 new head node.
622 622 """
623 623 def __init__(self, max):
624 624 self._cache = {}
625 625
626 626 self._head = head = _lrucachenode()
627 627 head.prev = head
628 628 head.next = head
629 629 self._size = 1
630 630 self._capacity = max
631 631
632 632 def __len__(self):
633 633 return len(self._cache)
634 634
635 635 def __contains__(self, k):
636 636 return k in self._cache
637 637
638 638 def __iter__(self):
639 639 # We don't have to iterate in cache order, but why not.
640 640 n = self._head
641 641 for i in range(len(self._cache)):
642 642 yield n.key
643 643 n = n.next
644 644
645 645 def __getitem__(self, k):
646 646 node = self._cache[k]
647 647 self._movetohead(node)
648 648 return node.value
649 649
650 650 def __setitem__(self, k, v):
651 651 node = self._cache.get(k)
652 652 # Replace existing value and mark as newest.
653 653 if node is not None:
654 654 node.value = v
655 655 self._movetohead(node)
656 656 return
657 657
658 658 if self._size < self._capacity:
659 659 node = self._addcapacity()
660 660 else:
661 661 # Grab the last/oldest item.
662 662 node = self._head.prev
663 663
664 664 # At capacity. Kill the old entry.
665 665 if node.key is not _notset:
666 666 del self._cache[node.key]
667 667
668 668 node.key = k
669 669 node.value = v
670 670 self._cache[k] = node
671 671 # And mark it as newest entry. No need to adjust order since it
672 672 # is already self._head.prev.
673 673 self._head = node
674 674
675 675 def __delitem__(self, k):
676 676 node = self._cache.pop(k)
677 677 node.markempty()
678 678
679 679 # Temporarily mark as newest item before re-adjusting head to make
680 680 # this node the oldest item.
681 681 self._movetohead(node)
682 682 self._head = node.next
683 683
684 684 # Additional dict methods.
685 685
686 686 def get(self, k, default=None):
687 687 try:
688 688 return self._cache[k].value
689 689 except KeyError:
690 690 return default
691 691
692 692 def clear(self):
693 693 n = self._head
694 694 while n.key is not _notset:
695 695 n.markempty()
696 696 n = n.next
697 697
698 698 self._cache.clear()
699 699
700 700 def copy(self):
701 701 result = lrucachedict(self._capacity)
702 702 n = self._head.prev
703 703 # Iterate in oldest-to-newest order, so the copy has the right ordering
704 704 for i in range(len(self._cache)):
705 705 result[n.key] = n.value
706 706 n = n.prev
707 707 return result
708 708
709 709 def _movetohead(self, node):
710 710 """Mark a node as the newest, making it the new head.
711 711
712 712 When a node is accessed, it becomes the freshest entry in the LRU
713 713 list, which is denoted by self._head.
714 714
715 715 Visually, let's make ``N`` the new head node (* denotes head):
716 716
717 717 previous/oldest <-> head <-> next/next newest
718 718
719 719 ----<->--- A* ---<->-----
720 720 | |
721 721 E <-> D <-> N <-> C <-> B
722 722
723 723 To:
724 724
725 725 ----<->--- N* ---<->-----
726 726 | |
727 727 E <-> D <-> C <-> B <-> A
728 728
729 729 This requires the following moves:
730 730
731 731 C.next = D (node.prev.next = node.next)
732 732 D.prev = C (node.next.prev = node.prev)
733 733 E.next = N (head.prev.next = node)
734 734 N.prev = E (node.prev = head.prev)
735 735 N.next = A (node.next = head)
736 736 A.prev = N (head.prev = node)
737 737 """
738 738 head = self._head
739 739 # C.next = D
740 740 node.prev.next = node.next
741 741 # D.prev = C
742 742 node.next.prev = node.prev
743 743 # N.prev = E
744 744 node.prev = head.prev
745 745 # N.next = A
746 746 # It is tempting to do just "head" here, however if node is
747 747 # adjacent to head, this will do bad things.
748 748 node.next = head.prev.next
749 749 # E.next = N
750 750 node.next.prev = node
751 751 # A.prev = N
752 752 node.prev.next = node
753 753
754 754 self._head = node
755 755
756 756 def _addcapacity(self):
757 757 """Add a node to the circular linked list.
758 758
759 759 The new node is inserted before the head node.
760 760 """
761 761 head = self._head
762 762 node = _lrucachenode()
763 763 head.prev.next = node
764 764 node.prev = head.prev
765 765 node.next = head
766 766 head.prev = node
767 767 self._size += 1
768 768 return node
769 769
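# A small sketch of the LRU behavior described above:
#
#   d = lrucachedict(2)
#   d['a'] = 1
#   d['b'] = 2
#   d['a']      # touching 'a' makes it the newest entry
#   d['c'] = 3  # evicts 'b', the least recently used key
#   'b' in d    # -> False
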
770 770 def lrucachefunc(func):
771 771 '''cache most recent results of function calls'''
772 772 cache = {}
773 773 order = collections.deque()
774 774 if func.__code__.co_argcount == 1:
775 775 def f(arg):
776 776 if arg not in cache:
777 777 if len(cache) > 20:
778 778 del cache[order.popleft()]
779 779 cache[arg] = func(arg)
780 780 else:
781 781 order.remove(arg)
782 782 order.append(arg)
783 783 return cache[arg]
784 784 else:
785 785 def f(*args):
786 786 if args not in cache:
787 787 if len(cache) > 20:
788 788 del cache[order.popleft()]
789 789 cache[args] = func(*args)
790 790 else:
791 791 order.remove(args)
792 792 order.append(args)
793 793 return cache[args]
794 794
795 795 return f
796 796
797 797 class propertycache(object):
798 798 def __init__(self, func):
799 799 self.func = func
800 800 self.name = func.__name__
801 801 def __get__(self, obj, type=None):
802 802 result = self.func(obj)
803 803 self.cachevalue(obj, result)
804 804 return result
805 805
806 806 def cachevalue(self, obj, value):
807 807 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
808 808 obj.__dict__[self.name] = value
809 809
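# A hedged sketch of propertycache: the decorated method runs once per
# instance, and the result is stored in the instance __dict__ so later
# reads bypass __get__ entirely ('example' and its methods are
# hypothetical):
#
#   class example(object):
#       @propertycache
#       def expensive(self):
#           return self._computeexpensive()  # only called on first access
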
810 810 def pipefilter(s, cmd):
811 811 '''filter string S through command CMD, returning its output'''
812 812 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
813 813 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
814 814 pout, perr = p.communicate(s)
815 815 return pout
816 816
817 817 def tempfilter(s, cmd):
818 818 '''filter string S through a pair of temporary files with CMD.
819 819 CMD is used as a template to create the real command to be run,
820 820 with the strings INFILE and OUTFILE replaced by the real names of
821 821 the temporary files generated.'''
822 822 inname, outname = None, None
823 823 try:
824 824 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
825 825 fp = os.fdopen(infd, pycompat.sysstr('wb'))
826 826 fp.write(s)
827 827 fp.close()
828 828 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
829 829 os.close(outfd)
830 830 cmd = cmd.replace('INFILE', inname)
831 831 cmd = cmd.replace('OUTFILE', outname)
832 832 code = os.system(cmd)
833 833 if pycompat.sysplatform == 'OpenVMS' and code & 1:
834 834 code = 0
835 835 if code:
836 836 raise Abort(_("command '%s' failed: %s") %
837 837 (cmd, explainexit(code)))
838 838 return readfile(outname)
839 839 finally:
840 840 try:
841 841 if inname:
842 842 os.unlink(inname)
843 843 except OSError:
844 844 pass
845 845 try:
846 846 if outname:
847 847 os.unlink(outname)
848 848 except OSError:
849 849 pass
850 850
851 851 filtertable = {
852 852 'tempfile:': tempfilter,
853 853 'pipe:': pipefilter,
854 854 }
855 855
856 856 def filter(s, cmd):
857 857 "filter a string through a command that transforms its input to its output"
858 858 for name, fn in filtertable.iteritems():
859 859 if cmd.startswith(name):
860 860 return fn(s, cmd[len(name):].lstrip())
861 861 return pipefilter(s, cmd)
862 862
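# Illustrative calls against the filter table above (the shell commands are
# arbitrary examples):
#
#   filter('ab', 'pipe: tr a-z A-Z')                          # via pipefilter
#   filter('ab', 'tempfile: tr a-z A-Z < INFILE > OUTFILE')   # via tempfilter
#
# A command with no known prefix falls through to pipefilter().
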
863 863 def binary(s):
864 864 """return true if a string is binary data"""
865 865 return bool(s and '\0' in s)
866 866
867 867 def increasingchunks(source, min=1024, max=65536):
868 868 '''return no less than min bytes per chunk while data remains,
869 869 doubling min after each chunk until it reaches max'''
870 870 def log2(x):
871 871 if not x:
872 872 return 0
873 873 i = 0
874 874 while x:
875 875 x >>= 1
876 876 i += 1
877 877 return i - 1
878 878
879 879 buf = []
880 880 blen = 0
881 881 for chunk in source:
882 882 buf.append(chunk)
883 883 blen += len(chunk)
884 884 if blen >= min:
885 885 if min < max:
886 886 min = min << 1
887 887 nmin = 1 << log2(blen)
888 888 if nmin > min:
889 889 min = nmin
890 890 if min > max:
891 891 min = max
892 892 yield ''.join(buf)
893 893 blen = 0
894 894 buf = []
895 895 if buf:
896 896 yield ''.join(buf)
897 897
898 898 Abort = error.Abort
899 899
900 900 def always(fn):
901 901 return True
902 902
903 903 def never(fn):
904 904 return False
905 905
906 906 def nogc(func):
907 907 """disable garbage collector
908 908
909 909 Python's garbage collector triggers a GC each time a certain number of
910 910 container objects (the number being defined by gc.get_threshold()) are
911 911 allocated even when marked not to be tracked by the collector. Tracking has
912 912 no effect on when GCs are triggered, only on what objects the GC looks
913 913 into. As a workaround, disable GC while building complex (huge)
914 914 containers.
915 915
916 916 This garbage collector issue has been fixed in Python 2.7.
917 917 """
918 918 if sys.version_info >= (2, 7):
919 919 return func
920 920 def wrapper(*args, **kwargs):
921 921 gcenabled = gc.isenabled()
922 922 gc.disable()
923 923 try:
924 924 return func(*args, **kwargs)
925 925 finally:
926 926 if gcenabled:
927 927 gc.enable()
928 928 return wrapper
929 929
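# A minimal sketch: decorate a builder of huge containers so the cyclic GC
# stays disabled for its duration (a no-op wrapper on Python >= 2.7):
#
#   @nogc
#   def buildbig(items):
#       return dict(items)
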
930 930 def pathto(root, n1, n2):
931 931 '''return the relative path from one place to another.
932 932 root should use os.sep to separate directories
933 933 n1 should use os.sep to separate directories
934 934 n2 should use "/" to separate directories
935 935 returns an os.sep-separated path.
936 936
937 937 If n1 is a relative path, it's assumed it's
938 938 relative to root.
939 939 n2 should always be relative to root.
940 940 '''
941 941 if not n1:
942 942 return localpath(n2)
943 943 if os.path.isabs(n1):
944 944 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
945 945 return os.path.join(root, localpath(n2))
946 946 n2 = '/'.join((pconvert(root), n2))
947 947 a, b = splitpath(n1), n2.split('/')
948 948 a.reverse()
949 949 b.reverse()
950 950 while a and b and a[-1] == b[-1]:
951 951 a.pop()
952 952 b.pop()
953 953 b.reverse()
954 954 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
955 955
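# A worked example for pathto() on a POSIX layout (the paths are
# hypothetical):
#
#   pathto('/repo', 'a/b', 'a/c/d')  # -> '../c/d'
#
# i.e. the walk from directory 'a/b' up to the common ancestor 'a' and
# back down to 'c/d'.
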
956 956 def mainfrozen():
957 957 """return True if we are a frozen executable.
958 958
959 959 The code supports py2exe (most common, Windows only) and tools/freeze
960 960 (portable, not much used).
961 961 """
962 962 return (safehasattr(sys, "frozen") or # new py2exe
963 963 safehasattr(sys, "importers") or # old py2exe
964 964 imp.is_frozen(u"__main__")) # tools/freeze
965 965
966 966 # the location of data files matching the source code
967 967 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
968 968 # executable version (py2exe) doesn't support __file__
969 969 datapath = os.path.dirname(pycompat.sysexecutable)
970 970 else:
971 971 datapath = os.path.dirname(pycompat.fsencode(__file__))
972 972
973 973 i18n.setdatapath(datapath)
974 974
975 975 _hgexecutable = None
976 976
977 977 def hgexecutable():
978 978 """return location of the 'hg' executable.
979 979
980 980 Defaults to $HG or 'hg' in the search path.
981 981 """
982 982 if _hgexecutable is None:
983 983 hg = encoding.environ.get('HG')
984 984 mainmod = sys.modules[pycompat.sysstr('__main__')]
985 985 if hg:
986 986 _sethgexecutable(hg)
987 987 elif mainfrozen():
988 988 if getattr(sys, 'frozen', None) == 'macosx_app':
989 989 # Env variable set by py2app
990 990 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
991 991 else:
992 992 _sethgexecutable(pycompat.sysexecutable)
993 993 elif (os.path.basename(
994 994 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
995 995 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
996 996 else:
997 997 exe = findexe('hg') or os.path.basename(sys.argv[0])
998 998 _sethgexecutable(exe)
999 999 return _hgexecutable
1000 1000
1001 1001 def _sethgexecutable(path):
1002 1002 """set location of the 'hg' executable"""
1003 1003 global _hgexecutable
1004 1004 _hgexecutable = path
1005 1005
1006 1006 def _isstdout(f):
1007 1007 fileno = getattr(f, 'fileno', None)
1008 1008 return fileno and fileno() == sys.__stdout__.fileno()
1009 1009
1010 1010 def shellenviron(environ=None):
1011 1011 """return environ with optional override, useful for shelling out"""
1012 1012 def py2shell(val):
1013 1013 'convert python object into string that is useful to shell'
1014 1014 if val is None or val is False:
1015 1015 return '0'
1016 1016 if val is True:
1017 1017 return '1'
1018 1018 return str(val)
1019 1019 env = dict(encoding.environ)
1020 1020 if environ:
1021 1021 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1022 1022 env['HG'] = hgexecutable()
1023 1023 return env
1024 1024
1025 1025 def system(cmd, environ=None, cwd=None, out=None):
1026 1026 '''enhanced shell command execution.
1027 1027 run with environment maybe modified, maybe in different dir.
1028 1028
1029 1029 if out is specified, it is assumed to be a file-like object that has a
1030 1030 write() method. stdout and stderr will be redirected to out.'''
1031 1031 try:
1032 1032 stdout.flush()
1033 1033 except Exception:
1034 1034 pass
1035 1035 cmd = quotecommand(cmd)
1036 1036 env = shellenviron(environ)
1037 1037 if out is None or _isstdout(out):
1038 1038 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1039 1039 env=env, cwd=cwd)
1040 1040 else:
1041 1041 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1042 1042 env=env, cwd=cwd, stdout=subprocess.PIPE,
1043 1043 stderr=subprocess.STDOUT)
1044 1044 for line in iter(proc.stdout.readline, ''):
1045 1045 out.write(line)
1046 1046 proc.wait()
1047 1047 rc = proc.returncode
1048 1048 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1049 1049 rc = 0
1050 1050 return rc
1051 1051
1052 1052 def checksignature(func):
1053 1053 '''wrap a function with code to check for calling errors'''
1054 1054 def check(*args, **kwargs):
1055 1055 try:
1056 1056 return func(*args, **kwargs)
1057 1057 except TypeError:
1058 1058 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1059 1059 raise error.SignatureError
1060 1060 raise
1061 1061
1062 1062 return check
1063 1063
1064 1064 # a whitelist of known filesystems where hardlinks work reliably
1065 1065 _hardlinkfswhitelist = {
1066 1066 'btrfs',
1067 1067 'ext2',
1068 1068 'ext3',
1069 1069 'ext4',
1070 1070 'hfs',
1071 1071 'jfs',
1072 1072 'reiserfs',
1073 1073 'tmpfs',
1074 1074 'ufs',
1075 1075 'xfs',
1076 1076 'zfs',
1077 1077 }
1078 1078
1079 1079 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1080 1080 '''copy a file, preserving mode and optionally other stat info like
1081 1081 atime/mtime
1082 1082
1083 1083 The checkambig argument is used with filestat, and is useful only
1084 1084 if the destination file is guarded by a lock (e.g. repo.lock or
1085 1085 repo.wlock).
1086 1086
1087 1087 copystat and checkambig should be exclusive.
1088 1088 '''
1089 1089 assert not (copystat and checkambig)
1090 1090 oldstat = None
1091 1091 if os.path.lexists(dest):
1092 1092 if checkambig:
1093 1093 oldstat = checkambig and filestat.frompath(dest)
1094 1094 unlink(dest)
1095 1095 if hardlink:
1096 1096 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1097 1097 # unless we are confident that dest is on a whitelisted filesystem.
1098 1098 try:
1099 1099 fstype = getfstype(os.path.dirname(dest))
1100 1100 except OSError:
1101 1101 fstype = None
1102 1102 if fstype not in _hardlinkfswhitelist:
1103 1103 hardlink = False
1104 1104 if hardlink:
1105 1105 try:
1106 1106 oslink(src, dest)
1107 1107 return
1108 1108 except (IOError, OSError):
1109 1109 pass # fall back to normal copy
1110 1110 if os.path.islink(src):
1111 1111 os.symlink(os.readlink(src), dest)
1112 1112 # copytime is ignored for symlinks, but in general copytime isn't needed
1113 1113 # for them anyway
1114 1114 else:
1115 1115 try:
1116 1116 shutil.copyfile(src, dest)
1117 1117 if copystat:
1118 1118 # copystat also copies mode
1119 1119 shutil.copystat(src, dest)
1120 1120 else:
1121 1121 shutil.copymode(src, dest)
1122 1122 if oldstat and oldstat.stat:
1123 1123 newstat = filestat.frompath(dest)
1124 1124 if newstat.isambig(oldstat):
1125 1125 # stat of copied file is ambiguous to original one
1126 1126 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1127 1127 os.utime(dest, (advanced, advanced))
1128 1128 except shutil.Error as inst:
1129 1129 raise Abort(str(inst))
1130 1130
1131 1131 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1132 1132 """Copy a directory tree using hardlinks if possible."""
1133 1133 num = 0
1134 1134
1135 1135 gettopic = lambda: hardlink and _('linking') or _('copying')
1136 1136
1137 1137 if os.path.isdir(src):
1138 1138 if hardlink is None:
1139 1139 hardlink = (os.stat(src).st_dev ==
1140 1140 os.stat(os.path.dirname(dst)).st_dev)
1141 1141 topic = gettopic()
1142 1142 os.mkdir(dst)
1143 1143 for name, kind in listdir(src):
1144 1144 srcname = os.path.join(src, name)
1145 1145 dstname = os.path.join(dst, name)
1146 1146 def nprog(t, pos):
1147 1147 if pos is not None:
1148 1148 return progress(t, pos + num)
1149 1149 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1150 1150 num += n
1151 1151 else:
1152 1152 if hardlink is None:
1153 1153 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1154 1154 os.stat(os.path.dirname(dst)).st_dev)
1155 1155 topic = gettopic()
1156 1156
1157 1157 if hardlink:
1158 1158 try:
1159 1159 oslink(src, dst)
1160 1160 except (IOError, OSError):
1161 1161 hardlink = False
1162 1162 shutil.copy(src, dst)
1163 1163 else:
1164 1164 shutil.copy(src, dst)
1165 1165 num += 1
1166 1166 progress(topic, num)
1167 1167 progress(topic, None)
1168 1168
1169 1169 return hardlink, num
1170 1170
1171 1171 _winreservednames = '''con prn aux nul
1172 1172 com1 com2 com3 com4 com5 com6 com7 com8 com9
1173 1173 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1174 1174 _winreservedchars = ':*?"<>|'
1175 1175 def checkwinfilename(path):
1176 1176 r'''Check that the base-relative path is a valid filename on Windows.
1177 1177 Returns None if the path is ok, or a UI string describing the problem.
1178 1178
1179 1179 >>> checkwinfilename("just/a/normal/path")
1180 1180 >>> checkwinfilename("foo/bar/con.xml")
1181 1181 "filename contains 'con', which is reserved on Windows"
1182 1182 >>> checkwinfilename("foo/con.xml/bar")
1183 1183 "filename contains 'con', which is reserved on Windows"
1184 1184 >>> checkwinfilename("foo/bar/xml.con")
1185 1185 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1186 1186 "filename contains 'AUX', which is reserved on Windows"
1187 1187 >>> checkwinfilename("foo/bar/bla:.txt")
1188 1188 "filename contains ':', which is reserved on Windows"
1189 1189 >>> checkwinfilename("foo/bar/b\07la.txt")
1190 1190 "filename contains '\\x07', which is invalid on Windows"
1191 1191 >>> checkwinfilename("foo/bar/bla ")
1192 1192 "filename ends with ' ', which is not allowed on Windows"
1193 1193 >>> checkwinfilename("../bar")
1194 1194 >>> checkwinfilename("foo\\")
1195 1195 "filename ends with '\\', which is invalid on Windows"
1196 1196 >>> checkwinfilename("foo\\/bar")
1197 1197 "directory name ends with '\\', which is invalid on Windows"
1198 1198 '''
1199 1199 if path.endswith('\\'):
1200 1200 return _("filename ends with '\\', which is invalid on Windows")
1201 1201 if '\\/' in path:
1202 1202 return _("directory name ends with '\\', which is invalid on Windows")
1203 1203 for n in path.replace('\\', '/').split('/'):
1204 1204 if not n:
1205 1205 continue
1206 1206 for c in _filenamebytestr(n):
1207 1207 if c in _winreservedchars:
1208 1208 return _("filename contains '%s', which is reserved "
1209 1209 "on Windows") % c
1210 1210 if ord(c) <= 31:
1211 1211 return _("filename contains %r, which is invalid "
1212 1212 "on Windows") % c
1213 1213 base = n.split('.')[0]
1214 1214 if base and base.lower() in _winreservednames:
1215 1215 return _("filename contains '%s', which is reserved "
1216 1216 "on Windows") % base
1217 1217 t = n[-1]
1218 1218 if t in '. ' and n not in '..':
1219 1219 return _("filename ends with '%s', which is not allowed "
1220 1220 "on Windows") % t
1221 1221
1222 1222 if pycompat.osname == 'nt':
1223 1223 checkosfilename = checkwinfilename
1224 1224 timer = time.clock
1225 1225 else:
1226 1226 checkosfilename = platform.checkosfilename
1227 1227 timer = time.time
1228 1228
1229 1229 if safehasattr(time, "perf_counter"):
1230 1230 timer = time.perf_counter
1231 1231
1232 1232 def makelock(info, pathname):
1233 1233 try:
1234 1234 return os.symlink(info, pathname)
1235 1235 except OSError as why:
1236 1236 if why.errno == errno.EEXIST:
1237 1237 raise
1238 1238 except AttributeError: # no symlink in os
1239 1239 pass
1240 1240
1241 1241 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1242 1242 os.write(ld, info)
1243 1243 os.close(ld)
1244 1244
1245 1245 def readlock(pathname):
1246 1246 try:
1247 1247 return os.readlink(pathname)
1248 1248 except OSError as why:
1249 1249 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1250 1250 raise
1251 1251 except AttributeError: # no symlink in os
1252 1252 pass
1253 1253 fp = posixfile(pathname)
1254 1254 r = fp.read()
1255 1255 fp.close()
1256 1256 return r
1257 1257
1258 1258 def fstat(fp):
1259 1259 '''stat file object that may not have fileno method.'''
1260 1260 try:
1261 1261 return os.fstat(fp.fileno())
1262 1262 except AttributeError:
1263 1263 return os.stat(fp.name)
1264 1264
1265 1265 # File system features
1266 1266
1267 1267 def fscasesensitive(path):
1268 1268 """
1269 1269 Return true if the given path is on a case-sensitive filesystem
1270 1270
1271 1271 Requires a path (like /foo/.hg) ending with a foldable final
1272 1272 directory component.
1273 1273 """
1274 1274 s1 = os.lstat(path)
1275 1275 d, b = os.path.split(path)
1276 1276 b2 = b.upper()
1277 1277 if b == b2:
1278 1278 b2 = b.lower()
1279 1279 if b == b2:
1280 1280 return True # no evidence against case sensitivity
1281 1281 p2 = os.path.join(d, b2)
1282 1282 try:
1283 1283 s2 = os.lstat(p2)
1284 1284 if s2 == s1:
1285 1285 return False
1286 1286 return True
1287 1287 except OSError:
1288 1288 return True
1289 1289
1290 1290 try:
1291 1291 import re2
1292 1292 _re2 = None
1293 1293 except ImportError:
1294 1294 _re2 = False
1295 1295
1296 1296 class _re(object):
1297 1297 def _checkre2(self):
1298 1298 global _re2
1299 1299 try:
1300 1300 # check if match works, see issue3964
1301 1301 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1302 1302 except ImportError:
1303 1303 _re2 = False
1304 1304
1305 1305 def compile(self, pat, flags=0):
1306 1306 '''Compile a regular expression, using re2 if possible
1307 1307
1308 1308 For best performance, use only re2-compatible regexp features. The
1309 1309 only flags from the re module that are re2-compatible are
1310 1310 IGNORECASE and MULTILINE.'''
1311 1311 if _re2 is None:
1312 1312 self._checkre2()
1313 1313 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1314 1314 if flags & remod.IGNORECASE:
1315 1315 pat = '(?i)' + pat
1316 1316 if flags & remod.MULTILINE:
1317 1317 pat = '(?m)' + pat
1318 1318 try:
1319 1319 return re2.compile(pat)
1320 1320 except re2.error:
1321 1321 pass
1322 1322 return remod.compile(pat, flags)
1323 1323
1324 1324 @propertycache
1325 1325 def escape(self):
1326 1326 '''Return the version of escape corresponding to self.compile.
1327 1327
1328 1328 This is imperfect because whether re2 or re is used for a particular
1329 1329 function depends on the flags, etc, but it's the best we can do.
1330 1330 '''
1331 1331 global _re2
1332 1332 if _re2 is None:
1333 1333 self._checkre2()
1334 1334 if _re2:
1335 1335 return re2.escape
1336 1336 else:
1337 1337 return remod.escape
1338 1338
1339 1339 re = _re()
1340 1340
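# Illustrative usage of the re2-aware wrapper above:
#
#   pat = re.compile(br'\[([^\[]+)\]', remod.IGNORECASE)
#   pat.match('[ui]')
#
# When the optional re2 module is importable and the flags are compatible,
# compilation transparently goes through re2; otherwise it falls back to
# the stdlib re module (imported here as remod).
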
1341 1341 _fspathcache = {}
1342 1342 def fspath(name, root):
1343 1343 '''Get name in the case stored in the filesystem
1344 1344
1345 1345 The name should be relative to root, and be normcase-ed for efficiency.
1346 1346
1347 1347 Note that this function is unnecessary, and should not be
1348 1348 called, for case-sensitive filesystems (simply because it's expensive).
1349 1349
1350 1350 The root should be normcase-ed, too.
1351 1351 '''
1352 1352 def _makefspathcacheentry(dir):
1353 1353 return dict((normcase(n), n) for n in os.listdir(dir))
1354 1354
1355 1355 seps = pycompat.ossep
1356 1356 if pycompat.osaltsep:
1357 1357 seps = seps + pycompat.osaltsep
1358 1358 # Protect backslashes. This gets silly very quickly.
1359 1359 seps = seps.replace('\\', '\\\\')
1360 1360 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1361 1361 dir = os.path.normpath(root)
1362 1362 result = []
1363 1363 for part, sep in pattern.findall(name):
1364 1364 if sep:
1365 1365 result.append(sep)
1366 1366 continue
1367 1367
1368 1368 if dir not in _fspathcache:
1369 1369 _fspathcache[dir] = _makefspathcacheentry(dir)
1370 1370 contents = _fspathcache[dir]
1371 1371
1372 1372 found = contents.get(part)
1373 1373 if not found:
1374 1374 # retry "once per directory" per "dirstate.walk" which
1375 1375 # may take place for each patch of "hg qpush", for example
1376 1376 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1377 1377 found = contents.get(part)
1378 1378
1379 1379 result.append(found or part)
1380 1380 dir = os.path.join(dir, part)
1381 1381
1382 1382 return ''.join(result)
1383 1383
1384 1384 def getfstype(dirpath):
1385 1385 '''Get the filesystem type name from a directory (best-effort)
1386 1386
1387 1387 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1388 1388 '''
1389 1389 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1390 1390
1391 1391 def checknlink(testfile):
1392 1392 '''check whether hardlink count reporting works properly'''
1393 1393
1394 1394 # testfile may be open, so we need a separate file for checking to
1395 1395 # work around issue2543 (or testfile may get lost on Samba shares)
1396 1396 f1 = testfile + ".hgtmp1"
1397 1397 if os.path.lexists(f1):
1398 1398 return False
1399 1399 try:
1400 1400 posixfile(f1, 'w').close()
1401 1401 except IOError:
1402 1402 try:
1403 1403 os.unlink(f1)
1404 1404 except OSError:
1405 1405 pass
1406 1406 return False
1407 1407
1408 1408 f2 = testfile + ".hgtmp2"
1409 1409 fd = None
1410 1410 try:
1411 1411 oslink(f1, f2)
1412 1412 # nlinks() may behave differently for files on Windows shares if
1413 1413 # the file is open.
1414 1414 fd = posixfile(f2)
1415 1415 return nlinks(f2) > 1
1416 1416 except OSError:
1417 1417 return False
1418 1418 finally:
1419 1419 if fd is not None:
1420 1420 fd.close()
1421 1421 for f in (f1, f2):
1422 1422 try:
1423 1423 os.unlink(f)
1424 1424 except OSError:
1425 1425 pass
1426 1426
1427 1427 def endswithsep(path):
1428 1428 '''Check path ends with os.sep or os.altsep.'''
1429 1429 return (path.endswith(pycompat.ossep)
1430 1430 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1431 1431
1432 1432 def splitpath(path):
1433 1433 '''Split path by os.sep.
1434 1434 Note that this function does not use os.altsep because this is
1435 1435 an alternative to a simple "xxx.split(os.sep)".
1436 1436 It is recommended to use os.path.normpath() before using this
1437 1437 function if needed.'''
1438 1438 return path.split(pycompat.ossep)
1439 1439
1440 1440 def gui():
1441 1441 '''Are we running in a GUI?'''
1442 1442 if pycompat.sysplatform == 'darwin':
1443 1443 if 'SSH_CONNECTION' in encoding.environ:
1444 1444 # handle SSH access to a box where the user is logged in
1445 1445 return False
1446 1446 elif getattr(osutil, 'isgui', None):
1447 1447 # check if a CoreGraphics session is available
1448 1448 return osutil.isgui()
1449 1449 else:
1450 1450 # pure build; use a safe default
1451 1451 return True
1452 1452 else:
1453 1453 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1454 1454
1455 1455 def mktempcopy(name, emptyok=False, createmode=None):
1456 1456 """Create a temporary file with the same contents from name
1457 1457
1458 1458 The permission bits are copied from the original file.
1459 1459
1460 1460 If the temporary file is going to be truncated immediately, you
1461 1461 can use emptyok=True as an optimization.
1462 1462
1463 1463 Returns the name of the temporary file.
1464 1464 """
1465 1465 d, fn = os.path.split(name)
1466 1466 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1467 1467 os.close(fd)
1468 1468 # Temporary files are created with mode 0600, which is usually not
1469 1469 # what we want. If the original file already exists, just copy
1470 1470 # its mode. Otherwise, manually obey umask.
1471 1471 copymode(name, temp, createmode)
1472 1472 if emptyok:
1473 1473 return temp
1474 1474 try:
1475 1475 try:
1476 1476 ifp = posixfile(name, "rb")
1477 1477 except IOError as inst:
1478 1478 if inst.errno == errno.ENOENT:
1479 1479 return temp
1480 1480 if not getattr(inst, 'filename', None):
1481 1481 inst.filename = name
1482 1482 raise
1483 1483 ofp = posixfile(temp, "wb")
1484 1484 for chunk in filechunkiter(ifp):
1485 1485 ofp.write(chunk)
1486 1486 ifp.close()
1487 1487 ofp.close()
1488 1488 except: # re-raises
1489 1489 try: os.unlink(temp)
1490 1490 except OSError: pass
1491 1491 raise
1492 1492 return temp
1493 1493
1494 1494 class filestat(object):
1495 1495 """help to exactly detect change of a file
1496 1496
1497 1497 The 'stat' attribute is the result of 'os.stat()' if the specified
1498 1498 'path' exists. Otherwise, it is None. This saves clients of this
1499 1499 class a preparatory 'exists()' check.
1500 1500 """
1501 1501 def __init__(self, stat):
1502 1502 self.stat = stat
1503 1503
1504 1504 @classmethod
1505 1505 def frompath(cls, path):
1506 1506 try:
1507 1507 stat = os.stat(path)
1508 1508 except OSError as err:
1509 1509 if err.errno != errno.ENOENT:
1510 1510 raise
1511 1511 stat = None
1512 1512 return cls(stat)
1513 1513
1514 1514 @classmethod
1515 1515 def fromfp(cls, fp):
1516 1516 stat = os.fstat(fp.fileno())
1517 1517 return cls(stat)
1518 1518
1519 1519 __hash__ = object.__hash__
1520 1520
1521 1521 def __eq__(self, old):
1522 1522 try:
1523 1523 # if ambiguity between stat of new and old file is
1524 1524 # avoided, comparison of size, ctime and mtime is enough
1525 1525 # to exactly detect change of a file regardless of platform
1526 1526 return (self.stat.st_size == old.stat.st_size and
1527 1527 self.stat.st_ctime == old.stat.st_ctime and
1528 1528 self.stat.st_mtime == old.stat.st_mtime)
1529 1529 except AttributeError:
1530 1530 pass
1531 1531 try:
1532 1532 return self.stat is None and old.stat is None
1533 1533 except AttributeError:
1534 1534 return False
1535 1535
1536 1536 def isambig(self, old):
1537 1537 """Examine whether new (= self) stat is ambiguous against old one
1538 1538
1539 1539 "S[N]" below means stat of a file at N-th change:
1540 1540
1541 1541 - S[n-1].ctime < S[n].ctime: can detect change of a file
1542 1542 - S[n-1].ctime == S[n].ctime
1543 1543 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1544 1544 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1545 1545 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1546 1546 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1547 1547
1548 1548 Case (*2) above means that a file was changed twice or more
1549 1549 within the same second (= S[n-1].ctime), so comparing the
1550 1550 timestamps is ambiguous.
1551 1551
1552 1552 The basic idea to avoid such ambiguity is "advance mtime by 1
1553 1553 second, if the timestamp is ambiguous".
1554 1554
1555 1555 But advancing mtime only in case (*2) doesn't work as
1556 1556 expected, because naturally advanced S[n].mtime in case (*1)
1557 1557 might be equal to manually advanced S[n-1 or earlier].mtime.
1558 1558
1559 1559 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1560 1560 treated as ambiguous regardless of mtime, to avoid overlooking
1561 1561 changes due to conflicts between such mtimes.
1562 1562
1563 1563 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1564 1564 S[n].mtime", even if size of a file isn't changed.
1565 1565 """
1566 1566 try:
1567 1567 return (self.stat.st_ctime == old.stat.st_ctime)
1568 1568 except AttributeError:
1569 1569 return False
1570 1570
1571 1571 def avoidambig(self, path, old):
1572 1572 """Change file stat of specified path to avoid ambiguity
1573 1573
1574 1574 'old' should be previous filestat of 'path'.
1575 1575
1576 1576 This skips avoiding ambiguity, if a process doesn't have
1577 1577 appropriate privileges for 'path'. This returns False in this
1578 1578 case.
1579 1579
1580 1580 Otherwise, this returns True, as "ambiguity is avoided".
1581 1581 """
1582 1582 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1583 1583 try:
1584 1584 os.utime(path, (advanced, advanced))
1585 1585 except OSError as inst:
1586 1586 if inst.errno == errno.EPERM:
1587 1587 # utime() on the file created by another user causes EPERM,
1588 1588 # if a process doesn't have appropriate privileges
1589 1589 return False
1590 1590 raise
1591 1591 return True
1592 1592
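    # A hedged sketch of how isambig() and avoidambig() are meant to be
    # paired by callers ('path' is hypothetical):
    #
    #   old = filestat.frompath(path)
    #   # ... 'path' is overwritten here ...
    #   new = filestat.frompath(path)
    #   if new.isambig(old):
    #       new.avoidambig(path, old)  # nudge mtime so caches see a change
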
1593 1593 def __ne__(self, other):
1594 1594 return not self == other
1595 1595
1596 1596 class atomictempfile(object):
1597 1597 '''writable file object that atomically updates a file
1598 1598
1599 1599 All writes will go to a temporary copy of the original file. Call
1600 1600 close() when you are done writing, and atomictempfile will rename
1601 1601 the temporary copy to the original name, making the changes
1602 1602 visible. If the object is destroyed without being closed, all your
1603 1603 writes are discarded.
1604 1604
1605 1605 The checkambig argument of the constructor is used with filestat,
1606 1606 and is useful only if the target file is guarded by a lock
1607 1607 (e.g. repo.lock or repo.wlock).
1608 1608 '''
1609 1609 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1610 1610 self.__name = name # permanent name
1611 1611 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1612 1612 createmode=createmode)
1613 1613 self._fp = posixfile(self._tempname, mode)
1614 1614 self._checkambig = checkambig
1615 1615
1616 1616 # delegated methods
1617 1617 self.read = self._fp.read
1618 1618 self.write = self._fp.write
1619 1619 self.seek = self._fp.seek
1620 1620 self.tell = self._fp.tell
1621 1621 self.fileno = self._fp.fileno
1622 1622
1623 1623 def close(self):
1624 1624 if not self._fp.closed:
1625 1625 self._fp.close()
1626 1626 filename = localpath(self.__name)
1627 1627 oldstat = self._checkambig and filestat.frompath(filename)
1628 1628 if oldstat and oldstat.stat:
1629 1629 rename(self._tempname, filename)
1630 1630 newstat = filestat.frompath(filename)
1631 1631 if newstat.isambig(oldstat):
1632 1632 # stat of changed file is ambiguous to original one
1633 1633 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1634 1634 os.utime(filename, (advanced, advanced))
1635 1635 else:
1636 1636 rename(self._tempname, filename)
1637 1637
1638 1638 def discard(self):
1639 1639 if not self._fp.closed:
1640 1640 try:
1641 1641 os.unlink(self._tempname)
1642 1642 except OSError:
1643 1643 pass
1644 1644 self._fp.close()
1645 1645
1646 1646 def __del__(self):
1647 1647 if safehasattr(self, '_fp'): # constructor actually did something
1648 1648 self.discard()
1649 1649
1650 1650 def __enter__(self):
1651 1651 return self
1652 1652
1653 1653 def __exit__(self, exctype, excvalue, traceback):
1654 1654 if exctype is not None:
1655 1655 self.discard()
1656 1656 else:
1657 1657 self.close()
1658 1658
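# Illustrative usage of atomictempfile as a context manager ('data' is a
# hypothetical bytes value):
#
#   with atomictempfile('somefile', 'wb') as f:
#       f.write(data)  # writes go to a temporary copy
#   # on a clean exit the copy is renamed over 'somefile'; on an
#   # exception it is discarded instead
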
1659 1659 def unlinkpath(f, ignoremissing=False):
1660 1660 """unlink and remove the directory if it is empty"""
1661 1661 if ignoremissing:
1662 1662 tryunlink(f)
1663 1663 else:
1664 1664 unlink(f)
1665 1665 # try removing directories that might now be empty
1666 1666 try:
1667 1667 removedirs(os.path.dirname(f))
1668 1668 except OSError:
1669 1669 pass
1670 1670
1671 1671 def tryunlink(f):
1672 1672 """Attempt to remove a file, ignoring ENOENT errors."""
1673 1673 try:
1674 1674 unlink(f)
1675 1675 except OSError as e:
1676 1676 if e.errno != errno.ENOENT:
1677 1677 raise
1678 1678
1679 1679 def makedirs(name, mode=None, notindexed=False):
1680 1680 """recursive directory creation with parent mode inheritance
1681 1681
1682 1682 Newly created directories are marked as "not to be indexed by
1683 1683 the content indexing service", if ``notindexed`` is specified
1684 1684 for "write" mode access.
1685 1685 """
1686 1686 try:
1687 1687 makedir(name, notindexed)
1688 1688 except OSError as err:
1689 1689 if err.errno == errno.EEXIST:
1690 1690 return
1691 1691 if err.errno != errno.ENOENT or not name:
1692 1692 raise
1693 1693 parent = os.path.dirname(os.path.abspath(name))
1694 1694 if parent == name:
1695 1695 raise
1696 1696 makedirs(parent, mode, notindexed)
1697 1697 try:
1698 1698 makedir(name, notindexed)
1699 1699 except OSError as err:
1700 1700 # Catch EEXIST to handle races
1701 1701 if err.errno == errno.EEXIST:
1702 1702 return
1703 1703 raise
1704 1704 if mode is not None:
1705 1705 os.chmod(name, mode)
1706 1706
1707 1707 def readfile(path):
1708 1708 with open(path, 'rb') as fp:
1709 1709 return fp.read()
1710 1710
1711 1711 def writefile(path, text):
1712 1712 with open(path, 'wb') as fp:
1713 1713 fp.write(text)
1714 1714
1715 1715 def appendfile(path, text):
1716 1716 with open(path, 'ab') as fp:
1717 1717 fp.write(text)
1718 1718
1719 1719 class chunkbuffer(object):
1720 1720 """Allow arbitrary sized chunks of data to be efficiently read from an
1721 1721 iterator over chunks of arbitrary size."""
1722 1722
1723 1723 def __init__(self, in_iter):
1724 1724 """in_iter is the iterator that's iterating over the input chunks."""
1725 1725 def splitbig(chunks):
1726 1726 for chunk in chunks:
1727 1727 if len(chunk) > 2**20:
1728 1728 pos = 0
1729 1729 while pos < len(chunk):
1730 1730 end = pos + 2 ** 18
1731 1731 yield chunk[pos:end]
1732 1732 pos = end
1733 1733 else:
1734 1734 yield chunk
1735 1735 self.iter = splitbig(in_iter)
1736 1736 self._queue = collections.deque()
1737 1737 self._chunkoffset = 0
1738 1738
1739 1739 def read(self, l=None):
1740 1740 """Read L bytes of data from the iterator of chunks of data.
1741 1741 Returns less than L bytes if the iterator runs dry.
1742 1742
1743 1743 If the size parameter is omitted, read everything."""
1744 1744 if l is None:
1745 1745 return ''.join(self.iter)
1746 1746
1747 1747 left = l
1748 1748 buf = []
1749 1749 queue = self._queue
1750 1750 while left > 0:
1751 1751 # refill the queue
1752 1752 if not queue:
1753 1753 target = 2**18
1754 1754 for chunk in self.iter:
1755 1755 queue.append(chunk)
1756 1756 target -= len(chunk)
1757 1757 if target <= 0:
1758 1758 break
1759 1759 if not queue:
1760 1760 break
1761 1761
1762 1762 # The easy way to do this would be to queue.popleft(), modify the
1763 1763 # chunk (if necessary), then queue.appendleft(). However, for cases
1764 1764 # where we read partial chunk content, this incurs 2 dequeue
1765 1765 # mutations and creates a new str for the remaining chunk in the
1766 1766 # queue. Our code below avoids this overhead.
1767 1767
1768 1768 chunk = queue[0]
1769 1769 chunkl = len(chunk)
1770 1770 offset = self._chunkoffset
1771 1771
1772 1772 # Use full chunk.
1773 1773 if offset == 0 and left >= chunkl:
1774 1774 left -= chunkl
1775 1775 queue.popleft()
1776 1776 buf.append(chunk)
1777 1777 # self._chunkoffset remains at 0.
1778 1778 continue
1779 1779
1780 1780 chunkremaining = chunkl - offset
1781 1781
1782 1782 # Use all of unconsumed part of chunk.
1783 1783 if left >= chunkremaining:
1784 1784 left -= chunkremaining
1785 1785 queue.popleft()
1786 1786 # offset == 0 is enabled by block above, so this won't merely
1787 1787 # copy via ``chunk[0:]``.
1788 1788 buf.append(chunk[offset:])
1789 1789 self._chunkoffset = 0
1790 1790
1791 1791 # Partial chunk needed.
1792 1792 else:
1793 1793 buf.append(chunk[offset:offset + left])
1794 1794 self._chunkoffset += left
1795 1795 left -= chunkremaining
1796 1796
1797 1797 return ''.join(buf)
1798 1798
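# A small sketch: chunkbuffer re-chunks an iterator of arbitrarily sized
# chunks into fixed-size reads.
#
#   cb = chunkbuffer(iter(['abc', 'defgh']))
#   cb.read(4)  # -> 'abcd'
#   cb.read(4)  # -> 'efgh'
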
1799 1799 def filechunkiter(f, size=131072, limit=None):
1800 1800 """Create a generator that produces the data in the file size
1801 1801 (default 131072) bytes at a time, up to optional limit (default is
1802 1802 to read all data). Chunks may be less than size bytes if the
1803 1803 chunk is the last chunk in the file, or the file is a socket or
1804 1804 some other type of file that sometimes reads less data than is
1805 1805 requested."""
1806 1806 assert size >= 0
1807 1807 assert limit is None or limit >= 0
1808 1808 while True:
1809 1809 if limit is None:
1810 1810 nbytes = size
1811 1811 else:
1812 1812 nbytes = min(limit, size)
1813 1813 s = nbytes and f.read(nbytes)
1814 1814 if not s:
1815 1815 break
1816 1816 if limit:
1817 1817 limit -= len(s)
1818 1818 yield s
1819 1819
1820 1820 def makedate(timestamp=None):
1821 1821 '''Return a unix timestamp (or the current time) as a (unixtime,
1822 1822 offset) tuple based on the local timezone.'''
1823 1823 if timestamp is None:
1824 1824 timestamp = time.time()
1825 1825 if timestamp < 0:
1826 1826 hint = _("check your clock")
1827 1827 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1828 1828 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1829 1829 datetime.datetime.fromtimestamp(timestamp))
1830 1830 tz = delta.days * 86400 + delta.seconds
1831 1831 return timestamp, tz
1832 1832
1833 1833 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1834 1834 """represent a (unixtime, offset) tuple as a localized time.
1835 1835 unixtime is seconds since the epoch, and offset is the time zone's
1836 1836 number of seconds away from UTC.
1837 1837
1838 1838 >>> datestr((0, 0))
1839 1839 'Thu Jan 01 00:00:00 1970 +0000'
1840 1840 >>> datestr((42, 0))
1841 1841 'Thu Jan 01 00:00:42 1970 +0000'
1842 1842 >>> datestr((-42, 0))
1843 1843 'Wed Dec 31 23:59:18 1969 +0000'
1844 1844 >>> datestr((0x7fffffff, 0))
1845 1845 'Tue Jan 19 03:14:07 2038 +0000'
1846 1846 >>> datestr((-0x80000000, 0))
1847 1847 'Fri Dec 13 20:45:52 1901 +0000'
1848 1848 """
1849 1849 t, tz = date or makedate()
1850 1850 if "%1" in format or "%2" in format or "%z" in format:
1851 1851 sign = (tz > 0) and "-" or "+"
1852 1852 minutes = abs(tz) // 60
1853 1853 q, r = divmod(minutes, 60)
1854 1854 format = format.replace("%z", "%1%2")
1855 1855 format = format.replace("%1", "%c%02d" % (sign, q))
1856 1856 format = format.replace("%2", "%02d" % r)
1857 1857 d = t - tz
1858 1858 if d > 0x7fffffff:
1859 1859 d = 0x7fffffff
1860 1860 elif d < -0x80000000:
1861 1861 d = -0x80000000
1862 1862 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1863 1863 # because they use the gmtime() system call which is buggy on Windows
1864 1864 # for negative values.
1865 1865 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1866 1866 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1867 1867 return s
1868 1868
1869 1869 def shortdate(date=None):
1870 1870 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1871 1871 return datestr(date, format='%Y-%m-%d')
1872 1872
1873 1873 def parsetimezone(s):
1874 1874 """find a trailing timezone, if any, in string, and return a
1875 1875 (offset, remainder) pair"""
1876 1876
1877 1877 if s.endswith("GMT") or s.endswith("UTC"):
1878 1878 return 0, s[:-3].rstrip()
1879 1879
1880 1880 # Unix-style timezones [+-]hhmm
1881 1881 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1882 1882 sign = (s[-5] == "+") and 1 or -1
1883 1883 hours = int(s[-4:-2])
1884 1884 minutes = int(s[-2:])
1885 1885 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1886 1886
1887 1887 # ISO8601 trailing Z
1888 1888 if s.endswith("Z") and s[-2:-1].isdigit():
1889 1889 return 0, s[:-1]
1890 1890
1891 1891 # ISO8601-style [+-]hh:mm
1892 1892 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1893 1893 s[-5:-3].isdigit() and s[-2:].isdigit()):
1894 1894 sign = (s[-6] == "+") and 1 or -1
1895 1895 hours = int(s[-5:-3])
1896 1896 minutes = int(s[-2:])
1897 1897 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1898 1898
1899 1899 return None, s
1900 1900
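# A sketch of parsetimezone() on representative inputs (hypothetical date
# strings); per the "unixtime = localunixtime + offset" convention noted
# in strdate() below, an eastern zone like +05:30 yields a negative offset:
#
#   parsetimezone('2009-04-18 08:00 GMT')    -> (0, '2009-04-18 08:00')
#   parsetimezone('1970-01-01 12:00 +0530')  -> (-19800, '1970-01-01 12:00')
#   parsetimezone('no timezone here')        -> (None, 'no timezone here')
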
1901 1901 def strdate(string, format, defaults=None):
1902 1902 """parse a localized time string and return a (unixtime, offset) tuple.
1903 1903 if the string cannot be parsed, ValueError is raised."""
1904 1904 if defaults is None:
1905 1905 defaults = {}
1906 1906
1907 1907 # NOTE: unixtime = localunixtime + offset
1908 1908 offset, date = parsetimezone(string)
1909 1909
1910 1910 # add missing elements from defaults
1911 1911 usenow = False # default to using biased defaults
1912 1912 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1913 1913 part = pycompat.bytestr(part)
1914 1914 found = [True for p in part if ("%"+p) in format]
1915 1915 if not found:
1916 1916 date += "@" + defaults[part][usenow]
1917 1917 format += "@%" + part[0]
1918 1918 else:
1919 1919 # We've found a specific time element; less specific time
1920 1920 # elements are relative to today
1921 1921 usenow = True
1922 1922
1923 1923 timetuple = time.strptime(encoding.strfromlocal(date),
1924 1924 encoding.strfromlocal(format))
1925 1925 localunixtime = int(calendar.timegm(timetuple))
1926 1926 if offset is None:
1927 1927 # local timezone
1928 1928 unixtime = int(time.mktime(timetuple))
1929 1929 offset = unixtime - localunixtime
1930 1930 else:
1931 1931 unixtime = localunixtime + offset
1932 1932 return unixtime, offset
1933 1933
1934 1934 def parsedate(date, formats=None, bias=None):
1935 1935 """parse a localized date/time and return a (unixtime, offset) tuple.
1936 1936
1937 1937 The date may be a "unixtime offset" string or in one of the specified
1938 1938 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1939 1939
1940 1940 >>> parsedate(' today ') == parsedate(\
1941 1941 datetime.date.today().strftime('%b %d'))
1942 1942 True
1943 1943 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1944 1944 datetime.timedelta(days=1)\
1945 1945 ).strftime('%b %d'))
1946 1946 True
1947 1947 >>> now, tz = makedate()
1948 1948 >>> strnow, strtz = parsedate('now')
1949 1949 >>> (strnow - now) < 1
1950 1950 True
1951 1951 >>> tz == strtz
1952 1952 True
1953 1953 """
1954 1954 if bias is None:
1955 1955 bias = {}
1956 1956 if not date:
1957 1957 return 0, 0
1958 1958 if isinstance(date, tuple) and len(date) == 2:
1959 1959 return date
1960 1960 if not formats:
1961 1961 formats = defaultdateformats
1962 1962 date = date.strip()
1963 1963
1964 1964 if date == 'now' or date == _('now'):
1965 1965 return makedate()
1966 1966 if date == 'today' or date == _('today'):
1967 1967 date = datetime.date.today().strftime('%b %d')
1968 1968 elif date == 'yesterday' or date == _('yesterday'):
1969 1969 date = (datetime.date.today() -
1970 1970 datetime.timedelta(days=1)).strftime('%b %d')
1971 1971
1972 1972 try:
1973 1973 when, offset = map(int, date.split(' '))
1974 1974 except ValueError:
1975 1975 # fill out defaults
1976 1976 now = makedate()
1977 1977 defaults = {}
1978 1978 for part in ("d", "mb", "yY", "HI", "M", "S"):
1979 1979 # this piece is for rounding the specific end of unknowns
1980 1980 b = bias.get(part)
1981 1981 if b is None:
1982 1982 if part[0:1] in "HMS":
1983 1983 b = "00"
1984 1984 else:
1985 1985 b = "0"
1986 1986
1987 1987 # this piece is for matching the generic end to today's date
1988 1988 n = datestr(now, "%" + part[0:1])
1989 1989
1990 1990 defaults[part] = (b, n)
1991 1991
1992 1992 for format in formats:
1993 1993 try:
1994 1994 when, offset = strdate(date, format, defaults)
1995 1995 except (ValueError, OverflowError):
1996 1996 pass
1997 1997 else:
1998 1998 break
1999 1999 else:
2000 2000 raise error.ParseError(_('invalid date: %r') % date)
2001 2001 # validate explicit (probably user-specified) date and
2002 2002 # time zone offset. values must fit in signed 32 bits for
2003 2003 # current 32-bit linux runtimes. timezones go from UTC-12
2004 2004 # to UTC+14
2005 2005 if when < -0x80000000 or when > 0x7fffffff:
2006 2006 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2007 2007 if offset < -50400 or offset > 43200:
2008 2008 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2009 2009 return when, offset
2010 2010
2011 2011 def matchdate(date):
2012 2012 """Return a function that matches a given date match specifier
2013 2013
2014 2014 Formats include:
2015 2015
2016 2016 '{date}' match a given date to the accuracy provided
2017 2017
2018 2018 '<{date}' on or before a given date
2019 2019
2020 2020 '>{date}' on or after a given date
2021 2021
2022 2022 >>> p1 = parsedate("10:29:59")
2023 2023 >>> p2 = parsedate("10:30:00")
2024 2024 >>> p3 = parsedate("10:30:59")
2025 2025 >>> p4 = parsedate("10:31:00")
2026 2026 >>> p5 = parsedate("Sep 15 10:30:00 1999")
2027 2027 >>> f = matchdate("10:30")
2028 2028 >>> f(p1[0])
2029 2029 False
2030 2030 >>> f(p2[0])
2031 2031 True
2032 2032 >>> f(p3[0])
2033 2033 True
2034 2034 >>> f(p4[0])
2035 2035 False
2036 2036 >>> f(p5[0])
2037 2037 False
2038 2038 """
2039 2039
2040 2040 def lower(date):
2041 2041 d = {'mb': "1", 'd': "1"}
2042 2042 return parsedate(date, extendeddateformats, d)[0]
2043 2043
2044 2044 def upper(date):
2045 2045 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2046 2046 for days in ("31", "30", "29"):
2047 2047 try:
2048 2048 d["d"] = days
2049 2049 return parsedate(date, extendeddateformats, d)[0]
2050 2050 except Abort:
2051 2051 pass
2052 2052 d["d"] = "28"
2053 2053 return parsedate(date, extendeddateformats, d)[0]
2054 2054
2055 2055 date = date.strip()
2056 2056
2057 2057 if not date:
2058 2058 raise Abort(_("dates cannot consist entirely of whitespace"))
2059 2059 elif date[0] == "<":
2060 2060 if not date[1:]:
2061 2061 raise Abort(_("invalid day spec, use '<DATE'"))
2062 2062 when = upper(date[1:])
2063 2063 return lambda x: x <= when
2064 2064 elif date[0] == ">":
2065 2065 if not date[1:]:
2066 2066 raise Abort(_("invalid day spec, use '>DATE'"))
2067 2067 when = lower(date[1:])
2068 2068 return lambda x: x >= when
2069 2069 elif date[0] == "-":
2070 2070 try:
2071 2071 days = int(date[1:])
2072 2072 except ValueError:
2073 2073 raise Abort(_("invalid day spec: %s") % date[1:])
2074 2074 if days < 0:
2075 2075 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2076 2076 % date[1:])
2077 2077 when = makedate()[0] - days * 3600 * 24
2078 2078 return lambda x: x >= when
2079 2079 elif " to " in date:
2080 2080 a, b = date.split(" to ")
2081 2081 start, stop = lower(a), upper(b)
2082 2082 return lambda x: x >= start and x <= stop
2083 2083 else:
2084 2084 start, stop = lower(date), upper(date)
2085 2085 return lambda x: x >= start and x <= stop
2086 2086
2087 2087 def stringmatcher(pattern, casesensitive=True):
2088 2088 """
2089 2089 accepts a string, possibly starting with a 're:' or 'literal:' prefix.
2090 2090 returns the matcher name, pattern, and matcher function.
2091 2091 missing or unknown prefixes are treated as literal matches.
2092 2092
2093 2093 helper for tests:
2094 2094 >>> def test(pattern, *tests):
2095 2095 ... kind, pattern, matcher = stringmatcher(pattern)
2096 2096 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2097 2097 >>> def itest(pattern, *tests):
2098 2098 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2099 2099 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2100 2100
2101 2101 exact matching (no prefix):
2102 2102 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2103 2103 ('literal', 'abcdefg', [False, False, True])
2104 2104
2105 2105 regex matching ('re:' prefix)
2106 2106 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2107 2107 ('re', 'a.+b', [False, False, True])
2108 2108
2109 2109 force exact matches ('literal:' prefix)
2110 2110 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2111 2111 ('literal', 're:foobar', [False, True])
2112 2112
2113 2113 unknown prefixes are ignored and treated as literals
2114 2114 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2115 2115 ('literal', 'foo:bar', [False, False, True])
2116 2116
2117 2117 case insensitive regex matches
2118 2118 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2119 2119 ('re', 'A.+b', [False, False, True])
2120 2120
2121 2121 case insensitive literal matches
2122 2122 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2123 2123 ('literal', 'ABCDEFG', [False, False, True])
2124 2124 """
2125 2125 if pattern.startswith('re:'):
2126 2126 pattern = pattern[3:]
2127 2127 try:
2128 2128 flags = 0
2129 2129 if not casesensitive:
2130 2130 flags = remod.I
2131 2131 regex = remod.compile(pattern, flags)
2132 2132 except remod.error as e:
2133 2133 raise error.ParseError(_('invalid regular expression: %s')
2134 2134 % e)
2135 2135 return 're', pattern, regex.search
2136 2136 elif pattern.startswith('literal:'):
2137 2137 pattern = pattern[8:]
2138 2138
2139 2139 match = pattern.__eq__
2140 2140
2141 2141 if not casesensitive:
2142 2142 ipat = encoding.lower(pattern)
2143 2143 match = lambda s: ipat == encoding.lower(s)
2144 2144 return 'literal', pattern, match
2145 2145
2146 2146 def shortuser(user):
2147 2147 """Return a short representation of a user name or email address."""
2148 2148 f = user.find('@')
2149 2149 if f >= 0:
2150 2150 user = user[:f]
2151 2151 f = user.find('<')
2152 2152 if f >= 0:
2153 2153 user = user[f + 1:]
2154 2154 f = user.find(' ')
2155 2155 if f >= 0:
2156 2156 user = user[:f]
2157 2157 f = user.find('.')
2158 2158 if f >= 0:
2159 2159 user = user[:f]
2160 2160 return user
2161 2161
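# For example, shortuser() reduces a full author string (hypothetical
# input) to a short name:
#
#   shortuser('Foo Bar <foo.bar@example.com>')  -> 'foo'
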
2162 2162 def emailuser(user):
2163 2163 """Return the user portion of an email address."""
2164 2164 f = user.find('@')
2165 2165 if f >= 0:
2166 2166 user = user[:f]
2167 2167 f = user.find('<')
2168 2168 if f >= 0:
2169 2169 user = user[f + 1:]
2170 2170 return user
2171 2171
2172 2172 def email(author):
2173 2173 '''get email of author.'''
2174 2174 r = author.find('>')
2175 2175 if r == -1:
2176 2176 r = None
2177 2177 return author[author.find('<') + 1:r]
2178 2178
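# Given the same hypothetical author string, the three helpers return
# progressively shorter forms:
#
#   email('Foo Bar <foo.bar@example.com>')      -> 'foo.bar@example.com'
#   emailuser('Foo Bar <foo.bar@example.com>')  -> 'foo.bar'
#   shortuser('Foo Bar <foo.bar@example.com>')  -> 'foo'
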
2179 2179 def ellipsis(text, maxlength=400):
2180 2180 """Trim string to at most maxlength (default: 400) columns in display."""
2181 2181 return encoding.trim(text, maxlength, ellipsis='...')
2182 2182
2183 2183 def unitcountfn(*unittable):
2184 2184 '''return a function that renders a readable count of some quantity'''
2185 2185
2186 2186 def go(count):
2187 2187 for multiplier, divisor, format in unittable:
2188 2188 if abs(count) >= divisor * multiplier:
2189 2189 return format % (count / float(divisor))
2190 2190 return unittable[-1][2] % count
2191 2191
2192 2192 return go
2193 2193
2194 2194 def processlinerange(fromline, toline):
2195 2195 """Check that linerange <fromline>:<toline> makes sense and return a
2196 2196 0-based range.
2197 2197
2198 2198 >>> processlinerange(10, 20)
2199 2199 (9, 20)
2200 2200 >>> processlinerange(2, 1)
2201 2201 Traceback (most recent call last):
2202 2202 ...
2203 2203 ParseError: line range must be positive
2204 2204 >>> processlinerange(0, 5)
2205 2205 Traceback (most recent call last):
2206 2206 ...
2207 2207 ParseError: fromline must be strictly positive
2208 2208 """
2209 2209 if toline - fromline < 0:
2210 2210 raise error.ParseError(_("line range must be positive"))
2211 2211 if fromline < 1:
2212 2212 raise error.ParseError(_("fromline must be strictly positive"))
2213 2213 return fromline - 1, toline
2214 2214
2215 2215 bytecount = unitcountfn(
2216 2216 (100, 1 << 30, _('%.0f GB')),
2217 2217 (10, 1 << 30, _('%.1f GB')),
2218 2218 (1, 1 << 30, _('%.2f GB')),
2219 2219 (100, 1 << 20, _('%.0f MB')),
2220 2220 (10, 1 << 20, _('%.1f MB')),
2221 2221 (1, 1 << 20, _('%.2f MB')),
2222 2222 (100, 1 << 10, _('%.0f KB')),
2223 2223 (10, 1 << 10, _('%.1f KB')),
2224 2224 (1, 1 << 10, _('%.2f KB')),
2225 2225 (1, 1, _('%.0f bytes')),
2226 2226 )
2227 2227
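# bytecount() picks the first row of the table above whose threshold the
# value reaches, so precision grows as values shrink relative to the unit:
#
#   bytecount(1)              -> '1 bytes'
#   bytecount(2252)           -> '2.20 KB'
#   bytecount(10 * (1 << 20)) -> '10.0 MB'
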
2228 2228 # Matches a single EOL which can either be a CRLF where repeated CR
2229 2229 # are removed or a LF. We do not care about old Macintosh files, so a
2230 2230 # stray CR is an error.
2231 2231 _eolre = remod.compile(br'\r*\n')
2232 2232
2233 2233 def tolf(s):
2234 2234 return _eolre.sub('\n', s)
2235 2235
2236 2236 def tocrlf(s):
2237 2237 return _eolre.sub('\r\n', s)
2238 2238
2239 2239 if pycompat.oslinesep == '\r\n':
2240 2240 tonativeeol = tocrlf
2241 2241 fromnativeeol = tolf
2242 2242 else:
2243 2243 tonativeeol = pycompat.identity
2244 2244 fromnativeeol = pycompat.identity
2245 2245
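# For example, repeated CRs preceding a LF are folded into the new EOL:
#
#   tolf('a\r\r\nb\r\n')  -> 'a\nb\n'
#   tocrlf('a\nb\n')      -> 'a\r\nb\r\n'
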
2246 2246 def escapestr(s):
2247 2247 # call underlying function of s.encode('string_escape') directly for
2248 2248 # Python 3 compatibility
2249 2249 return codecs.escape_encode(s)[0]
2250 2250
2251 2251 def unescapestr(s):
2252 2252 return codecs.escape_decode(s)[0]
2253 2253
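# escapestr() and unescapestr() are inverses of each other:
#
#   escapestr('a\nb')    -> 'a\\nb'
#   unescapestr('a\\nb') -> 'a\nb'
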
2254 2254 def uirepr(s):
2255 2255 # Avoid double backslash in Windows path repr()
2256 2256 return repr(s).replace('\\\\', '\\')
2257 2257
2258 2258 # delay import of textwrap
2259 2259 def MBTextWrapper(**kwargs):
2260 2260 class tw(textwrap.TextWrapper):
2261 2261 """
2262 2262 Extend TextWrapper for width-awareness.
2263 2263
2264 2264 Neither the number of 'bytes' in any encoding nor the number of
2265 2265 'characters' is appropriate for calculating the terminal columns of a given string.
2266 2266
2267 2267 The original TextWrapper implementation uses the built-in 'len()' directly,
2268 2268 so overriding is needed to use the width information of each character.
2269 2269
2270 2270 In addition, characters classified as 'ambiguous' width are
2271 2271 treated as wide in East Asian locales, but as narrow elsewhere.
2272 2272
2273 2273 This requires a user decision to determine the width of such characters.
2274 2274 """
2275 2275 def _cutdown(self, ucstr, space_left):
2276 2276 l = 0
2277 2277 colwidth = encoding.ucolwidth
2278 2278 for i in xrange(len(ucstr)):
2279 2279 l += colwidth(ucstr[i])
2280 2280 if space_left < l:
2281 2281 return (ucstr[:i], ucstr[i:])
2282 2282 return ucstr, ''
2283 2283
2284 2284 # overriding of base class
2285 2285 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2286 2286 space_left = max(width - cur_len, 1)
2287 2287
2288 2288 if self.break_long_words:
2289 2289 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2290 2290 cur_line.append(cut)
2291 2291 reversed_chunks[-1] = res
2292 2292 elif not cur_line:
2293 2293 cur_line.append(reversed_chunks.pop())
2294 2294
2295 2295 # this overriding code is imported from TextWrapper of Python 2.6
2296 2296 # to calculate columns of string by 'encoding.ucolwidth()'
2297 2297 def _wrap_chunks(self, chunks):
2298 2298 colwidth = encoding.ucolwidth
2299 2299
2300 2300 lines = []
2301 2301 if self.width <= 0:
2302 2302 raise ValueError("invalid width %r (must be > 0)" % self.width)
2303 2303
2304 2304 # Arrange in reverse order so items can be efficiently popped
2305 2305 # from a stack of chunks.
2306 2306 chunks.reverse()
2307 2307
2308 2308 while chunks:
2309 2309
2310 2310 # Start the list of chunks that will make up the current line.
2311 2311 # cur_len is just the length of all the chunks in cur_line.
2312 2312 cur_line = []
2313 2313 cur_len = 0
2314 2314
2315 2315 # Figure out which static string will prefix this line.
2316 2316 if lines:
2317 2317 indent = self.subsequent_indent
2318 2318 else:
2319 2319 indent = self.initial_indent
2320 2320
2321 2321 # Maximum width for this line.
2322 2322 width = self.width - len(indent)
2323 2323
2324 2324 # First chunk on line is whitespace -- drop it, unless this
2325 2325 # is the very beginning of the text (i.e. no lines started yet).
2326 2326 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2327 2327 del chunks[-1]
2328 2328
2329 2329 while chunks:
2330 2330 l = colwidth(chunks[-1])
2331 2331
2332 2332 # Can at least squeeze this chunk onto the current line.
2333 2333 if cur_len + l <= width:
2334 2334 cur_line.append(chunks.pop())
2335 2335 cur_len += l
2336 2336
2337 2337 # Nope, this line is full.
2338 2338 else:
2339 2339 break
2340 2340
2341 2341 # The current line is full, and the next chunk is too big to
2342 2342 # fit on *any* line (not just this one).
2343 2343 if chunks and colwidth(chunks[-1]) > width:
2344 2344 self._handle_long_word(chunks, cur_line, cur_len, width)
2345 2345
2346 2346 # If the last chunk on this line is all whitespace, drop it.
2347 2347 if (self.drop_whitespace and
2348 2348 cur_line and cur_line[-1].strip() == r''):
2349 2349 del cur_line[-1]
2350 2350
2351 2351 # Convert current line back to a string and store it in list
2352 2352 # of all lines (return value).
2353 2353 if cur_line:
2354 2354 lines.append(indent + r''.join(cur_line))
2355 2355
2356 2356 return lines
2357 2357
2358 2358 global MBTextWrapper
2359 2359 MBTextWrapper = tw
2360 2360 return tw(**kwargs)
2361 2361
2362 2362 def wrap(line, width, initindent='', hangindent=''):
2363 2363 maxindent = max(len(hangindent), len(initindent))
2364 2364 if width <= maxindent:
2365 2365 # adjust for weird terminal size
2366 2366 width = max(78, maxindent + 1)
2367 2367 line = line.decode(pycompat.sysstr(encoding.encoding),
2368 2368 pycompat.sysstr(encoding.encodingmode))
2369 2369 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2370 2370 pycompat.sysstr(encoding.encodingmode))
2371 2371 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2372 2372 pycompat.sysstr(encoding.encodingmode))
2373 2373 wrapper = MBTextWrapper(width=width,
2374 2374 initial_indent=initindent,
2375 2375 subsequent_indent=hangindent)
2376 2376 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2377 2377
2378 2378 if (pyplatform.python_implementation() == 'CPython' and
2379 2379 sys.version_info < (3, 0)):
2380 2380 # There is an issue in CPython that some IO methods do not handle EINTR
2381 2381 # correctly. The following table shows what CPython version (and functions)
2382 2382 # are affected (buggy: has the EINTR bug, okay: otherwise):
2383 2383 #
2384 2384 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2385 2385 # --------------------------------------------------
2386 2386 # fp.__iter__ | buggy | buggy | okay
2387 2387 # fp.read* | buggy | okay [1] | okay
2388 2388 #
2389 2389 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2390 2390 #
2391 2391 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2392 2392 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2393 2393 #
2394 2394 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2395 2395 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2396 2396 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2397 2397 # fp.__iter__ but not other fp.read* methods.
2398 2398 #
2399 2399 # On modern systems like Linux, the "read" syscall cannot be interrupted
2400 2400 # when reading "fast" files like on-disk files. So the EINTR issue only
2401 2401 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2402 2402 # files approximately as "fast" files and use the fast (unsafe) code path,
2403 2403 # to minimize the performance impact.
2404 2404 if sys.version_info >= (2, 7, 4):
2405 2405 # fp.readline deals with EINTR correctly, use it as a workaround.
2406 2406 def _safeiterfile(fp):
2407 2407 return iter(fp.readline, '')
2408 2408 else:
2409 2409 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2410 2410 # note: this may block longer than necessary because of bufsize.
2411 2411 def _safeiterfile(fp, bufsize=4096):
2412 2412 fd = fp.fileno()
2413 2413 line = ''
2414 2414 while True:
2415 2415 try:
2416 2416 buf = os.read(fd, bufsize)
2417 2417 except OSError as ex:
2418 2418 # os.read only raises EINTR before any data is read
2419 2419 if ex.errno == errno.EINTR:
2420 2420 continue
2421 2421 else:
2422 2422 raise
2423 2423 line += buf
2424 2424 if '\n' in buf:
2425 2425 splitted = line.splitlines(True)
2426 2426 line = ''
2427 2427 for l in splitted:
2428 2428 if l[-1] == '\n':
2429 2429 yield l
2430 2430 else:
2431 2431 line = l
2432 2432 if not buf:
2433 2433 break
2434 2434 if line:
2435 2435 yield line
2436 2436
2437 2437 def iterfile(fp):
2438 2438 fastpath = True
2439 2439 if type(fp) is file:
2440 2440 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2441 2441 if fastpath:
2442 2442 return fp
2443 2443 else:
2444 2444 return _safeiterfile(fp)
2445 2445 else:
2446 2446 # PyPy and CPython 3 do not have the EINTR issue, thus no workaround is needed.
2447 2447 def iterfile(fp):
2448 2448 return fp
2449 2449
2450 2450 def iterlines(iterator):
2451 2451 for chunk in iterator:
2452 2452 for line in chunk.splitlines():
2453 2453 yield line
2454 2454
2455 2455 def expandpath(path):
2456 2456 return os.path.expanduser(os.path.expandvars(path))
2457 2457
2458 2458 def hgcmd():
2459 2459 """Return the command used to execute current hg
2460 2460
2461 2461 This is different from hgexecutable() because on Windows we want
2462 2462 to avoid things opening new shell windows like batch files, so we
2463 2463 get either the python call or current executable.
2464 2464 """
2465 2465 if mainfrozen():
2466 2466 if getattr(sys, 'frozen', None) == 'macosx_app':
2467 2467 # Env variable set by py2app
2468 2468 return [encoding.environ['EXECUTABLEPATH']]
2469 2469 else:
2470 2470 return [pycompat.sysexecutable]
2471 2471 return gethgcmd()
2472 2472
2473 2473 def rundetached(args, condfn):
2474 2474 """Execute the argument list in a detached process.
2475 2475
2476 2476 condfn is a callable which is called repeatedly and should return
2477 2477 True once the child process is known to have started successfully.
2478 2478 At this point, the child process PID is returned. If the child
2479 2479 process fails to start or finishes before condfn() evaluates to
2480 2480 True, return -1.
2481 2481 """
2482 2482 # Windows case is easier because the child process is either
2483 2483 # successfully starting and validating the condition or exiting
2484 2484 # on failure. We just poll on its PID. On Unix, if the child
2485 2485 # process fails to start, it will be left in a zombie state until
2486 2486 # the parent waits on it, which we cannot do since we expect a long
2487 2487 # running process on success. Instead we listen for SIGCHLD telling
2488 2488 # us our child process terminated.
2489 2489 terminated = set()
2490 2490 def handler(signum, frame):
2491 2491 terminated.add(os.wait())
2492 2492 prevhandler = None
2493 2493 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2494 2494 if SIGCHLD is not None:
2495 2495 prevhandler = signal.signal(SIGCHLD, handler)
2496 2496 try:
2497 2497 pid = spawndetached(args)
2498 2498 while not condfn():
2499 2499 if ((pid in terminated or not testpid(pid))
2500 2500 and not condfn()):
2501 2501 return -1
2502 2502 time.sleep(0.1)
2503 2503 return pid
2504 2504 finally:
2505 2505 if prevhandler is not None:
2506 2506 signal.signal(signal.SIGCHLD, prevhandler)
2507 2507
2508 2508 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2509 2509 """Return the result of interpolating items in the mapping into string s.
2510 2510
2511 2511 prefix is a single character string, or a two character string with
2512 2512 a backslash as the first character if the prefix needs to be escaped in
2513 2513 a regular expression.
2514 2514
2515 2515 fn is an optional function that will be applied to the replacement text
2516 2516 just before replacement.
2517 2517
2518 2518 escape_prefix is an optional flag that allows using doubled prefix for
2519 2519 its escaping.
2520 2520 """
2521 2521 fn = fn or (lambda s: s)
2522 2522 patterns = '|'.join(mapping.keys())
2523 2523 if escape_prefix:
2524 2524 patterns += '|' + prefix
2525 2525 if len(prefix) > 1:
2526 2526 prefix_char = prefix[1:]
2527 2527 else:
2528 2528 prefix_char = prefix
2529 2529 mapping[prefix_char] = prefix_char
2530 2530 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2531 2531 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2532 2532
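# A minimal interpolate() sketch (hypothetical mapping and input):
#
#   interpolate('%', {'foo': 'FOO'}, 'say %foo')  -> 'say FOO'
#
# With escape_prefix=True, a doubled prefix collapses to a literal one:
#
#   interpolate(r'\$', {'foo': 'FOO'}, 'say $foo and $$', escape_prefix=True)
#       -> 'say FOO and $'
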
2533 2533 def getport(port):
2534 2534 """Return the port for a given network service.
2535 2535
2536 2536 If port is an integer, it's returned as is. If it's a string, it's
2537 2537 looked up using socket.getservbyname(). If there's no matching
2538 2538 service, error.Abort is raised.
2539 2539 """
2540 2540 try:
2541 2541 return int(port)
2542 2542 except ValueError:
2543 2543 pass
2544 2544
2545 2545 try:
2546 2546 return socket.getservbyname(port)
2547 2547 except socket.error:
2548 2548 raise Abort(_("no port number associated with service '%s'") % port)
2549 2549
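# For example (service-name lookup depends on the system services
# database, typically /etc/services):
#
#   getport(8000)    -> 8000
#   getport('8000')  -> 8000
#   getport('http')  -> 80 on most systems
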
2550 2550 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2551 2551 '0': False, 'no': False, 'false': False, 'off': False,
2552 2552 'never': False}
2553 2553
2554 2554 def parsebool(s):
2555 2555 """Parse s into a boolean.
2556 2556
2557 2557 If s is not a valid boolean, returns None.
2558 2558 """
2559 2559 return _booleans.get(s.lower(), None)
2560 2560
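# For example (lookup is case-insensitive):
#
#   parsebool('on')     -> True
#   parsebool('Never')  -> False
#   parsebool('maybe')  -> None
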
2561 2561 _hextochr = dict((a + b, chr(int(a + b, 16)))
2562 2562 for a in string.hexdigits for b in string.hexdigits)
2563 2563
2564 2564 class url(object):
2565 2565 r"""Reliable URL parser.
2566 2566
2567 2567 This parses URLs and provides attributes for the following
2568 2568 components:
2569 2569
2570 2570 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2571 2571
2572 2572 Missing components are set to None. The only exception is
2573 2573 fragment, which is set to '' if present but empty.
2574 2574
2575 2575 If parsefragment is False, fragment is included in query. If
2576 2576 parsequery is False, query is included in path. If both are
2577 2577 False, both fragment and query are included in path.
2578 2578
2579 2579 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2580 2580
2581 2581 Note that for backward compatibility reasons, bundle URLs do not
2582 2582 take host names. That means 'bundle://../' has a path of '../'.
2583 2583
2584 2584 Examples:
2585 2585
2586 2586 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2587 2587 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2588 2588 >>> url('ssh://[::1]:2200//home/joe/repo')
2589 2589 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2590 2590 >>> url('file:///home/joe/repo')
2591 2591 <url scheme: 'file', path: '/home/joe/repo'>
2592 2592 >>> url('file:///c:/temp/foo/')
2593 2593 <url scheme: 'file', path: 'c:/temp/foo/'>
2594 2594 >>> url('bundle:foo')
2595 2595 <url scheme: 'bundle', path: 'foo'>
2596 2596 >>> url('bundle://../foo')
2597 2597 <url scheme: 'bundle', path: '../foo'>
2598 2598 >>> url(r'c:\foo\bar')
2599 2599 <url path: 'c:\\foo\\bar'>
2600 2600 >>> url(r'\\blah\blah\blah')
2601 2601 <url path: '\\\\blah\\blah\\blah'>
2602 2602 >>> url(r'\\blah\blah\blah#baz')
2603 2603 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2604 2604 >>> url(r'file:///C:\users\me')
2605 2605 <url scheme: 'file', path: 'C:\\users\\me'>
2606 2606
2607 2607 Authentication credentials:
2608 2608
2609 2609 >>> url('ssh://joe:xyz@x/repo')
2610 2610 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2611 2611 >>> url('ssh://joe@x/repo')
2612 2612 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2613 2613
2614 2614 Query strings and fragments:
2615 2615
2616 2616 >>> url('http://host/a?b#c')
2617 2617 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2618 2618 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2619 2619 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2620 2620
2621 2621 Empty path:
2622 2622
2623 2623 >>> url('')
2624 2624 <url path: ''>
2625 2625 >>> url('#a')
2626 2626 <url path: '', fragment: 'a'>
2627 2627 >>> url('http://host/')
2628 2628 <url scheme: 'http', host: 'host', path: ''>
2629 2629 >>> url('http://host/#a')
2630 2630 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2631 2631
2632 2632 Only scheme:
2633 2633
2634 2634 >>> url('http:')
2635 2635 <url scheme: 'http'>
2636 2636 """
2637 2637
2638 2638 _safechars = "!~*'()+"
2639 2639 _safepchars = "/!~*'()+:\\"
2640 2640 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2641 2641
2642 2642 def __init__(self, path, parsequery=True, parsefragment=True):
2643 2643 # We slowly chomp away at path until we have only the path left
2644 2644 self.scheme = self.user = self.passwd = self.host = None
2645 2645 self.port = self.path = self.query = self.fragment = None
2646 2646 self._localpath = True
2647 2647 self._hostport = ''
2648 2648 self._origpath = path
2649 2649
2650 2650 if parsefragment and '#' in path:
2651 2651 path, self.fragment = path.split('#', 1)
2652 2652
2653 2653 # special case for Windows drive letters and UNC paths
2654 2654 if hasdriveletter(path) or path.startswith('\\\\'):
2655 2655 self.path = path
2656 2656 return
2657 2657
2658 2658 # For compatibility reasons, we can't handle bundle paths as
2659 2659 # normal URLs
2660 2660 if path.startswith('bundle:'):
2661 2661 self.scheme = 'bundle'
2662 2662 path = path[7:]
2663 2663 if path.startswith('//'):
2664 2664 path = path[2:]
2665 2665 self.path = path
2666 2666 return
2667 2667
2668 2668 if self._matchscheme(path):
2669 2669 parts = path.split(':', 1)
2670 2670 if parts[0]:
2671 2671 self.scheme, path = parts
2672 2672 self._localpath = False
2673 2673
2674 2674 if not path:
2675 2675 path = None
2676 2676 if self._localpath:
2677 2677 self.path = ''
2678 2678 return
2679 2679 else:
2680 2680 if self._localpath:
2681 2681 self.path = path
2682 2682 return
2683 2683
2684 2684 if parsequery and '?' in path:
2685 2685 path, self.query = path.split('?', 1)
2686 2686 if not path:
2687 2687 path = None
2688 2688 if not self.query:
2689 2689 self.query = None
2690 2690
2691 2691 # // is required to specify a host/authority
2692 2692 if path and path.startswith('//'):
2693 2693 parts = path[2:].split('/', 1)
2694 2694 if len(parts) > 1:
2695 2695 self.host, path = parts
2696 2696 else:
2697 2697 self.host = parts[0]
2698 2698 path = None
2699 2699 if not self.host:
2700 2700 self.host = None
2701 2701 # path of file:///d is /d
2702 2702 # path of file:///d:/ is d:/, not /d:/
2703 2703 if path and not hasdriveletter(path):
2704 2704 path = '/' + path
2705 2705
2706 2706 if self.host and '@' in self.host:
2707 2707 self.user, self.host = self.host.rsplit('@', 1)
2708 2708 if ':' in self.user:
2709 2709 self.user, self.passwd = self.user.split(':', 1)
2710 2710 if not self.host:
2711 2711 self.host = None
2712 2712
2713 2713 # Don't split on colons in IPv6 addresses without ports
2714 2714 if (self.host and ':' in self.host and
2715 2715 not (self.host.startswith('[') and self.host.endswith(']'))):
2716 2716 self._hostport = self.host
2717 2717 self.host, self.port = self.host.rsplit(':', 1)
2718 2718 if not self.host:
2719 2719 self.host = None
2720 2720
2721 2721 if (self.host and self.scheme == 'file' and
2722 2722 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2723 2723 raise Abort(_('file:// URLs can only refer to localhost'))
2724 2724
2725 2725 self.path = path
2726 2726
2727 2727 # leave the query string escaped
2728 2728 for a in ('user', 'passwd', 'host', 'port',
2729 2729 'path', 'fragment'):
2730 2730 v = getattr(self, a)
2731 2731 if v is not None:
2732 2732 setattr(self, a, urlreq.unquote(v))
2733 2733
2734 2734 def __repr__(self):
2735 2735 attrs = []
2736 2736 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2737 2737 'query', 'fragment'):
2738 2738 v = getattr(self, a)
2739 2739 if v is not None:
2740 2740 attrs.append('%s: %r' % (a, v))
2741 2741 return '<url %s>' % ', '.join(attrs)
2742 2742
2743 def __str__(self):
2743 def __bytes__(self):
2744 2744 r"""Join the URL's components back into a URL string.
2745 2745
2746 2746 Examples:
2747 2747
2748 2748 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2749 2749 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2750 2750 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2751 2751 'http://user:pw@host:80/?foo=bar&baz=42'
2752 2752 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2753 2753 'http://user:pw@host:80/?foo=bar%3dbaz'
2754 2754 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2755 2755 'ssh://user:pw@[::1]:2200//home/joe#'
2756 2756 >>> str(url('http://localhost:80//'))
2757 2757 'http://localhost:80//'
2758 2758 >>> str(url('http://localhost:80/'))
2759 2759 'http://localhost:80/'
2760 2760 >>> str(url('http://localhost:80'))
2761 2761 'http://localhost:80/'
2762 2762 >>> str(url('bundle:foo'))
2763 2763 'bundle:foo'
2764 2764 >>> str(url('bundle://../foo'))
2765 2765 'bundle:../foo'
2766 2766 >>> str(url('path'))
2767 2767 'path'
2768 2768 >>> str(url('file:///tmp/foo/bar'))
2769 2769 'file:///tmp/foo/bar'
2770 2770 >>> str(url('file:///c:/tmp/foo/bar'))
2771 2771 'file:///c:/tmp/foo/bar'
2772 2772 >>> print url(r'bundle:foo\bar')
2773 2773 bundle:foo\bar
2774 2774 >>> print url(r'file:///D:\data\hg')
2775 2775 file:///D:\data\hg
2776 2776 """
2777 return encoding.strfromlocal(self.__bytes__())
2778
2779 def __bytes__(self):
2780 2777 if self._localpath:
2781 2778 s = self.path
2782 2779 if self.scheme == 'bundle':
2783 2780 s = 'bundle:' + s
2784 2781 if self.fragment:
2785 2782 s += '#' + self.fragment
2786 2783 return s
2787 2784
2788 2785 s = self.scheme + ':'
2789 2786 if self.user or self.passwd or self.host:
2790 2787 s += '//'
2791 2788 elif self.scheme and (not self.path or self.path.startswith('/')
2792 2789 or hasdriveletter(self.path)):
2793 2790 s += '//'
2794 2791 if hasdriveletter(self.path):
2795 2792 s += '/'
2796 2793 if self.user:
2797 2794 s += urlreq.quote(self.user, safe=self._safechars)
2798 2795 if self.passwd:
2799 2796 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2800 2797 if self.user or self.passwd:
2801 2798 s += '@'
2802 2799 if self.host:
2803 2800 if not (self.host.startswith('[') and self.host.endswith(']')):
2804 2801 s += urlreq.quote(self.host)
2805 2802 else:
2806 2803 s += self.host
2807 2804 if self.port:
2808 2805 s += ':' + urlreq.quote(self.port)
2809 2806 if self.host:
2810 2807 s += '/'
2811 2808 if self.path:
2812 2809 # TODO: similar to the query string, we should not unescape the
2813 2810 # path when we store it; the path might contain '%2f' = '/',
2814 2811 # which we should *not* escape.
2815 2812 s += urlreq.quote(self.path, safe=self._safepchars)
2816 2813 if self.query:
2817 2814 # we store the query in escaped form.
2818 2815 s += '?' + self.query
2819 2816 if self.fragment is not None:
2820 2817 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2821 2818 return s
2822 2819
2820 __str__ = encoding.strmethod(__bytes__)
2821
2823 2822 def authinfo(self):
2824 2823 user, passwd = self.user, self.passwd
2825 2824 try:
2826 2825 self.user, self.passwd = None, None
2827 2826 s = bytes(self)
2828 2827 finally:
2829 2828 self.user, self.passwd = user, passwd
2830 2829 if not self.user:
2831 2830 return (s, None)
2832 2831 # authinfo[1] is passed to urllib2 password manager, and its
2833 2832 # URIs must not contain credentials. The host is passed in the
2834 2833 # URIs list because Python < 2.4.3 uses only that to search for
2835 2834 # a password.
2836 2835 return (s, (None, (s, self.host),
2837 2836 self.user, self.passwd or ''))
2838 2837
2839 2838 def isabs(self):
2840 2839 if self.scheme and self.scheme != 'file':
2841 2840 return True # remote URL
2842 2841 if hasdriveletter(self.path):
2843 2842 return True # absolute for our purposes - can't be joined()
2844 2843 if self.path.startswith(r'\\'):
2845 2844 return True # Windows UNC path
2846 2845 if self.path.startswith('/'):
2847 2846 return True # POSIX-style
2848 2847 return False
2849 2848
2850 2849 def localpath(self):
2851 2850 if self.scheme == 'file' or self.scheme == 'bundle':
2852 2851 path = self.path or '/'
2853 2852 # For Windows, we need to promote hosts containing drive
2854 2853 # letters to paths with drive letters.
2855 2854 if hasdriveletter(self._hostport):
2856 2855 path = self._hostport + '/' + self.path
2857 2856 elif (self.host is not None and self.path
2858 2857 and not hasdriveletter(path)):
2859 2858 path = '/' + path
2860 2859 return path
2861 2860 return self._origpath
2862 2861
2863 2862 def islocal(self):
2864 2863 '''whether localpath will return something that posixfile can open'''
2865 2864 return (not self.scheme or self.scheme == 'file'
2866 2865 or self.scheme == 'bundle')
2867 2866
2868 2867 def hasscheme(path):
2869 2868 return bool(url(path).scheme)
2870 2869
2871 2870 def hasdriveletter(path):
2872 2871 return path and path[1:2] == ':' and path[0:1].isalpha()
2873 2872
2874 2873 def urllocalpath(path):
2875 2874 return url(path, parsequery=False, parsefragment=False).localpath()
2876 2875
2877 2876 def hidepassword(u):
2878 2877 '''hide user credential in a url string'''
2879 2878 u = url(u)
2880 2879 if u.passwd:
2881 2880 u.passwd = '***'
2882 2881 return bytes(u)
2883 2882
2884 2883 def removeauth(u):
2885 2884 '''remove all authentication information from a url string'''
2886 2885 u = url(u)
2887 2886 u.user = u.passwd = None
2888 2887 return str(u)
2889 2888
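# For example, with a hypothetical URL:
#
#   hidepassword('http://user:secret@host/repo')  -> 'http://user:***@host/repo'
#   removeauth('http://user:secret@host/repo')    -> 'http://host/repo'
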
2890 2889 timecount = unitcountfn(
2891 2890 (1, 1e3, _('%.0f s')),
2892 2891 (100, 1, _('%.1f s')),
2893 2892 (10, 1, _('%.2f s')),
2894 2893 (1, 1, _('%.3f s')),
2895 2894 (100, 0.001, _('%.1f ms')),
2896 2895 (10, 0.001, _('%.2f ms')),
2897 2896 (1, 0.001, _('%.3f ms')),
2898 2897 (100, 0.000001, _('%.1f us')),
2899 2898 (10, 0.000001, _('%.2f us')),
2900 2899 (1, 0.000001, _('%.3f us')),
2901 2900 (100, 0.000000001, _('%.1f ns')),
2902 2901 (10, 0.000000001, _('%.2f ns')),
2903 2902 (1, 0.000000001, _('%.3f ns')),
2904 2903 )
2905 2904
2906 2905 _timenesting = [0]
2907 2906
2908 2907 def timed(func):
2909 2908 '''Report the execution time of a function call to stderr.
2910 2909
2911 2910 During development, use as a decorator when you need to measure
2912 2911 the cost of a function, e.g. as follows:
2913 2912
2914 2913 @util.timed
2915 2914 def foo(a, b, c):
2916 2915 pass
2917 2916 '''
2918 2917
2919 2918 def wrapper(*args, **kwargs):
2920 2919 start = timer()
2921 2920 indent = 2
2922 2921 _timenesting[0] += indent
2923 2922 try:
2924 2923 return func(*args, **kwargs)
2925 2924 finally:
2926 2925 elapsed = timer() - start
2927 2926 _timenesting[0] -= indent
2928 2927 stderr.write('%s%s: %s\n' %
2929 2928 (' ' * _timenesting[0], func.__name__,
2930 2929 timecount(elapsed)))
2931 2930 return wrapper
2932 2931
2933 2932 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2934 2933 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2935 2934
2936 2935 def sizetoint(s):
2937 2936 '''Convert a space specifier to a byte count.
2938 2937
2939 2938 >>> sizetoint('30')
2940 2939 30
2941 2940 >>> sizetoint('2.2kb')
2942 2941 2252
2943 2942 >>> sizetoint('6M')
2944 2943 6291456
2945 2944 '''
2946 2945 t = s.strip().lower()
2947 2946 try:
2948 2947 for k, u in _sizeunits:
2949 2948 if t.endswith(k):
2950 2949 return int(float(t[:-len(k)]) * u)
2951 2950 return int(t)
2952 2951 except ValueError:
2953 2952 raise error.ParseError(_("couldn't parse size: %s") % s)
2954 2953
2955 2954 class hooks(object):
2956 2955 '''A collection of hook functions that can be used to extend a
2957 2956 function's behavior. Hooks are called in lexicographic order,
2958 2957 based on the names of their sources.'''
2959 2958
2960 2959 def __init__(self):
2961 2960 self._hooks = []
2962 2961
2963 2962 def add(self, source, hook):
2964 2963 self._hooks.append((source, hook))
2965 2964
2966 2965 def __call__(self, *args):
2967 2966 self._hooks.sort(key=lambda x: x[0])
2968 2967 results = []
2969 2968 for source, hook in self._hooks:
2970 2969 results.append(hook(*args))
2971 2970 return results
2972 2971
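# A minimal hooks sketch (hypothetical source names and callables); hooks
# run in lexicographic order of their sources, not insertion order:
#
#   h = hooks()
#   h.add('b-source', lambda x: x + 1)
#   h.add('a-source', lambda x: x * 2)
#   h(3)  -> [6, 4]
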
2973 2972 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
2974 2973 '''Yields lines for a nicely formatted stacktrace.
2975 2974 Skips the last 'skip' entries, then returns the last 'depth' entries.
2976 2975 Each file+linenumber is formatted according to fileline.
2977 2976 Each line is formatted according to line.
2978 2977 If line is None, it yields:
2979 2978 length of longest filepath+line number,
2980 2979 filepath+linenumber,
2981 2980 function
2982 2981
2983 2982 Not to be used in production code, but very convenient while developing.
2984 2983 '''
2985 2984 entries = [(fileline % (fn, ln), func)
2986 2985 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2987 2986 ][-depth:]
2988 2987 if entries:
2989 2988 fnmax = max(len(entry[0]) for entry in entries)
2990 2989 for fnln, func in entries:
2991 2990 if line is None:
2992 2991 yield (fnmax, fnln, func)
2993 2992 else:
2994 2993 yield line % (fnmax, fnln, func)
2995 2994
2996 2995 def debugstacktrace(msg='stacktrace', skip=0,
2997 2996 f=stderr, otherf=stdout, depth=0):
2998 2997 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2999 2998 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3000 2999 By default it will flush stdout first.
3001 3000 It can be used everywhere and intentionally does not require a ui object.
3002 3001 Not to be used in production code, but very convenient while developing.
3003 3002 '''
3004 3003 if otherf:
3005 3004 otherf.flush()
3006 3005 f.write('%s at:\n' % msg.rstrip())
3007 3006 for line in getstackframes(skip + 1, depth=depth):
3008 3007 f.write(line)
3009 3008 f.flush()
3010 3009
3011 3010 class dirs(object):
3012 3011 '''a multiset of directory names from a dirstate or manifest'''
3013 3012
3014 3013 def __init__(self, map, skip=None):
3015 3014 self._dirs = {}
3016 3015 addpath = self.addpath
3017 3016 if safehasattr(map, 'iteritems') and skip is not None:
3018 3017 for f, s in map.iteritems():
3019 3018 if s[0] != skip:
3020 3019 addpath(f)
3021 3020 else:
3022 3021 for f in map:
3023 3022 addpath(f)
3024 3023
3025 3024 def addpath(self, path):
3026 3025 dirs = self._dirs
3027 3026 for base in finddirs(path):
3028 3027 if base in dirs:
3029 3028 dirs[base] += 1
3030 3029 return
3031 3030 dirs[base] = 1
3032 3031
3033 3032 def delpath(self, path):
3034 3033 dirs = self._dirs
3035 3034 for base in finddirs(path):
3036 3035 if dirs[base] > 1:
3037 3036 dirs[base] -= 1
3038 3037 return
3039 3038 del dirs[base]
3040 3039
3041 3040 def __iter__(self):
3042 3041 return iter(self._dirs)
3043 3042
3044 3043 def __contains__(self, d):
3045 3044 return d in self._dirs
3046 3045
3047 3046 if safehasattr(parsers, 'dirs'):
3048 3047 dirs = parsers.dirs
3049 3048
3050 3049 def finddirs(path):
3051 3050 pos = path.rfind('/')
3052 3051 while pos != -1:
3053 3052 yield path[:pos]
3054 3053 pos = path.rfind('/', 0, pos)
3055 3054
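# For example, finddirs() yields ancestor directories, deepest first:
#
#   list(finddirs('a/b/c'))  -> ['a/b', 'a']
#
# and the dirs class above counts them as a multiset:
#
#   'a/b' in dirs(['a/b/c', 'a/b/d', 'e/f'])  -> True
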
3056 3055 class ctxmanager(object):
3057 3056 '''A context manager for use in 'with' blocks to allow multiple
3058 3057 contexts to be entered at once. This is both safer and more
3059 3058 flexible than contextlib.nested.
3060 3059
3061 3060 Once Mercurial supports Python 2.7+, this will become mostly
3062 3061 unnecessary.
3063 3062 '''
3064 3063
3065 3064 def __init__(self, *args):
3066 3065 '''Accepts a list of no-argument functions that return context
3067 3066 managers. These will be invoked at __call__ time.'''
3068 3067 self._pending = args
3069 3068 self._atexit = []
3070 3069
3071 3070 def __enter__(self):
3072 3071 return self
3073 3072
3074 3073 def enter(self):
3075 3074 '''Create and enter context managers in the order in which they were
3076 3075 passed to the constructor.'''
3077 3076 values = []
3078 3077 for func in self._pending:
3079 3078 obj = func()
3080 3079 values.append(obj.__enter__())
3081 3080 self._atexit.append(obj.__exit__)
3082 3081 del self._pending
3083 3082 return values
3084 3083
3085 3084 def atexit(self, func, *args, **kwargs):
3086 3085 '''Add a function to call when this context manager exits. The
3087 3086 ordering of multiple atexit calls is unspecified, save that
3088 3087 they will happen before any __exit__ functions.'''
3089 3088 def wrapper(exc_type, exc_val, exc_tb):
3090 3089 func(*args, **kwargs)
3091 3090 self._atexit.append(wrapper)
3092 3091 return func
3093 3092
3094 3093 def __exit__(self, exc_type, exc_val, exc_tb):
3095 3094 '''Context managers are exited in the reverse order from which
3096 3095 they were created.'''
3097 3096 received = exc_type is not None
3098 3097 suppressed = False
3099 3098 pending = None
3100 3099 self._atexit.reverse()
3101 3100 for exitfunc in self._atexit:
3102 3101 try:
3103 3102 if exitfunc(exc_type, exc_val, exc_tb):
3104 3103 suppressed = True
3105 3104 exc_type = None
3106 3105 exc_val = None
3107 3106 exc_tb = None
3108 3107 except BaseException:
3110 3109 exc_type, exc_val, exc_tb = pending = sys.exc_info()
3111 3110 del self._atexit
3112 3111 if pending:
3113 3112 raise exc_val
3114 3113 return received and suppressed
3115 3114
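# A minimal ctxmanager sketch (hypothetical file names); both files are
# closed on exit, in reverse order of entry:
#
#   with ctxmanager(lambda: open('f1'), lambda: open('f2')) as c:
#       f1, f2 = c.enter()
#       ...
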
3116 3115 # compression code
3117 3116
3118 3117 SERVERROLE = 'server'
3119 3118 CLIENTROLE = 'client'
3120 3119
3121 3120 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3122 3121 (u'name', u'serverpriority',
3123 3122 u'clientpriority'))
3124 3123
3125 3124 class compressormanager(object):
3126 3125 """Holds registrations of various compression engines.
3127 3126
3128 3127 This class essentially abstracts the differences between compression
3129 3128 engines to allow new compression formats to be added easily, possibly from
3130 3129 extensions.
3131 3130
3132 3131 Compressors are registered against the global instance by calling its
3133 3132 ``register()`` method.
3134 3133 """
3135 3134 def __init__(self):
3136 3135 self._engines = {}
3137 3136 # Bundle spec human name to engine name.
3138 3137 self._bundlenames = {}
3139 3138 # Internal bundle identifier to engine name.
3140 3139 self._bundletypes = {}
3141 3140 # Revlog header to engine name.
3142 3141 self._revlogheaders = {}
3143 3142 # Wire proto identifier to engine name.
3144 3143 self._wiretypes = {}
3145 3144
3146 3145 def __getitem__(self, key):
3147 3146 return self._engines[key]
3148 3147
3149 3148 def __contains__(self, key):
3150 3149 return key in self._engines
3151 3150
3152 3151 def __iter__(self):
3153 3152 return iter(self._engines.keys())
3154 3153
3155 3154 def register(self, engine):
3156 3155 """Register a compression engine with the manager.
3157 3156
3158 3157 The argument must be a ``compressionengine`` instance.
3159 3158 """
3160 3159 if not isinstance(engine, compressionengine):
3161 3160 raise ValueError(_('argument must be a compressionengine'))
3162 3161
3163 3162 name = engine.name()
3164 3163
3165 3164 if name in self._engines:
3166 3165 raise error.Abort(_('compression engine %s already registered') %
3167 3166 name)
3168 3167
3169 3168 bundleinfo = engine.bundletype()
3170 3169 if bundleinfo:
3171 3170 bundlename, bundletype = bundleinfo
3172 3171
3173 3172 if bundlename in self._bundlenames:
3174 3173 raise error.Abort(_('bundle name %s already registered') %
3175 3174 bundlename)
3176 3175 if bundletype in self._bundletypes:
3177 3176 raise error.Abort(_('bundle type %s already registered by %s') %
3178 3177 (bundletype, self._bundletypes[bundletype]))
3179 3178
3180 3179 # Only register an external facing name if one was declared.
3181 3180 if bundlename:
3182 3181 self._bundlenames[bundlename] = name
3183 3182
3184 3183 self._bundletypes[bundletype] = name
3185 3184
3186 3185 wiresupport = engine.wireprotosupport()
3187 3186 if wiresupport:
3188 3187 wiretype = wiresupport.name
3189 3188 if wiretype in self._wiretypes:
3190 3189 raise error.Abort(_('wire protocol compression %s already '
3191 3190 'registered by %s') %
3192 3191 (wiretype, self._wiretypes[wiretype]))
3193 3192
3194 3193 self._wiretypes[wiretype] = name
3195 3194
3196 3195 revlogheader = engine.revlogheader()
3197 3196 if revlogheader and revlogheader in self._revlogheaders:
3198 3197 raise error.Abort(_('revlog header %s already registered by %s') %
3199 3198 (revlogheader, self._revlogheaders[revlogheader]))
3200 3199
3201 3200 if revlogheader:
3202 3201 self._revlogheaders[revlogheader] = name
3203 3202
3204 3203 self._engines[name] = engine
3205 3204
3206 3205 @property
3207 3206 def supportedbundlenames(self):
3208 3207 return set(self._bundlenames.keys())
3209 3208
3210 3209 @property
3211 3210 def supportedbundletypes(self):
3212 3211 return set(self._bundletypes.keys())
3213 3212
3214 3213 def forbundlename(self, bundlename):
3215 3214 """Obtain a compression engine registered to a bundle name.
3216 3215
3217 3216 Will raise KeyError if the bundle type isn't registered.
3218 3217
3219 3218 Will abort if the engine is known but not available.
3220 3219 """
3221 3220 engine = self._engines[self._bundlenames[bundlename]]
3222 3221 if not engine.available():
3223 3222 raise error.Abort(_('compression engine %s could not be loaded') %
3224 3223 engine.name())
3225 3224 return engine
3226 3225
3227 3226 def forbundletype(self, bundletype):
3228 3227 """Obtain a compression engine registered to a bundle type.
3229 3228
3230 3229 Will raise KeyError if the bundle type isn't registered.
3231 3230
3232 3231 Will abort if the engine is known but not available.
3233 3232 """
3234 3233 engine = self._engines[self._bundletypes[bundletype]]
3235 3234 if not engine.available():
3236 3235 raise error.Abort(_('compression engine %s could not be loaded') %
3237 3236 engine.name())
3238 3237 return engine
3239 3238
3240 3239 def supportedwireengines(self, role, onlyavailable=True):
3241 3240 """Obtain compression engines that support the wire protocol.
3242 3241
3243 3242 Returns a list of engines in prioritized order, most desired first.
3244 3243
3245 3244 If ``onlyavailable`` is set, filter out engines that can't be
3246 3245 loaded.
3247 3246 """
3248 3247 assert role in (SERVERROLE, CLIENTROLE)
3249 3248
3250 3249 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3251 3250
3252 3251 engines = [self._engines[e] for e in self._wiretypes.values()]
3253 3252 if onlyavailable:
3254 3253 engines = [e for e in engines if e.available()]
3255 3254
3256 3255 def getkey(e):
3257 3256 # Sort first by priority, highest first. In case of tie, sort
3258 3257 # alphabetically. This is arbitrary, but ensures output is
3259 3258 # stable.
3260 3259 w = e.wireprotosupport()
3261 3260 return -1 * getattr(w, attr), w.name
3262 3261
3263 3262 return list(sorted(engines, key=getkey))
3264 3263
3265 3264 def forwiretype(self, wiretype):
3266 3265 engine = self._engines[self._wiretypes[wiretype]]
3267 3266 if not engine.available():
3268 3267 raise error.Abort(_('compression engine %s could not be loaded') %
3269 3268 engine.name())
3270 3269 return engine
3271 3270
3272 3271 def forrevlogheader(self, header):
3273 3272 """Obtain a compression engine registered to a revlog header.
3274 3273
3275 3274 Will raise KeyError if the revlog header value isn't registered.
3276 3275 """
3277 3276 return self._engines[self._revlogheaders[header]]
3278 3277
3279 3278 compengines = compressormanager()
3280 3279
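# A usage sketch against the registry as populated further below; 'gzip'
# is the bundle name declared by the zlib engine:
#
#   engine = compengines.forbundlename('gzip')
#   compressed = ''.join(engine.compressstream(iter(['some data'])))
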
3281 3280 class compressionengine(object):
3282 3281 """Base class for compression engines.
3283 3282
3284 3283 Compression engines must implement the interface defined by this class.
3285 3284 """
3286 3285 def name(self):
3287 3286 """Returns the name of the compression engine.
3288 3287
3289 3288 This is the key the engine is registered under.
3290 3289
3291 3290 This method must be implemented.
3292 3291 """
3293 3292 raise NotImplementedError()
3294 3293
3295 3294 def available(self):
3296 3295 """Whether the compression engine is available.
3297 3296
3298 3297 The intent of this method is to allow optional compression engines
3299 3298 that may not be available in all installations (such as engines relying
3300 3299 on C extensions that may not be present).
3301 3300 """
3302 3301 return True
3303 3302
3304 3303 def bundletype(self):
3305 3304 """Describes bundle identifiers for this engine.
3306 3305
3307 3306 If this compression engine isn't supported for bundles, returns None.
3308 3307
3309 3308 If this engine can be used for bundles, returns a 2-tuple of strings of
3310 3309 the user-facing "bundle spec" compression name and an internal
3311 3310 identifier used to denote the compression format within bundles. To
3312 3311 exclude the name from external usage, set the first element to ``None``.
3313 3312
3314 3313 If bundle compression is supported, the class must also implement
3315 3314 ``compressstream`` and ``decompressorreader``.
3316 3315
3317 3316 The docstring of this method is used in the help system to tell users
3318 3317 about this engine.
3319 3318 """
3320 3319 return None
3321 3320
3322 3321 def wireprotosupport(self):
3323 3322 """Declare support for this compression format on the wire protocol.
3324 3323
3325 3324 If this compression engine isn't supported for compressing wire
3326 3325 protocol payloads, returns None.
3327 3326
3328 3327 Otherwise, returns ``compenginewireprotosupport`` with the following
3329 3328 fields:
3330 3329
3331 3330 * String format identifier
3332 3331 * Integer priority for the server
3333 3332 * Integer priority for the client
3334 3333
3335 3334 The integer priorities are used to order the advertisement of format
3336 3335 support by server and client. The highest integer is advertised
3337 3336 first. Integers with non-positive values aren't advertised.
3338 3337
3339 3338 The priority values are somewhat arbitrary and only used for default
3340 3339 ordering. The relative order can be changed via config options.
3341 3340
3342 3341 If wire protocol compression is supported, the class must also implement
3343 3342 ``compressstream`` and ``decompressorreader``.
3344 3343 """
3345 3344 return None
3346 3345
3347 3346 def revlogheader(self):
3348 3347 """Header added to revlog chunks that identifies this engine.
3349 3348
3350 3349 If this engine can be used to compress revlogs, this method should
3351 3350 return the bytes used to identify chunks compressed with this engine.
3352 3351 Else, the method should return ``None`` to indicate it does not
3353 3352 participate in revlog compression.
3354 3353 """
3355 3354 return None
3356 3355
3357 3356 def compressstream(self, it, opts=None):
3358 3357 """Compress an iterator of chunks.
3359 3358
3360 3359 The method receives an iterator (ideally a generator) of chunks of
3361 3360 bytes to be compressed. It returns an iterator (ideally a generator)
3362 3361 of bytes of chunks representing the compressed output.
3363 3362
3364 3363 Optionally accepts an argument defining how to perform compression.
3365 3364 Each engine treats this argument differently.
3366 3365 """
3367 3366 raise NotImplementedError()
3368 3367
3369 3368 def decompressorreader(self, fh):
3370 3369 """Perform decompression on a file object.
3371 3370
3372 3371 Argument is an object with a ``read(size)`` method that returns
3373 3372 compressed data. Return value is an object with a ``read(size)`` that
3374 3373 returns uncompressed data.
3375 3374 """
3376 3375 raise NotImplementedError()
3377 3376
3378 3377 def revlogcompressor(self, opts=None):
3379 3378 """Obtain an object that can be used to compress revlog entries.
3380 3379
3381 3380 The object has a ``compress(data)`` method that compresses binary
3382 3381 data. This method returns compressed binary data or ``None`` if
3383 3382 the data could not be compressed (too small, not compressible, etc).
3384 3383 The returned data should have a header uniquely identifying this
3385 3384 compression format so decompression can be routed to this engine.
3386 3385 This header should be identified by the ``revlogheader()`` return
3387 3386 value.
3388 3387
3389 3388 The object has a ``decompress(data)`` method that decompresses
3390 3389 data. The method will only be called if ``data`` begins with
3391 3390 ``revlogheader()``. The method should return the raw, uncompressed
3392 3391 data or raise a ``RevlogError``.
3393 3392
3394 3393 The object is reusable but is not thread safe.
3395 3394 """
3396 3395 raise NotImplementedError()
3397 3396
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

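# Illustrative usage sketch (not from the original source): compressing a
# stream of chunks with a registered engine. Lookup by name is assumed to
# work via item access, as ``bundlecompressiontopics()`` below relies on:
#
#   engine = compengines['zlib']
#   out = ''.join(engine.compressstream(iter(['some data']), {'level': 9}))
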
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # them by default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate the network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

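# Illustrative sketch (not from the original source): because the zstd
# module may be absent, callers should consult ``available()`` before
# selecting this engine.
#
#   engine = compengines['zstd']
#   if engine.available():
#       compressor = engine.revlogcompressor()
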
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc

        items[bt[0]] = value

    return items

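# Illustrative sketch (not from the original source): the mapping returned
# above keys bundle names to objects whose __doc__ carries the formatted
# help text, e.g.:
#
#   topics = bundlecompressiontopics()
#   'gzip' in topics        # True whenever the zlib engine is available
#   topics['gzip'].__doc__  # starts with '``gzip``' followed by the doc
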
# convenient shortcut
dst = debugstacktrace