##// END OF EJS Templates
py3: stop normalizing .encode()/.decode() arguments to unicode...
Gregory Szorc -
r43361:127cc1f7 default
parent child Browse files
Show More
@@ -1,670 +1,670
1 1 # testparseutil.py - utilities to parse test script for check tools
2 2 #
3 3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import abc
11 11 import re
12 12 import sys
13 13
14 14 ####################
15 15 # for Python3 compatibility (almost comes from mercurial/pycompat.py)
16 16
17 17 ispy3 = sys.version_info[0] >= 3
18 18
19 19
def identity(a):
    """Return the argument unchanged (no-op converter, used on Python 2)."""
    return a
23 23
24 24 def _rapply(f, xs):
25 25 if xs is None:
26 26 # assume None means non-value of optional data
27 27 return xs
28 28 if isinstance(xs, (list, set, tuple)):
29 29 return type(xs)(_rapply(f, x) for x in xs)
30 30 if isinstance(xs, dict):
31 31 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
32 32 return f(xs)
33 33
34 34
def rapply(f, xs):
    """Recursively apply ``f`` to every leaf of ``xs``.

    When ``f`` is the ``identity`` converter, ``xs`` is returned
    untouched (fast path, mainly useful on Python 2).
    """
    return xs if f is identity else _rapply(f, xs)
40 40
41 41
if ispy3:
    import builtins

    def bytestr(s):
        # tiny version of pycompat.bytestr
        return s.encode('latin1')

    def sysstr(s):
        # bytes -> native str; already-native strings pass through
        return s if isinstance(s, builtins.str) else s.decode('latin-1')

    def opentext(f):
        return open(f, 'r')


else:
    bytestr = str
    sysstr = identity

    opentext = open
63 63
64 64
def b2s(x):
    """Recursively convert BYTES elements in ``x`` to SYSSTR."""
    return rapply(sysstr, x)
68 68
69 69
def writeout(data):
    """Write ``data`` (BYTES) to stdout."""
    sys.stdout.write(data)
73 73
74 74
def writeerr(data):
    """Write ``data`` (BYTES) to stderr."""
    sys.stderr.write(data)
78 78
79 79
80 80 ####################
81 81
82 82
class embeddedmatcher(object):
    """Base class to detect embedded code fragments in *.t test script"""

    __metaclass__ = abc.ABCMeta

    def __init__(self, desc):
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        The return value may be an arbitrary object; it is passed back
        as 'ctx' to every subsequent method invocation.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat"""

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        Returns None when no filename is explicitly specified for the
        embedded code.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        Returns None when the start line carries no actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        Returns None when the end line carries no actual code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
140 140
141 141
def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is common parsing logic, which examines specified matchers on
    given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher('ambiguous #1',
    ...                       lambda l: l.startswith(' $ cat '))
    >>> ambig2 = ambigmatcher('ambiguous #2',
    ...                       lambda l: l.endswith('<< EOF\\n'))
    >>> lines = [' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    # state machine: 'matcher' is non-None exactly while we are inside a
    # detected fragment; 'ctx' is whatever that matcher's startsat() returned
    matcher = None
    ctx = filename = code = startline = None  # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith('\n'):
            line += '\n'  # to normalize EOF line
        if matcher:  # now, inside embedded code
            if matcher.endsat(ctx, line):
                codeatend = matcher.codeatend(ctx, line)
                if codeatend is not None:
                    code.append(codeatend)
                if not matcher.ignores(ctx):
                    yield (filename, startline, lineno, ''.join(code))
                matcher = None
                # DO NOT "continue", because line might start next fragment
            elif not matcher.isinside(ctx, line):
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(
                    '%s:%d: unexpected line for "%s"'
                    % (basefile, lineno, matcher.desc)
                )
                # stop extracting embedded code by current 'matcher',
                # because appearance of unexpected line might mean
                # that expected end-of-embedded-code line might never
                # appear
                matcher = None
                # DO NOT "continue", because line might start next fragment
            else:
                code.append(matcher.codeinside(ctx, line))
                continue

        # examine whether current line starts embedded code or not
        assert not matcher

        matched = []
        for m in matchers:
            ctx = m.startsat(line)
            if ctx:
                matched.append((m, ctx))
        if matched:
            if len(matched) > 1:
                # this is an error of matchers, maybe
                errors.append(
                    '%s:%d: ambiguous line for %s'
                    % (
                        basefile,
                        lineno,
                        ', '.join(['"%s"' % m.desc for m, c in matched]),
                    )
                )
                # omit extracting embedded code, because choosing
                # arbitrary matcher from matched ones might fail to
                # detect the end of embedded code as expected.
                continue
            matcher, ctx = matched[0]
            filename = matcher.filename(ctx)
            code = []
            codeatstart = matcher.codeatstart(ctx, line)
            if codeatstart is not None:
                code.append(codeatstart)
                startline = lineno
            else:
                startline = lineno + 1

    if matcher:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if matcher.endsat(ctx, '\n'):
            codeatend = matcher.codeatend(ctx, '\n')
            if codeatend is not None:
                code.append(codeatend)
            if not matcher.ignores(ctx):
                yield (filename, startline, lineno + 1, ''.join(code))
        else:
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(
                '%s:%d: unexpected end of file for "%s"'
                % (basefile, lineno, matcher.desc)
            )
264 264
265 265
# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = 'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
276 276
277 277
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
    >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')
    >>> start = ' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # pattern returning the target filename as the 'name' group,
        # covering "> NAMEPAT", "> \"NAMEPAT\"" and "> 'NAMEPAT'"
        # (and ">>" variants)
        namepat = (
            r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
        )
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(r' \$ \s*cat' + namepat + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(r' \$ \s*cat' + heredoclimitpat + namepat),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for pattern in self._fileres:
            m = pattern.match(line)
            if m:
                return (m.group('name'), ' > %s\n' % m.group('limit'))
        return None

    def endsat(self, ctx, line):
        return line == ctx[1]

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx[1] == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None  # the start line carries no embedded code

    def codeatend(self, ctx, line):
        return None  # the end line carries no embedded code

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip ' > ' prefix
369 369
370 370
371 371 ####
372 372 # for embedded python script
373 373
374 374
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = ' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = ' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = ' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = ' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = '\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """

    _prefix = ' >>> '
    _prefixre = re.compile(r' (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not matche against such directive lines.
    _outputre = re.compile(r' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__("doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # the fragment continues as long as lines look like either
        # doctest source or expected output
        if self._prefixre.match(line):
            return False
        return not self._outputre.match(line)

    def isinside(self, ctx, line):
        return True  # every not-yet-ended line belongs to the fragment

    def ignores(self, ctx):
        return False  # doctest fragments are always checked

    def filename(self, ctx):
        return None  # doctest fragments never name a file

    def codeatstart(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '

    def codeatend(self, ctx, line):
        return None  # the terminating line carries no embedded code

    def codeinside(self, ctx, line):
        if self._prefixre.match(line):
            return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '
        return '\n'  # an expected output line is treated as an empty line
470 470
471 471
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
    >>> start = ' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    _startre = re.compile(
        r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
    )

    def __init__(self):
        super(pyheredocmatcher, self).__init__("heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        m = self._startre.match(line)
        return ' > %s\n' % m.group('limit') if m else None

    def endsat(self, ctx, line):
        return line == ctx

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return None  # heredoc python fragments never name a file

    def codeatstart(self, ctx, line):
        return None  # the start line carries no embedded code

    def codeatend(self, ctx, line):
        return None  # the end line carries no embedded code

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip ' > ' prefix
544 544
545 545
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
]


def pyembedded(basefile, lines, errors):
    """Yield embedded python fragments in lines (see embedded() for details)"""
    return embedded(basefile, lines, errors, _pymatchers)
557 557
558 558
559 559 ####
560 560 # for embedded shell script
561 561
_shmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
]


def shembedded(basefile, lines, errors):
    """Yield embedded shell fragments in lines (see embedded() for details)"""
    return embedded(basefile, lines, errors, _shmatchers)
571 571
572 572
573 573 ####
574 574 # for embedded hgrc configuration
575 575
_hgrcmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(
        'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
    ),
]


def hgrcembedded(basefile, lines, errors):
    """Yield embedded hgrc fragments in lines (see embedded() for details)"""
    return embedded(basefile, lines, errors, _hgrcmatchers)
587 587
588 588
589 589 ####
590 590
if __name__ == "__main__":
    import optparse
    import sys

    def showembedded(basefile, lines, embeddedfunc, opts):
        # list start/end lines of each embedded fragment in standard
        # compiler format, and return the number of detected errors
        errors = []
        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
            if not name:
                name = '<anonymous>'
            writeout("%s:%d: %s starts\n" % (basefile, starts, name))
            if opts.verbose and code:
                writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
            writeout("%s:%d: %s ends\n" % (basefile, ends, name))
        for e in errors:
            writeerr("%s\n" % e)
        return len(errors)

    def applyembedded(args, embeddedfunc, opts):
        # run embeddedfunc over each file in args (or over stdin, when
        # no file is given); return 1 if any error was detected
        ret = 0
        if args:
            for f in args:
                with opentext(f) as fp:
                    if showembedded(f, fp, embeddedfunc, opts):
                        ret = 1
        else:
            lines = [l for l in sys.stdin.readlines()]
            if showembedded('<stdin>', lines, embeddedfunc, opts):
                ret = 1
        return ret

    commands = {}

    def command(name, desc):
        # decorator registering a sub-command with its description
        def wrap(func):
            commands[name] = (desc, func)

        return wrap

    @command("pyembedded", "detect embedded python script")
    def pyembeddedcmd(args, opts):
        return applyembedded(args, pyembedded, opts)

    @command("shembedded", "detect embedded shell script")
    def shembeddedcmd(args, opts):
        return applyembedded(args, shembedded, opts)

    @command("hgrcembedded", "detect embedded hgrc configuration")
    def hgrcembeddedcmd(args, opts):
        return applyembedded(args, hgrcembedded, opts)

    availablecommands = "\n".join(
        [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
    )

    parser = optparse.OptionParser(
        """%prog COMMAND [file ...]

Pick up embedded code fragments from given file(s) or stdin, and list
up start/end lines of them in standard compiler format
("FILENAME:LINENO:").

Available commands are:
"""
        + availablecommands
        + """
"""
    )
    parser.add_option(
        "-v",
        "--verbose",
        help="enable additional output (e.g. actual code)",
        action="store_true",
    )
    (opts, args) = parser.parse_args()

    if not args or args[0] not in commands:
        parser.print_help()
        sys.exit(255)

    # exit with the return code of the selected sub-command
    sys.exit(commands[args[0]][1](args[1:], opts))
@@ -1,746 +1,746
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import contextlib
11 11 import errno
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import re
16 16 import socket
17 17
18 18 from mercurial.i18n import _
19 19 from mercurial.pycompat import getattr
20 20
21 21 from mercurial import (
22 22 encoding,
23 23 error,
24 24 node,
25 25 pathutil,
26 26 pycompat,
27 27 url as urlmod,
28 28 util,
29 29 vfs as vfsmod,
30 30 worker,
31 31 )
32 32
33 33 from mercurial.utils import stringutil
34 34
35 35 from ..largefiles import lfutil
36 36
# 64 bytes for SHA256
# (an oid is the lowercase hex SHA-256 digest of the blob content)
_lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39 39
40 40
class lfsvfs(vfsmod.vfs):
    # vfs storing blobs in two-level directories keyed by their oid
    # (e.g. oid "abcd..." is stored as "ab/cd...")

    def join(self, path):
        """split the path at first two characters, like: XX/XXXXX..."""
        if not _lfsre.match(path):
            raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
        return super(lfsvfs, self).join(path[0:2], path[2:])

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, [], oids) tuple for blobs under path

        Oids only exist in the root of this vfs, so dirpath is always ''.
        """
        root = os.path.normpath(self.base)
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        oids = []

        for dirpath, dirs, files in os.walk(
            self.reljoin(self.base, path or b''), onerror=onerror
        ):
            # strip the root prefix; leaves the 2-char bucket directory name
            dirpath = dirpath[prefixlen:]

            # Silently skip unexpected files and directories
            if len(dirpath) == 2:
                oids.extend(
                    [dirpath + f for f in files if _lfsre.match(dirpath + f)]
                )

        yield (b'', [], oids)
72 72
class nullvfs(lfsvfs):
    # a no-op blob store: holds nothing and silently discards all writes
    # (used when the user cache is disabled)

    def __init__(self):
        pass

    def exists(self, oid):
        return False

    def read(self, oid):
        # store.read() calls into here if the blob doesn't exist in its
        # self.vfs. Raise the same error as a normal vfs when asked to read a
        # file that doesn't exist. The only difference is the full file path
        # isn't available in the error.
        raise IOError(
            errno.ENOENT,
            pycompat.sysstr(b'%s: No such file or directory' % oid),
        )

    def walk(self, path=None, onerror=None):
        return (b'', [], [])

    def write(self, oid, data):
        pass
95 95
96 96
class filewithprogress(object):
    """a file-like object that supports __len__ and read.

    Useful to provide progress information for how many bytes are read.
    """

    def __init__(self, fp, callback):
        self._fp = fp
        self._callback = callback  # func(readsize)
        # measure total length up front so __len__ is O(1)
        fp.seek(0, os.SEEK_END)
        self._len = fp.tell()
        fp.seek(0)

    def __len__(self):
        return self._len

    def read(self, size):
        fp = self._fp
        if fp is None:
            return b''
        data = fp.read(size)
        if not data:
            # EOF: release the underlying file immediately
            fp.close()
            self._fp = None
            return data
        if self._callback:
            self._callback(len(data))
        return data
124 124
125 125
class local(object):
    """Local blobstore for large file contents.

    This blobstore is used both as a cache and as a staging area for large blobs
    to be uploaded to the remote blobstore.
    """

    def __init__(self, repo):
        fullpath = repo.svfs.join(b'lfs/objects')
        self.vfs = lfsvfs(fullpath)

        # the shared per-user cache is optional; nullvfs discards everything
        if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
            self.cachevfs = nullvfs()
        else:
            usercache = lfutil._usercachedir(repo.ui, b'lfs')
            self.cachevfs = lfsvfs(usercache)
        self.ui = repo.ui

    def open(self, oid):
        """Open a read-only file descriptor to the named blob, in either the
        usercache or the local store."""
        # The usercache is the most likely place to hold the file. Commit will
        # write to both it and the local store, as will anything that downloads
        # the blobs. However, things like clone without an update won't
        # populate the local store. For an init + push of a local clone,
        # the usercache is the only place it _could_ be. If not present, the
        # missing file msg here will indicate the local repo, not the usercache.
        if self.cachevfs.exists(oid):
            return self.cachevfs(oid, b'rb')

        return self.vfs(oid, b'rb')

    def download(self, oid, src):
        """Read the blob from the remote source in chunks, verify the content,
        and write to this local blobstore."""
        sha256 = hashlib.sha256()

        with self.vfs(oid, b'wb', atomictemp=True) as fp:
            for chunk in util.filechunkiter(src, size=1048576):
                fp.write(chunk)
                sha256.update(chunk)

            # verify before the atomictemp file is renamed into place
            realoid = node.hex(sha256.digest())
            if realoid != oid:
                raise LfsCorruptionError(
                    _(b'corrupt remote lfs object: %s') % oid
                )

        self._linktousercache(oid)

    def write(self, oid, data):
        """Write blob to local blobstore.

        This should only be called from the filelog during a commit or similar.
        As such, there is no need to verify the data. Imports from a remote
        store must use ``download()`` instead."""
        with self.vfs(oid, b'wb', atomictemp=True) as fp:
            fp.write(data)

        self._linktousercache(oid)

    def linkfromusercache(self, oid):
        """Link blobs found in the user cache into this store.

        The server module needs to do this when it lets the client know not to
        upload the blob, to ensure it is always available in this store.
        Normally this is done implicitly when the client reads or writes the
        blob, but that doesn't happen when the server tells the client that it
        already has the blob.
        """
        if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
            self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))

    def _linktousercache(self, oid):
        # XXX: should we verify the content of the cache, and hardlink back to
        # the local store on success, but truncate, write and link on failure?
        if not self.cachevfs.exists(oid) and not isinstance(
            self.cachevfs, nullvfs
        ):
            self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))

    def read(self, oid, verify=True):
        """Read blob from local blobstore."""
        if not self.vfs.exists(oid):
            blob = self._read(self.cachevfs, oid, verify)

            # Even if revlog will verify the content, it needs to be verified
            # now before making the hardlink to avoid propagating corrupt blobs.
            # Don't abort if corruption is detected, because `hg verify` will
            # give more useful info about the corruption- simply don't add the
            # hardlink.
            if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
                self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
        else:
            self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
            blob = self._read(self.vfs, oid, verify)
        return blob

    def _read(self, vfs, oid, verify):
        """Read blob (after verifying) from the given store"""
        blob = vfs.read(oid)
        if verify:
            _verify(oid, blob)
        return blob

    def verify(self, oid):
        """Indicate whether or not the hash of the underlying file matches its
        name."""
        sha256 = hashlib.sha256()

        with self.open(oid) as fp:
            for chunk in util.filechunkiter(fp, size=1048576):
                sha256.update(chunk)

        return oid == node.hex(sha256.digest())

    def has(self, oid):
        """Returns True if the local blobstore contains the requested blob,
        False otherwise."""
        return self.cachevfs.exists(oid) or self.vfs.exists(oid)
249 249
250 250
def _urlerrorreason(urlerror):
    '''Create a friendly message for the given URLError to be used in an
    LfsRemoteError message.
    '''
    inst = urlerror

    # prefer the wrapped exception, when present, as the message source
    if isinstance(urlerror.reason, Exception):
        inst = urlerror.reason

    if util.safehasattr(inst, b'reason'):
        try:  # usually it is in the form (errno, strerror)
            reason = inst.reason.args[1]
        except (AttributeError, IndexError):
            # it might be anything, for example a string
            reason = inst.reason
        if isinstance(reason, pycompat.unicode):
            # SSLError of Python 2.7.9 contains a unicode
            reason = encoding.unitolocal(reason)
        return reason
    elif getattr(inst, "strerror", None):
        return encoding.strtolocal(inst.strerror)
    else:
        # last resort: stringify the original error itself
        return stringutil.forcebytestr(urlerror)
274 274
275 275
class lfsauthhandler(util.urlreq.basehandler):
    # urllib handler rejecting non-Basic authentication challenges
    handler_order = 480  # Before HTTPDigestAuthHandler (== 490)

    def http_error_401(self, req, fp, code, msg, headers):
        """Enforces that any authentication performed is HTTP Basic
        Authentication. No authentication is also acceptable.
        """
        authreq = headers.get(r'www-authenticate', None)
        if authreq:
            scheme = authreq.split()[0]

            if scheme.lower() != r'basic':
                msg = _(b'the server must support Basic Authentication')
                raise util.urlerr.httperror(
                    req.get_full_url(),
                    code,
                    encoding.strfromlocal(msg),
                    headers,
                    fp,
                )
        # returning None lets other handlers (or the caller) deal with 401
        return None
298 298
class _gitlfsremote(object):
    """Blob store backed by a remote server speaking the Git-LFS HTTP
    protocol (batch API plus basic transfers)."""

    def __init__(self, repo, url):
        ui = repo.ui
        self.ui = ui
        baseurl, authinfo = url.authinfo()
        # Normalize so b'%s/objects/batch' joins never double the slash.
        self.baseurl = baseurl.rstrip(b'/')
        useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
        if not useragent:
            # Default to an official-looking git-lfs agent string,
            # presumably for server compatibility — confirm.
            useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
        self.urlopener = urlmod.opener(ui, authinfo, useragent)
        # Restrict authentication to HTTP Basic (see lfsauthhandler above).
        self.urlopener.add_handler(lfsauthhandler())
        # Retry budget for transient socket errors during transfers.
        self.retry = ui.configint(b'lfs', b'retry')
311 311
    def writebatch(self, pointers, fromstore):
        """Batch upload from local to remote blobstore."""
        # Deduplicate first so the same oid is never transferred twice.
        self._batch(_deduplicate(pointers), fromstore, b'upload')
315 315
    def readbatch(self, pointers, tostore):
        """Batch download from remote to local blobstore."""
        # Deduplicate first so the same oid is never transferred twice.
        self._batch(_deduplicate(pointers), tostore, b'download')
319 319
    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        # Build the JSON request payload; oids/action travel as native str.
        objects = [
            {r'oid': pycompat.strurl(p.oid()), r'size': p.size()}
            for p in pointers
        ]
        requestdata = pycompat.bytesurl(
            json.dumps(
                {r'objects': objects, r'operation': pycompat.strurl(action),}
            )
        )
        url = b'%s/objects/batch' % self.baseurl
        batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
        batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json')
        batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json')
        try:
            with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
                rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            # Give actionable hints for the two most common failure codes.
            hints = {
                400: _(
                    b'check that lfs serving is enabled on %s and "%s" is '
                    b'supported'
                )
                % (self.baseurl, action),
                404: _(b'the "lfs.url" config may be used to override %s')
                % self.baseurl,
            }
            hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
            raise LfsRemoteError(
                _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
                hint=hint,
            )
        except util.urlerr.urlerror as ex:
            hint = (
                _(b'the "lfs.url" config may be used to override %s')
                % self.baseurl
            )
            raise LfsRemoteError(
                _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
            )
        try:
            response = json.loads(rawjson)
        except ValueError:
            # NOTE(review): rawjson is bytes (rsp.read()); bytes has no
            # .encode() on Python 3, so this error path would raise
            # AttributeError there — confirm intended for py2 only.
            raise LfsRemoteError(
                _(b'LFS server returns invalid JSON: %s')
                % rawjson.encode("utf-8")
            )

        # rsp stays bound after the with-block; only metadata is read here.
        if self.ui.debugflag:
            self.ui.debug(b'Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            headers = pycompat.bytestr(rsp.info()).strip()
            self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))

            # NOTE(review): sorts the response objects in place, so debug
            # mode changes the order seen by callers — confirm harmless.
            if r'objects' in response:
                response[r'objects'] = sorted(
                    response[r'objects'], key=lambda p: p[r'oid']
                )
            self.ui.debug(
                b'%s\n'
                % pycompat.bytesurl(
                    json.dumps(
                        response,
                        indent=2,
                        separators=(r'', r': '),
                        sort_keys=True,
                    )
                )
            )

        # Convert every unicode string in the decoded JSON back to bytes.
        def encodestr(x):
            if isinstance(x, pycompat.unicode):
                return x.encode('utf-8')
            return x

        return pycompat.rapply(encodestr, response)
401 401
402 402 def _checkforservererror(self, pointers, responses, action):
403 403 """Scans errors from objects
404 404
405 405 Raises LfsRemoteError if any objects have an error"""
406 406 for response in responses:
407 407 # The server should return 404 when objects cannot be found. Some
408 408 # server implementation (ex. lfs-test-server) does not set "error"
409 409 # but just removes "download" from "actions". Treat that case
410 410 # as the same as 404 error.
411 411 if b'error' not in response:
412 412 if action == b'download' and action not in response.get(
413 413 b'actions', []
414 414 ):
415 415 code = 404
416 416 else:
417 417 continue
418 418 else:
419 419 # An error dict without a code doesn't make much sense, so
420 420 # treat as a server error.
421 421 code = response.get(b'error').get(b'code', 500)
422 422
423 423 ptrmap = {p.oid(): p for p in pointers}
424 424 p = ptrmap.get(response[b'oid'], None)
425 425 if p:
426 426 filename = getattr(p, 'filename', b'unknown')
427 427 errors = {
428 428 404: b'The object does not exist',
429 429 410: b'The object was removed by the owner',
430 430 422: b'Validation error',
431 431 500: b'Internal server error',
432 432 }
433 433 msg = errors.get(code, b'status code %d' % code)
434 434 raise LfsRemoteError(
435 435 _(b'LFS server error for "%s": %s') % (filename, msg)
436 436 )
437 437 else:
438 438 raise LfsRemoteError(
439 439 _(b'LFS server error. Unsolicited response for oid %s')
440 440 % response[b'oid']
441 441 )
442 442
443 443 def _extractobjects(self, response, pointers, action):
444 444 """extract objects from response of the batch API
445 445
446 446 response: parsed JSON object returned by batch API
447 447 return response['objects'] filtered by action
448 448 raise if any object has an error
449 449 """
450 450 # Scan errors from objects - fail early
451 451 objects = response.get(b'objects', [])
452 452 self._checkforservererror(pointers, objects, action)
453 453
454 454 # Filter objects with given action. Practically, this skips uploading
455 455 # objects which exist in the server.
456 456 filteredobjects = [
457 457 o for o in objects if action in o.get(b'actions', [])
458 458 ]
459 459
460 460 return filteredobjects
461 461
    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
basic-transfers.md
        """
        oid = obj[b'oid']
        # The batch response supplies the per-object transfer URL + headers.
        href = obj[b'actions'][action].get(b'href')
        headers = obj[b'actions'][action].get(b'header', {}).items()

        request = util.urlreq.request(pycompat.strurl(href))
        if action == b'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(
                    _(b'detected corrupt lfs object: %s') % oid,
                    hint=_(b'run hg verify'),
                )
            request.data = filewithprogress(localstore.open(oid), None)
            request.get_method = lambda: r'PUT'
            request.add_header(r'Content-Type', r'application/octet-stream')
            # filewithprogress presumably implements __len__ — confirm.
            request.add_header(r'Content-Length', len(request.data))

        for k, v in headers:
            request.add_header(pycompat.strurl(k), pycompat.strurl(v))

        response = b''
        try:
            with contextlib.closing(self.urlopener.open(request)) as req:
                ui = self.ui  # Shorten debug lines
                if self.ui.debugflag:
                    ui.debug(b'Status: %d\n' % req.status)
                    # lfs-test-server and hg serve return headers in different
                    # order
                    headers = pycompat.bytestr(req.info()).strip()
                    ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))

                if action == b'download':
                    # If downloading blobs, store downloaded data to local
                    # blobstore
                    localstore.download(oid, req)
                else:
                    # Drain the server reply in 1MiB chunks; it is only used
                    # for the debug message below.
                    while True:
                        data = req.read(1048576)
                        if not data:
                            break
                        response += data
                    if response:
                        ui.debug(b'lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug(
                    b'%s: %s\n' % (oid, ex.read())
                )  # XXX: also bytes?
            raise LfsRemoteError(
                _(b'LFS HTTP error: %s (oid=%s, action=%s)')
                % (stringutil.forcebytestr(ex), oid, action)
            )
        except util.urlerr.urlerror as ex:
            hint = _(b'attempted connection to %s') % pycompat.bytesurl(
                util.urllibcompat.getfullurl(request)
            )
            raise LfsRemoteError(
                _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
            )
531 531
    def _batch(self, pointers, localstore, action):
        """Run a full batch transfer: query the batch API, then upload or
        download every returned object, with progress and status output."""
        if action not in [b'upload', b'download']:
            raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get(b'size', 0) for x in objects)
        # oid -> size, used to advance the progress bar per finished blob.
        sizes = {}
        for obj in objects:
            sizes[obj.get(b'oid')] = obj.get(b'size', 0)
        topic = {
            b'upload': _(b'lfs uploading'),
            b'download': _(b'lfs downloading'),
        }[action]
        if len(objects) > 1:
            self.ui.note(
                _(b'lfs: need to transfer %d objects (%s)\n')
                % (len(objects), util.bytecount(total))
            )

        def transfer(chunk):
            # Generator: transfers each object and yields (1, oid) on
            # completion, matching the worker API below.
            for obj in chunk:
                objsize = obj.get(b'size', 0)
                if self.ui.verbose:
                    if action == b'download':
                        msg = _(b'lfs: downloading %s (%s)\n')
                    elif action == b'upload':
                        msg = _(b'lfs: uploading %s (%s)\n')
                    self.ui.note(
                        msg % (obj.get(b'oid'), util.bytecount(objsize))
                    )
                # Retry transient socket errors up to self.retry times.
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get(b'oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _(b'lfs: failed: %r (remaining retry %d)\n')
                                % (stringutil.forcebytestr(ex), retry)
                            )
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out
        if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
            oids = worker.worker(
                self.ui,
                0.1,
                transfer,
                (),
                sorted(objects, key=lambda o: o.get(b'oid')),
            )
        else:
            # Lazy generator; the actual transfers happen while iterating
            # inside the progress context below.
            oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))

        with self.ui.makeprogress(topic, total=total) as progress:
            progress.update(0)
            processed = 0
            blobs = 0
            for _one, oid in oids:
                processed += sizes[oid]
                blobs += 1
                progress.update(processed)
                self.ui.note(_(b'lfs: processed: %s\n') % oid)

        if blobs > 0:
            if action == b'upload':
                self.ui.status(
                    _(b'lfs: uploaded %d files (%s)\n')
                    % (blobs, util.bytecount(processed))
                )
            elif action == b'download':
                self.ui.status(
                    _(b'lfs: downloaded %d files (%s)\n')
                    % (blobs, util.bytecount(processed))
                )
612 612
613 613 def __del__(self):
614 614 # copied from mercurial/httppeer.py
615 615 urlopener = getattr(self, 'urlopener', None)
616 616 if urlopener:
617 617 for h in urlopener.handlers:
618 618 h.close()
619 619 getattr(h, "close_all", lambda: None)()
620 620
621 621
class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        self.vfs = lfsvfs(repo.vfs.join(b'lfs', url.path))

    def writebatch(self, pointers, fromstore):
        # Copy each (deduplicated) blob out of the local store verbatim.
        for pointer in _deduplicate(pointers):
            blob = fromstore.read(pointer.oid(), verify=True)
            with self.vfs(pointer.oid(), b'wb', atomictemp=True) as out:
                out.write(blob)

    def readbatch(self, pointers, tostore):
        # Feed each stored blob back into the local store.
        for pointer in _deduplicate(pointers):
            with self.vfs(pointer.oid(), b'rb') as blobfile:
                tostore.download(pointer.oid(), blobfile)
639 639
640 640
class _nullremote(object):
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        # Intentionally discard all uploads.
        pass

    def readbatch(self, pointers, tostore):
        # Intentionally a no-op; nothing can be downloaded from this store.
        pass
652 652
653 653
class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    # NOTE(review): unlike the other store classes, these accept an extra
    # ``ui=None`` parameter — confirm whether any caller passes it.
    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        # Abort unconditionally: no usable remote store is configured.
        raise error.Abort(_(b'lfs.url needs to be configured'))
668 668
669 669
# Map of url scheme -> remote store class; the None key covers the case
# where no URL could be determined at all (prompts the user to configure).
_storemap = {
    b'https': _gitlfsremote,
    b'http': _gitlfsremote,
    b'file': _dummyremote,
    b'null': _nullremote,
    None: _promptremote,
}
677 677
678 678
def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    # Later pointers win; sortdict preserves insertion order of first use.
    unique = util.sortdict()
    for pointer in pointers:
        unique[pointer.oid()] = pointer
    return unique.values()
685 685
686 686
def _verify(oid, content):
    """Raise LfsCorruptionError unless ``content`` hashes to ``oid``."""
    digest = node.hex(hashlib.sha256(content).digest())
    if digest == oid:
        return
    raise LfsCorruptionError(
        _(b'detected corrupt lfs object: %s') % oid,
        hint=_(b'run hg verify'),
    )
694 694
695 695
def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git. As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config(b'lfs', b'url')
    url = util.url(lfsurl or b'')
    if lfsurl is None:
        if remote:
            path = remote
        elif util.safehasattr(repo, b'_subtoppath'):
            # The pull command sets this during the optional update phase, which
            # tells exactly where the pull originated, whether 'paths.default'
            # or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config(b'paths', b'default') or b''

        defaulturl = util.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            # Append the trailing slash only when it is actually missing.
            # The previous check compared ``path[:-1]`` (everything but the
            # last byte) against b'/', which added a spurious slash to paths
            # already ending in '/'.
            if defaulturl.path and not defaulturl.path.endswith(b'/'):
                defaulturl.path += b'/'
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = util.url(bytes(defaulturl))
            repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)
737 737
738 738
class LfsRemoteError(error.StorageError):
    """Raised for errors returned by, or while communicating with, a remote
    LFS blob store."""

    pass
741 741
742 742
# Raised by _verify() above when a blob's sha256 does not match its oid.
class LfsCorruptionError(error.Abort):
    """Raised when a corrupt blob is detected, aborting an operation

    It exists to allow specialized handling on the server side."""
@@ -1,267 +1,259
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import sys
11 11
12 12 # Allow 'from mercurial import demandimport' to keep working.
13 13 import hgdemandimport
14 14
demandimport = hgdemandimport

# Nothing is re-exported via 'from mercurial import *'.
__all__ = []
18 18
19 19 # Python 3 uses a custom module loader that transforms source code between
20 20 # source file reading and compilation. This is done by registering a custom
21 21 # finder that changes the spec for Mercurial modules to use a custom loader.
22 22 if sys.version_info[0] >= 3:
23 23 import importlib
24 24 import importlib.abc
25 25 import io
26 26 import token
27 27 import tokenize
28 28
    class hgpathentryfinder(importlib.abc.MetaPathFinder):
        """A sys.meta_path finder that uses a custom module loader."""

        def find_spec(self, fullname, path, target=None):
            # Only handle Mercurial-related modules.
            if not fullname.startswith(('mercurial.', 'hgext.')):
                return None
            # don't try to parse binary
            if fullname.startswith('mercurial.cext.'):
                return None
            # third-party packages are expected to be dual-version clean
            if fullname.startswith('mercurial.thirdparty'):
                return None
            # zstd is already dual-version clean, don't try and mangle it
            if fullname.startswith('mercurial.zstd'):
                return None
            # rustext is built for the right python version,
            # don't try and mangle it
            if fullname.startswith('mercurial.rustext'):
                return None
            # pywatchman is already dual-version clean, don't try and mangle it
            if fullname.startswith('hgext.fsmonitor.pywatchman'):
                return None

            # Try to find the module using other registered finders.
            spec = None
            for finder in sys.meta_path:
                if finder == self:
                    # Skip ourselves to avoid infinite recursion.
                    continue

                # Originally the API was a `find_module` method, but it was
                # renamed to `find_spec` in python 3.4, with a new `target`
                # argument.
                find_spec_method = getattr(finder, 'find_spec', None)
                if find_spec_method:
                    spec = find_spec_method(fullname, path, target=target)
                else:
                    spec = finder.find_module(fullname)
                    if spec is not None:
                        # Wrap the legacy loader in a proper ModuleSpec.
                        spec = importlib.util.spec_from_loader(fullname, spec)
                if spec:
                    break

            # This is a Mercurial-related module but we couldn't find it
            # using the previously-registered finders. This likely means
            # the module doesn't exist.
            if not spec:
                return None

            # TODO need to support loaders from alternate specs, like zip
            # loaders.
            loader = hgloader(spec.name, spec.origin)
            # Can't use util.safehasattr here because that would require
            # importing util, and we're in import code.
            if hasattr(spec.loader, 'loader'):  # hasattr-py3-only
                # This is a nested loader (maybe a lazy loader?)
                spec.loader.loader = loader
            else:
                spec.loader = loader
            return spec
89 89
    def replacetokens(tokens, fullname):
        """Transform a stream of tokens from raw to Python 3.

        It is called by the custom module loading machinery to rewrite
        source/tokens between source decoding and compilation.

        Returns a generator of possibly rewritten tokens.

        The input token list may be mutated as part of processing. However,
        its changes do not necessarily match the output token stream.

        REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
        OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
        """
        # The following utility functions access the tokens list and i index of
        # the for i, t enumerate(tokens) loop below
        def _isop(j, *o):
            """Assert that tokens[j] is an OP with one of the given values"""
            try:
                return tokens[j].type == token.OP and tokens[j].string in o
            except IndexError:
                return False

        def _findargnofcall(n):
            """Find arg n of a call expression (start at 0)

            Returns index of the first token of that argument, or None if
            there is not that many arguments.

            Assumes that token[i + 1] is '('.

            """
            nested = 0
            for j in range(i + 2, len(tokens)):
                if _isop(j, ')', ']', '}'):
                    # end of call, tuple, subscription or dict / set
                    nested -= 1
                    if nested < 0:
                        return None
                elif n == 0:
                    # this is the starting position of arg
                    return j
                elif _isop(j, '(', '[', '{'):
                    nested += 1
                elif _isop(j, ',') and nested == 0:
                    n -= 1

            return None

        def _ensureunicode(j):
            """Make sure the token at j is a unicode string

            This rewrites a string token to include the unicode literal prefix
            so the string transformer won't add the byte prefix.

            Ignores tokens that are not strings. Assumes bounds checking has
            already been done.

            """
            st = tokens[j]
            if st.type == token.STRING and st.string.startswith(("'", '"')):
                tokens[j] = st._replace(string='u%s' % st.string)

        for i, t in enumerate(tokens):
            # This looks like a function call.
            if t.type == token.NAME and _isop(i + 1, '('):
                fn = t.string

                # *attr() builtins don't accept byte strings to 2nd argument.
                if fn in (
                    'getattr',
                    'setattr',
                    'hasattr',
                    'safehasattr',
                ) and not _isop(i - 1, '.'):
                    arg1idx = _findargnofcall(1)
                    if arg1idx is not None:
                        _ensureunicode(arg1idx)

                # It changes iteritems/values to items/values as they are not
                # present in Python 3 world.  The ``def`` check leaves method
                # definitions named iteritems/itervalues untouched.
                elif fn in ('iteritems', 'itervalues') and not (
                    tokens[i - 1].type == token.NAME
                    and tokens[i - 1].string == 'def'
                ):
                    yield t._replace(string=fn[4:])
                    continue

            # Emit unmodified token.
            yield t
188 180
    # Header to add to bytecode files. This MUST be changed when
    # ``replacetoken`` or any mechanism that changes semantics of module
    # loading is changed. Otherwise cached bytecode may get loaded without
    # the new transformation mechanisms applied.
    # (version bumped to \x13 when the .encode()/.decode() argument
    # rewriting was removed from replacetokens)
    BYTECODEHEADER = b'HG\x00\x13'
194 186
    class hgloader(importlib.machinery.SourceFileLoader):
        """Custom module loader that transforms source code.

        When the source code is converted to a code object, we transform
        certain patterns to be Python 3 compatible. This allows us to write code
        that is natively Python 2 and compatible with Python 3 without
        making the code excessively ugly.

        We do this by transforming the token stream between parse and compile.

        Implementing transformations invalidates caching assumptions made
        by the built-in importer. The built-in importer stores a header on
        saved bytecode files indicating the Python/bytecode version. If the
        version changes, the cached bytecode is ignored. The Mercurial
        transformations could change at any time. This means we need to check
        that cached bytecode was generated with the current transformation
        code or there could be a mismatch between cached bytecode and what
        would be generated from this class.

        We supplement the bytecode caching layer by wrapping ``get_data``
        and ``set_data``. These functions are called when the
        ``SourceFileLoader`` retrieves and saves bytecode cache files,
        respectively. We simply add an additional header on the file. As
        long as the version in this file is changed when semantics change,
        cached bytecode should be invalidated when transformations change.

        The added header has the form ``HG<VERSION>``. That is a literal
        ``HG`` with 2 binary bytes indicating the transformation version.
        """

        def get_data(self, path):
            data = super(hgloader, self).get_data(path)

            # Only bytecode files carry our extra header; anything else
            # (e.g. source files) passes through untouched.
            if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                return data

            # There should be a header indicating the Mercurial transformation
            # version. If it doesn't exist or doesn't match the current version,
            # we raise an OSError because that is what
            # ``SourceFileLoader.get_code()`` expects when loading bytecode
            # paths to indicate the cached file is "bad."
            if data[0:2] != b'HG':
                raise OSError('no hg header')
            if data[0:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')

            return data[4:]

        def set_data(self, path, data, *args, **kwargs):
            # Prepend our version header when writing bytecode cache files,
            # mirroring the strip performed in get_data().
            if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                data = BYTECODEHEADER + data

            return super(hgloader, self).set_data(path, data, *args, **kwargs)

        def source_to_code(self, data, path):
            """Perform token transformation before compilation."""
            buf = io.BytesIO(data)
            tokens = tokenize.tokenize(buf.readline)
            data = tokenize.untokenize(replacetokens(list(tokens), self.name))
            # Python's built-in importer strips frames from exceptions raised
            # for this code. Unfortunately, that mechanism isn't extensible
            # and our frame will be blamed for the import failure. There
            # are extremely hacky ways to do frame stripping. We haven't
            # implemented them because they are very ugly.
            return super(hgloader, self).source_to_code(data, path)
260 252
    # We automagically register our custom importer as a side-effect of
    # loading. This is necessary to ensure that any entry points are able
    # to import mercurial.* modules without having to perform this
    # registration themselves.
    # The any() guard keeps repeated imports of this module idempotent.
    if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
        # meta_path is used before any implicit finders and before sys.path.
        sys.meta_path.insert(0, hgpathentryfinder())
@@ -1,453 +1,453
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18 import tempfile
19 19
20 20 ispy3 = sys.version_info[0] >= 3
21 21 ispypy = r'__pypy__' in sys.builtin_module_names
22 22
23 23 if not ispy3:
24 24 import cookielib
25 25 import cPickle as pickle
26 26 import httplib
27 27 import Queue as queue
28 28 import SocketServer as socketserver
29 29 import xmlrpclib
30 30
31 31 from .thirdparty.concurrent import futures
32 32
33 33 def future_set_exception_info(f, exc_info):
34 34 f.set_exception_info(*exc_info)
35 35
36 36
37 37 else:
38 38 import concurrent.futures as futures
39 39 import http.cookiejar as cookielib
40 40 import http.client as httplib
41 41 import pickle
42 42 import queue as queue
43 43 import socketserver
44 44 import xmlrpc.client as xmlrpclib
45 45
46 46 def future_set_exception_info(f, exc_info):
47 47 f.set_exception(exc_info[0])
48 48
49 49
50 50 def identity(a):
51 51 return a
52 52
53 53
54 54 def _rapply(f, xs):
55 55 if xs is None:
56 56 # assume None means non-value of optional data
57 57 return xs
58 58 if isinstance(xs, (list, set, tuple)):
59 59 return type(xs)(_rapply(f, x) for x in xs)
60 60 if isinstance(xs, dict):
61 61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 62 return f(xs)
63 63
64 64
65 65 def rapply(f, xs):
66 66 """Apply function recursively to every item preserving the data structure
67 67
68 68 >>> def f(x):
69 69 ... return 'f(%s)' % x
70 70 >>> rapply(f, None) is None
71 71 True
72 72 >>> rapply(f, 'a')
73 73 'f(a)'
74 74 >>> rapply(f, {'a'}) == {'f(a)'}
75 75 True
76 76 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
77 77 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
78 78
79 79 >>> xs = [object()]
80 80 >>> rapply(identity, xs) is xs
81 81 True
82 82 """
83 83 if f is identity:
84 84 # fast path mainly for py2
85 85 return xs
86 86 return _rapply(f, xs)
87 87
88 88
89 89 if ispy3:
90 90 import builtins
91 91 import functools
92 92 import io
93 93 import struct
94 94
95 95 fsencode = os.fsencode
96 96 fsdecode = os.fsdecode
97 97 oscurdir = os.curdir.encode('ascii')
98 98 oslinesep = os.linesep.encode('ascii')
99 99 osname = os.name.encode('ascii')
100 100 ospathsep = os.pathsep.encode('ascii')
101 101 ospardir = os.pardir.encode('ascii')
102 102 ossep = os.sep.encode('ascii')
103 103 osaltsep = os.altsep
104 104 if osaltsep:
105 105 osaltsep = osaltsep.encode('ascii')
106 106
107 107 sysplatform = sys.platform.encode('ascii')
108 108 sysexecutable = sys.executable
109 109 if sysexecutable:
110 110 sysexecutable = os.fsencode(sysexecutable)
111 111 bytesio = io.BytesIO
112 112 # TODO deprecate stringio name, as it is a lie on Python 3.
113 113 stringio = bytesio
114 114
115 115 def maplist(*args):
116 116 return list(map(*args))
117 117
118 118 def rangelist(*args):
119 119 return list(range(*args))
120 120
121 121 def ziplist(*args):
122 122 return list(zip(*args))
123 123
124 124 rawinput = input
125 125 getargspec = inspect.getfullargspec
126 126
127 127 long = int
128 128
129 129 # TODO: .buffer might not exist if std streams were replaced; we'll need
130 130 # a silly wrapper to make a bytes stream backed by a unicode one.
131 131 stdin = sys.stdin.buffer
132 132 stdout = sys.stdout.buffer
133 133 stderr = sys.stderr.buffer
134 134
135 135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
136 136 # we can use os.fsencode() to get back bytes argv.
137 137 #
138 138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
139 139 #
140 140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
141 141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
142 142 if getattr(sys, 'argv', None) is not None:
143 143 sysargv = list(map(os.fsencode, sys.argv))
144 144
145 145 bytechr = struct.Struct(r'>B').pack
146 146 byterepr = b'%r'.__mod__
147 147
148 148 class bytestr(bytes):
149 149 """A bytes which mostly acts as a Python 2 str
150 150
151 151 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
152 152 ('', 'foo', 'ascii', '1')
153 153 >>> s = bytestr(b'foo')
154 154 >>> assert s is bytestr(s)
155 155
156 156 __bytes__() should be called if provided:
157 157
158 158 >>> class bytesable(object):
159 159 ... def __bytes__(self):
160 160 ... return b'bytes'
161 161 >>> bytestr(bytesable())
162 162 'bytes'
163 163
164 164 There's no implicit conversion from non-ascii str as its encoding is
165 165 unknown:
166 166
167 167 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
168 168 Traceback (most recent call last):
169 169 ...
170 170 UnicodeEncodeError: ...
171 171
172 172 Comparison between bytestr and bytes should work:
173 173
174 174 >>> assert bytestr(b'foo') == b'foo'
175 175 >>> assert b'foo' == bytestr(b'foo')
176 176 >>> assert b'f' in bytestr(b'foo')
177 177 >>> assert bytestr(b'f') in b'foo'
178 178
179 179 Sliced elements should be bytes, not integer:
180 180
181 181 >>> s[1], s[:2]
182 182 (b'o', b'fo')
183 183 >>> list(s), list(reversed(s))
184 184 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
185 185
186 186 As bytestr type isn't propagated across operations, you need to cast
187 187 bytes to bytestr explicitly:
188 188
189 189 >>> s = bytestr(b'foo').upper()
190 190 >>> t = bytestr(s)
191 191 >>> s[0], t[0]
192 192 (70, b'F')
193 193
194 194 Be careful to not pass a bytestr object to a function which expects
195 195 bytearray-like behavior.
196 196
197 197 >>> t = bytes(t) # cast to bytes
198 198 >>> assert type(t) is bytes
199 199 """
200 200
201 201 def __new__(cls, s=b''):
202 202 if isinstance(s, bytestr):
203 203 return s
204 204 if not isinstance(
205 205 s, (bytes, bytearray)
206 206 ) and not hasattr( # hasattr-py3-only
207 207 s, u'__bytes__'
208 208 ):
209 s = str(s).encode(u'ascii')
209 s = str(s).encode('ascii')
210 210 return bytes.__new__(cls, s)
211 211
212 212 def __getitem__(self, key):
213 213 s = bytes.__getitem__(self, key)
214 214 if not isinstance(s, bytes):
215 215 s = bytechr(s)
216 216 return s
217 217
218 218 def __iter__(self):
219 219 return iterbytestr(bytes.__iter__(self))
220 220
221 221 def __repr__(self):
222 222 return bytes.__repr__(self)[1:] # drop b''
223 223
224 224 def iterbytestr(s):
225 225 """Iterate bytes as if it were a str object of Python 2"""
226 226 return map(bytechr, s)
227 227
228 228 def maybebytestr(s):
229 229 """Promote bytes to bytestr"""
230 230 if isinstance(s, bytes):
231 231 return bytestr(s)
232 232 return s
233 233
234 234 def sysbytes(s):
235 235 """Convert an internal str (e.g. keyword, __doc__) back to bytes
236 236
237 237 This never raises UnicodeEncodeError, but only ASCII characters
238 238 can be round-trip by sysstr(sysbytes(s)).
239 239 """
240 return s.encode(u'utf-8')
240 return s.encode('utf-8')
241 241
242 242 def sysstr(s):
243 243 """Return a keyword str to be passed to Python functions such as
244 244 getattr() and str.encode()
245 245
246 246 This never raises UnicodeDecodeError. Non-ascii characters are
247 247 considered invalid and mapped to arbitrary but unique code points
248 248 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
249 249 """
250 250 if isinstance(s, builtins.str):
251 251 return s
252 return s.decode(u'latin-1')
252 return s.decode('latin-1')
253 253
254 254 def strurl(url):
255 255 """Converts a bytes url back to str"""
256 256 if isinstance(url, bytes):
257 return url.decode(u'ascii')
257 return url.decode('ascii')
258 258 return url
259 259
260 260 def bytesurl(url):
261 261 """Converts a str url to bytes by encoding in ascii"""
262 262 if isinstance(url, str):
263 return url.encode(u'ascii')
263 return url.encode('ascii')
264 264 return url
265 265
266 266 def raisewithtb(exc, tb):
267 267 """Raise exception with the given traceback"""
268 268 raise exc.with_traceback(tb)
269 269
270 270 def getdoc(obj):
271 271 """Get docstring as bytes; may be None so gettext() won't confuse it
272 272 with _('')"""
273 273 doc = getattr(obj, u'__doc__', None)
274 274 if doc is None:
275 275 return doc
276 276 return sysbytes(doc)
277 277
278 278 def _wrapattrfunc(f):
279 279 @functools.wraps(f)
280 280 def w(object, name, *args):
281 281 return f(object, sysstr(name), *args)
282 282
283 283 return w
284 284
285 285 # these wrappers are automagically imported by hgloader
286 286 delattr = _wrapattrfunc(builtins.delattr)
287 287 getattr = _wrapattrfunc(builtins.getattr)
288 288 hasattr = _wrapattrfunc(builtins.hasattr)
289 289 setattr = _wrapattrfunc(builtins.setattr)
290 290 xrange = builtins.range
291 291 unicode = str
292 292
293 293 def open(name, mode=b'r', buffering=-1, encoding=None):
294 294 return builtins.open(name, sysstr(mode), buffering, encoding)
295 295
296 296 safehasattr = _wrapattrfunc(builtins.hasattr)
297 297
298 298 def _getoptbwrapper(orig, args, shortlist, namelist):
299 299 """
300 300 Takes bytes arguments, converts them to unicode, pass them to
301 301 getopt.getopt(), convert the returned values back to bytes and then
302 302 return them for Python 3 compatibility as getopt.getopt() don't accepts
303 303 bytes on Python 3.
304 304 """
305 305 args = [a.decode('latin-1') for a in args]
306 306 shortlist = shortlist.decode('latin-1')
307 307 namelist = [a.decode('latin-1') for a in namelist]
308 308 opts, args = orig(args, shortlist, namelist)
309 309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
310 310 args = [a.encode('latin-1') for a in args]
311 311 return opts, args
312 312
313 313 def strkwargs(dic):
314 314 """
315 315 Converts the keys of a python dictonary to str i.e. unicodes so that
316 316 they can be passed as keyword arguments as dictonaries with bytes keys
317 317 can't be passed as keyword arguments to functions on Python 3.
318 318 """
319 319 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
320 320 return dic
321 321
322 322 def byteskwargs(dic):
323 323 """
324 324 Converts keys of python dictonaries to bytes as they were converted to
325 325 str to pass that dictonary as a keyword argument on Python 3.
326 326 """
327 327 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
328 328 return dic
329 329
330 330 # TODO: handle shlex.shlex().
331 331 def shlexsplit(s, comments=False, posix=True):
332 332 """
333 333 Takes bytes argument, convert it to str i.e. unicodes, pass that into
334 334 shlex.split(), convert the returned value to bytes and return that for
335 335 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
336 336 """
337 337 ret = shlex.split(s.decode('latin-1'), comments, posix)
338 338 return [a.encode('latin-1') for a in ret]
339 339
340 340 shlexquote = shlex.quote
341 341
342 342 else:
343 343 import cStringIO
344 344 import pipes
345 345
346 346 xrange = xrange
347 347 unicode = unicode
348 348 bytechr = chr
349 349 byterepr = repr
350 350 bytestr = str
351 351 iterbytestr = iter
352 352 maybebytestr = identity
353 353 sysbytes = identity
354 354 sysstr = identity
355 355 strurl = identity
356 356 bytesurl = identity
357 357 open = open
358 358 delattr = delattr
359 359 getattr = getattr
360 360 hasattr = hasattr
361 361 setattr = setattr
362 362
363 363 # this can't be parsed on Python 3
364 364 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
365 365
366 366 def fsencode(filename):
367 367 """
368 368 Partial backport from os.py in Python 3, which only accepts bytes.
369 369 In Python 2, our paths should only ever be bytes, a unicode path
370 370 indicates a bug.
371 371 """
372 372 if isinstance(filename, str):
373 373 return filename
374 374 else:
375 375 raise TypeError(r"expect str, not %s" % type(filename).__name__)
376 376
377 377 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
378 378 # better not to touch Python 2 part as it's already working fine.
379 379 fsdecode = identity
380 380
381 381 def getdoc(obj):
382 382 return getattr(obj, '__doc__', None)
383 383
384 384 _notset = object()
385 385
386 386 def safehasattr(thing, attr):
387 387 return getattr(thing, attr, _notset) is not _notset
388 388
389 389 def _getoptbwrapper(orig, args, shortlist, namelist):
390 390 return orig(args, shortlist, namelist)
391 391
392 392 strkwargs = identity
393 393 byteskwargs = identity
394 394
395 395 oscurdir = os.curdir
396 396 oslinesep = os.linesep
397 397 osname = os.name
398 398 ospathsep = os.pathsep
399 399 ospardir = os.pardir
400 400 ossep = os.sep
401 401 osaltsep = os.altsep
402 402 long = long
403 403 stdin = sys.stdin
404 404 stdout = sys.stdout
405 405 stderr = sys.stderr
406 406 if getattr(sys, 'argv', None) is not None:
407 407 sysargv = sys.argv
408 408 sysplatform = sys.platform
409 409 sysexecutable = sys.executable
410 410 shlexsplit = shlex.split
411 411 shlexquote = pipes.quote
412 412 bytesio = cStringIO.StringIO
413 413 stringio = bytesio
414 414 maplist = map
415 415 rangelist = range
416 416 ziplist = zip
417 417 rawinput = raw_input
418 418 getargspec = inspect.getargspec
419 419
420 420 isjython = sysplatform.startswith(b'java')
421 421
422 422 isdarwin = sysplatform.startswith(b'darwin')
423 423 islinux = sysplatform.startswith(b'linux')
424 424 isposix = osname == b'posix'
425 425 iswindows = osname == b'nt'
426 426
427 427
428 428 def getoptb(args, shortlist, namelist):
429 429 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
430 430
431 431
432 432 def gnugetoptb(args, shortlist, namelist):
433 433 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
434 434
435 435
436 436 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
437 437 return tempfile.mkdtemp(suffix, prefix, dir)
438 438
439 439
440 440 # text=True is not supported; use util.from/tonativeeol() instead
441 441 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
442 442 return tempfile.mkstemp(suffix, prefix, dir)
443 443
444 444
445 445 # mode must include 'b'ytes as encoding= is not supported
446 446 def namedtempfile(
447 447 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
448 448 ):
449 449 mode = sysstr(mode)
450 450 assert r'b' in mode
451 451 return tempfile.NamedTemporaryFile(
452 452 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
453 453 )
General Comments 0
You need to be logged in to leave comments. Login now