clone: use better names for temp files...
Arseniy Alekseyev -
r50085:4ff4e23d default
@@ -1,944 +1,949 b''
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import contextlib
10 10 import errno
11 11 import os
12 12 import struct
13 13
14 14 from .i18n import _
15 15 from .pycompat import open
16 16 from .interfaces import repository
17 17 from . import (
18 18 bookmarks,
19 19 cacheutil,
20 20 error,
21 21 narrowspec,
22 22 phases,
23 23 pycompat,
24 24 requirements as requirementsmod,
25 25 scmutil,
26 26 store,
27 27 util,
28 28 )
29 29 from .revlogutils import (
30 30 nodemap,
31 31 )
32 32 from .utils import (
33 33 stringutil,
34 34 )
35 35
36 36
37 37 def new_stream_clone_requirements(default_requirements, streamed_requirements):
38 38 """determine the final set of requirement for a new stream clone
39 39
40 40 this method combines the "default" requirements that a new repository would
41 41 use with the constraints we get from the stream clone content. We keep local
42 42 configuration choices when possible.
43 43 """
44 44 requirements = set(default_requirements)
45 45 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
46 46 requirements.update(streamed_requirements)
47 47 return requirements
48 48
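To make the set arithmetic above concrete, here is a small sketch (not part of this changeset); the requirement names are placeholders and not the actual contents of STREAM_FIXED_REQUIREMENTS.

# Hypothetical values; only the shape of the computation matters here.
default_requirements = {b'revlogv1', b'store', b'dotencode', b'share-safe'}
stream_fixed = {b'revlogv1', b'share-safe'}   # stand-in for STREAM_FIXED_REQUIREMENTS
streamed = {b'revlogv1'}                      # what the server's stream dictates

requirements = set(default_requirements)
requirements -= stream_fixed    # drop the stream-controlled requirements
requirements |= streamed        # adopt whatever the streamed data requires
# result: {b'revlogv1', b'store', b'dotencode'} -- local choices kept where possible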
49 49
50 50 def streamed_requirements(repo):
51 51 """the set of requirement the new clone will have to support
52 52
53 53 This is used for advertising the stream options and to generate the actual
54 54 stream content."""
55 55 requiredformats = (
56 56 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
57 57 )
58 58 return requiredformats
59 59
60 60
61 61 def canperformstreamclone(pullop, bundle2=False):
62 62 """Whether it is possible to perform a streaming clone as part of pull.
63 63
64 64 ``bundle2`` will cause the function to consider stream clone through
65 65 bundle2 and only through bundle2.
66 66
67 67 Returns a tuple of (supported, requirements). ``supported`` is True if
68 68 streaming clone is supported and False otherwise. ``requirements`` is
69 69 a set of repo requirements from the remote, or ``None`` if stream clone
70 70 isn't supported.
71 71 """
72 72 repo = pullop.repo
73 73 remote = pullop.remote
74 74
75 75 bundle2supported = False
76 76 if pullop.canusebundle2:
77 77 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
78 78 bundle2supported = True
79 79 # else
80 80 # Server doesn't support bundle2 stream clone or doesn't support
81 81 # the versions we support. Fall back and possibly allow legacy.
82 82
83 83 # Ensures legacy code path uses available bundle2.
84 84 if bundle2supported and not bundle2:
85 85 return False, None
86 86 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
87 87 elif bundle2 and not bundle2supported:
88 88 return False, None
89 89
90 90 # Streaming clone only works on empty repositories.
91 91 if len(repo):
92 92 return False, None
93 93
94 94 # Streaming clone only works if all data is being requested.
95 95 if pullop.heads:
96 96 return False, None
97 97
98 98 streamrequested = pullop.streamclonerequested
99 99
100 100 # If we don't have a preference, let the server decide for us. This
101 101 # likely only comes into play in LANs.
102 102 if streamrequested is None:
103 103 # The server can advertise whether to prefer streaming clone.
104 104 streamrequested = remote.capable(b'stream-preferred')
105 105
106 106 if not streamrequested:
107 107 return False, None
108 108
109 109 # In order for stream clone to work, the client has to support all the
110 110 # requirements advertised by the server.
111 111 #
112 112 # The server advertises its requirements via the "stream" and "streamreqs"
113 113 # capability. "stream" (a value-less capability) is advertised if and only
114 114 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
115 115 # is advertised and contains a comma-delimited list of requirements.
116 116 requirements = set()
117 117 if remote.capable(b'stream'):
118 118 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
119 119 else:
120 120 streamreqs = remote.capable(b'streamreqs')
121 121 # This is weird and shouldn't happen with modern servers.
122 122 if not streamreqs:
123 123 pullop.repo.ui.warn(
124 124 _(
125 125 b'warning: stream clone requested but server has them '
126 126 b'disabled\n'
127 127 )
128 128 )
129 129 return False, None
130 130
131 131 streamreqs = set(streamreqs.split(b','))
132 132 # Server requires something we don't support. Bail.
133 133 missingreqs = streamreqs - repo.supported
134 134 if missingreqs:
135 135 pullop.repo.ui.warn(
136 136 _(
137 137 b'warning: stream clone requested but client is missing '
138 138 b'requirements: %s\n'
139 139 )
140 140 % b', '.join(sorted(missingreqs))
141 141 )
142 142 pullop.repo.ui.warn(
143 143 _(
144 144 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
145 145 b'for more information)\n'
146 146 )
147 147 )
148 148 return False, None
149 149 requirements = streamreqs
150 150
151 151 return True, requirements
152 152
153 153
154 154 def maybeperformlegacystreamclone(pullop):
155 155 """Possibly perform a legacy stream clone operation.
156 156
157 157 Legacy stream clones are performed as part of pull but before all other
158 158 operations.
159 159
160 160 A legacy stream clone will not be performed if a bundle2 stream clone is
161 161 supported.
162 162 """
163 163 from . import localrepo
164 164
165 165 supported, requirements = canperformstreamclone(pullop)
166 166
167 167 if not supported:
168 168 return
169 169
170 170 repo = pullop.repo
171 171 remote = pullop.remote
172 172
173 173 # Save remote branchmap. We will use it later to speed up branchcache
174 174 # creation.
175 175 rbranchmap = None
176 176 if remote.capable(b'branchmap'):
177 177 with remote.commandexecutor() as e:
178 178 rbranchmap = e.callcommand(b'branchmap', {}).result()
179 179
180 180 repo.ui.status(_(b'streaming all changes\n'))
181 181
182 182 with remote.commandexecutor() as e:
183 183 fp = e.callcommand(b'stream_out', {}).result()
184 184
185 185 # TODO strictly speaking, this code should all be inside the context
186 186 # manager because the context manager is supposed to ensure all wire state
187 187 # is flushed when exiting. But the legacy peers don't do this, so it
188 188 # doesn't matter.
189 189 l = fp.readline()
190 190 try:
191 191 resp = int(l)
192 192 except ValueError:
193 193 raise error.ResponseError(
194 194 _(b'unexpected response from remote server:'), l
195 195 )
196 196 if resp == 1:
197 197 raise error.Abort(_(b'operation forbidden by server'))
198 198 elif resp == 2:
199 199 raise error.Abort(_(b'locking the remote repository failed'))
200 200 elif resp != 0:
201 201 raise error.Abort(_(b'the server sent an unknown error code'))
202 202
203 203 l = fp.readline()
204 204 try:
205 205 filecount, bytecount = map(int, l.split(b' ', 1))
206 206 except (ValueError, TypeError):
207 207 raise error.ResponseError(
208 208 _(b'unexpected response from remote server:'), l
209 209 )
210 210
211 211 with repo.lock():
212 212 consumev1(repo, fp, filecount, bytecount)
213 213 repo.requirements = new_stream_clone_requirements(
214 214 repo.requirements,
215 215 requirements,
216 216 )
217 217 repo.svfs.options = localrepo.resolvestorevfsoptions(
218 218 repo.ui, repo.requirements, repo.features
219 219 )
220 220 scmutil.writereporequirements(repo)
221 221 nodemap.post_stream_cleanup(repo)
222 222
223 223 if rbranchmap:
224 224 repo._branchcaches.replace(repo, rbranchmap)
225 225
226 226 repo.invalidate()
227 227
228 228
229 229 def allowservergeneration(repo):
230 230 """Whether streaming clones are allowed from the server."""
231 231 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
232 232 return False
233 233
234 234 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
235 235 return False
236 236
237 237 # The way stream clone works makes it impossible to hide secret changesets.
238 238 # So don't allow this by default.
239 239 secret = phases.hassecret(repo)
240 240 if secret:
241 241 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
242 242
243 243 return True
244 244
245 245
246 246 # This is its own function so extensions can override it.
247 247 def _walkstreamfiles(repo, matcher=None):
248 248 return repo.store.walk(matcher)
249 249
250 250
251 251 def generatev1(repo):
252 252 """Emit content for version 1 of a streaming clone.
253 253
254 254 This returns a 3-tuple of (file count, byte size, data iterator).
255 255
256 256 The data iterator consists of N entries for each file being transferred.
257 257 Each file entry starts as a line with the file name and integer size
258 258 delimited by a null byte.
259 259
260 260 The raw file data follows. Following the raw file data is the next file
261 261 entry, or EOF.
262 262
263 263 When used on the wire protocol, an additional line indicating protocol
264 264 success will be prepended to the stream. This function is not responsible
265 265 for adding it.
266 266
267 267 This function will obtain a repository lock to ensure a consistent view of
268 268 the store is captured. It therefore may raise LockError.
269 269 """
270 270 entries = []
271 271 total_bytes = 0
272 272 # Get consistent snapshot of repo, lock during scan.
273 273 with repo.lock():
274 274 repo.ui.debug(b'scanning\n')
275 275 for file_type, name, size in _walkstreamfiles(repo):
276 276 if size:
277 277 entries.append((name, size))
278 278 total_bytes += size
279 279 _test_sync_point_walk_1(repo)
280 280 _test_sync_point_walk_2(repo)
281 281
282 282 repo.ui.debug(
283 283 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
284 284 )
285 285
286 286 svfs = repo.svfs
287 287 debugflag = repo.ui.debugflag
288 288
289 289 def emitrevlogdata():
290 290 for name, size in entries:
291 291 if debugflag:
292 292 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
293 293 # partially encode name over the wire for backwards compat
294 294 yield b'%s\0%d\n' % (store.encodedir(name), size)
295 295 # auditing at this stage is both pointless (paths are already
296 296 # trusted by the local repo) and expensive
297 297 with svfs(name, b'rb', auditpath=False) as fp:
298 298 if size <= 65536:
299 299 yield fp.read(size)
300 300 else:
301 301 for chunk in util.filechunkiter(fp, limit=size):
302 302 yield chunk
303 303
304 304 return len(entries), total_bytes, emitrevlogdata()
305 305
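The docstring above spells out the v1 per-file layout: a "name\0size\n" header line followed by the raw bytes. As a rough illustration of consuming one such entry (a sketch, simplified compared to consumev1() further down, assuming fp is a binary file-like object):

def read_v1_entry(fp):
    # header line is b'<encoded name>\0<size>\n'
    header = fp.readline()
    name, size = header.split(b'\0', 1)
    size = int(size)
    # the raw file data immediately follows the header
    data = fp.read(size)
    # consumev1 likewise decodes the partially encoded name
    return store.decodedir(name), data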
306 306
307 307 def generatev1wireproto(repo):
308 308 """Emit content for version 1 of streaming clone suitable for the wire.
309 309
310 310 This is the data output from ``generatev1()`` with 2 header lines. The
311 311 first line indicates overall success. The 2nd contains the file count and
312 312 byte size of payload.
313 313
314 314 The success line contains "0" for success, "1" for stream generation not
315 315 allowed, and "2" for error locking the repository (possibly indicating
316 316 a permissions error for the server process).
317 317 """
318 318 if not allowservergeneration(repo):
319 319 yield b'1\n'
320 320 return
321 321
322 322 try:
323 323 filecount, bytecount, it = generatev1(repo)
324 324 except error.LockError:
325 325 yield b'2\n'
326 326 return
327 327
328 328 # Indicates successful response.
329 329 yield b'0\n'
330 330 yield b'%d %d\n' % (filecount, bytecount)
331 331 for chunk in it:
332 332 yield chunk
333 333
334 334
335 335 def generatebundlev1(repo, compression=b'UN'):
336 336 """Emit content for version 1 of a stream clone bundle.
337 337
338 338 The first 4 bytes of the output ("HGS1") denote this as stream clone
339 339 bundle version 1.
340 340
341 341 The next 2 bytes indicate the compression type. Only "UN" is currently
342 342 supported.
343 343
344 344 The next 16 bytes are two 64-bit big endian unsigned integers indicating
345 345 file count and byte count, respectively.
346 346
347 347 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
348 348 of the requirements string, including a trailing \0. The following N bytes
349 349 are the requirements string, which is ASCII containing a comma-delimited
350 350 list of repo requirements that are needed to support the data.
351 351
352 352 The remaining content is the output of ``generatev1()`` (which may be
353 353 compressed in the future).
354 354
355 355 Returns a tuple of (requirements, data generator).
356 356 """
357 357 if compression != b'UN':
358 358 raise ValueError(b'we do not support the compression argument yet')
359 359
360 360 requirements = streamed_requirements(repo)
361 361 requires = b','.join(sorted(requirements))
362 362
363 363 def gen():
364 364 yield b'HGS1'
365 365 yield compression
366 366
367 367 filecount, bytecount, it = generatev1(repo)
368 368 repo.ui.status(
369 369 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
370 370 )
371 371
372 372 yield struct.pack(b'>QQ', filecount, bytecount)
373 373 yield struct.pack(b'>H', len(requires) + 1)
374 374 yield requires + b'\0'
375 375
376 376 # This is where we'll add compression in the future.
377 377 assert compression == b'UN'
378 378
379 379 progress = repo.ui.makeprogress(
380 380 _(b'bundle'), total=bytecount, unit=_(b'bytes')
381 381 )
382 382 progress.update(0)
383 383
384 384 for chunk in it:
385 385 progress.increment(step=len(chunk))
386 386 yield chunk
387 387
388 388 progress.complete()
389 389
390 390 return requirements, gen()
391 391
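As an aside (a sketch, not code from this changeset), the header layout described in the docstring above can be written down with struct in a few lines; readbundle1header() further down is the real parser.

import struct

def build_bundle1_header(filecount, bytecount, requirements):
    # sketch of the HGS1 header: magic, compression, counts, requirements
    requires = b','.join(sorted(requirements)) + b'\0'
    return (
        b'HGS1'                                      # 4-byte magic
        + b'UN'                                      # 2-byte compression type
        + struct.pack('>QQ', filecount, bytecount)   # 64-bit file and byte counts
        + struct.pack('>H', len(requires))           # 16-bit length, incl. trailing NUL
        + requires
    )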
392 392
393 393 def consumev1(repo, fp, filecount, bytecount):
394 394 """Apply the contents from version 1 of a streaming clone file handle.
395 395
396 396 This takes the output from "stream_out" and applies it to the specified
397 397 repository.
398 398
399 399 Like "stream_out," the status line added by the wire protocol is not
400 400 handled by this function.
401 401 """
402 402 with repo.lock():
403 403 repo.ui.status(
404 404 _(b'%d files to transfer, %s of data\n')
405 405 % (filecount, util.bytecount(bytecount))
406 406 )
407 407 progress = repo.ui.makeprogress(
408 408 _(b'clone'), total=bytecount, unit=_(b'bytes')
409 409 )
410 410 progress.update(0)
411 411 start = util.timer()
412 412
413 413 # TODO: get rid of (potential) inconsistency
414 414 #
415 415 # If transaction is started and any @filecache property is
416 416 # changed at this point, it causes inconsistency between
417 417 # in-memory cached property and streamclone-ed file on the
418 418 # disk. Nested transaction prevents transaction scope "clone"
419 419 # below from writing in-memory changes out at the end of it,
420 420 # even though in-memory changes are discarded at the end of it
421 421 # regardless of transaction nesting.
422 422 #
423 423 # But transaction nesting can't be simply prohibited, because
424 424 # nesting occurs also in ordinary case (e.g. enabling
425 425 # clonebundles).
426 426
427 427 with repo.transaction(b'clone'):
428 428 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
429 429 for i in pycompat.xrange(filecount):
430 430 # XXX doesn't support '\n' or '\r' in filenames
431 431 l = fp.readline()
432 432 try:
433 433 name, size = l.split(b'\0', 1)
434 434 size = int(size)
435 435 except (ValueError, TypeError):
436 436 raise error.ResponseError(
437 437 _(b'unexpected response from remote server:'), l
438 438 )
439 439 if repo.ui.debugflag:
440 440 repo.ui.debug(
441 441 b'adding %s (%s)\n' % (name, util.bytecount(size))
442 442 )
443 443 # for backwards compat, name was partially encoded
444 444 path = store.decodedir(name)
445 445 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
446 446 for chunk in util.filechunkiter(fp, limit=size):
447 447 progress.increment(step=len(chunk))
448 448 ofp.write(chunk)
449 449
450 450 # force @filecache properties to be reloaded from
451 451 # streamclone-ed file at next access
452 452 repo.invalidate(clearfilecache=True)
453 453
454 454 elapsed = util.timer() - start
455 455 if elapsed <= 0:
456 456 elapsed = 0.001
457 457 progress.complete()
458 458 repo.ui.status(
459 459 _(b'transferred %s in %.1f seconds (%s/sec)\n')
460 460 % (
461 461 util.bytecount(bytecount),
462 462 elapsed,
463 463 util.bytecount(bytecount / elapsed),
464 464 )
465 465 )
466 466
467 467
468 468 def readbundle1header(fp):
469 469 compression = fp.read(2)
470 470 if compression != b'UN':
471 471 raise error.Abort(
472 472 _(
473 473 b'only uncompressed stream clone bundles are '
474 474 b'supported; got %s'
475 475 )
476 476 % compression
477 477 )
478 478
479 479 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
480 480 requireslen = struct.unpack(b'>H', fp.read(2))[0]
481 481 requires = fp.read(requireslen)
482 482
483 483 if not requires.endswith(b'\0'):
484 484 raise error.Abort(
485 485 _(
486 486 b'malformed stream clone bundle: '
487 487 b'requirements not properly encoded'
488 488 )
489 489 )
490 490
491 491 requirements = set(requires.rstrip(b'\0').split(b','))
492 492
493 493 return filecount, bytecount, requirements
494 494
495 495
496 496 def applybundlev1(repo, fp):
497 497 """Apply the content from a stream clone bundle version 1.
498 498
499 499 We assume the 4 byte header has been read and validated and the file handle
500 500 is at the 2 byte compression identifier.
501 501 """
502 502 if len(repo):
503 503 raise error.Abort(
504 504 _(b'cannot apply stream clone bundle on non-empty repo')
505 505 )
506 506
507 507 filecount, bytecount, requirements = readbundle1header(fp)
508 508 missingreqs = requirements - repo.supported
509 509 if missingreqs:
510 510 raise error.Abort(
511 511 _(b'unable to apply stream clone: unsupported format: %s')
512 512 % b', '.join(sorted(missingreqs))
513 513 )
514 514
515 515 consumev1(repo, fp, filecount, bytecount)
516 516 nodemap.post_stream_cleanup(repo)
517 517
518 518
519 519 class streamcloneapplier:
520 520 """Class to manage applying streaming clone bundles.
521 521
522 522 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
523 523 readers to perform bundle type-specific functionality.
524 524 """
525 525
526 526 def __init__(self, fh):
527 527 self._fh = fh
528 528
529 529 def apply(self, repo):
530 530 return applybundlev1(repo, self._fh)
531 531
532 532
533 533 # type of file to stream
534 534 _fileappend = 0 # append only file
535 535 _filefull = 1 # full snapshot file
536 536
537 537 # Source of the file
538 538 _srcstore = b's' # store (svfs)
539 539 _srccache = b'c' # cache (cache)
540 540
541 541 # This is its own function so extensions can override it.
542 542 def _walkstreamfullstorefiles(repo):
543 543 """list snapshot file from the store"""
544 544 fnames = []
545 545 if not repo.publishing():
546 546 fnames.append(b'phaseroots')
547 547 return fnames
548 548
549 549
550 550 def _filterfull(entry, copy, vfsmap):
551 551 """actually copy the snapshot files"""
552 552 src, name, ftype, data = entry
553 553 if ftype != _filefull:
554 554 return entry
555 555 return (src, name, ftype, copy(vfsmap[src].join(name)))
556 556
557 557
558 558 @contextlib.contextmanager
559 559 def maketempcopies():
560 560 """return a function to temporary copy file"""
561
561 562 files = []
563 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
562 564 try:
563 565
564 566 def copy(src):
565 fd, dst = pycompat.mkstemp()
567 fd, dst = pycompat.mkstemp(
568 prefix=os.path.basename(src), dir=dst_dir
569 )
566 570 os.close(fd)
567 571 files.append(dst)
568 572 util.copyfiles(src, dst, hardlink=True)
569 573 return dst
570 574
571 575 yield copy
572 576 finally:
573 577 for tmp in files:
574 578 util.tryunlink(tmp)
579 util.tryrmdir(dst_dir)
575 580
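This is the heart of the change: temporary copies used to be anonymous mkstemp() files in the default temp directory; they now live in a dedicated hg-clone-* directory and carry the source file's basename as a prefix, which makes stray files much easier to attribute. A rough sketch of the resulting naming with the plain tempfile module (the real code goes through the pycompat wrappers; the paths shown are made up):

import os
import tempfile

dst_dir = tempfile.mkdtemp(prefix='hg-clone-')        # e.g. /tmp/hg-clone-k3v9q1
fd, dst = tempfile.mkstemp(prefix=os.path.basename('/repo/.hg/store/phaseroots'),
                           dir=dst_dir)
os.close(fd)
# dst is now something like /tmp/hg-clone-k3v9q1/phaserootsab12cd,
# i.e. the temp file name still hints at which store file it copies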
576 581
577 582 def _makemap(repo):
578 583 """make a (src -> vfs) map for the repo"""
579 584 vfsmap = {
580 585 _srcstore: repo.svfs,
581 586 _srccache: repo.cachevfs,
582 587 }
583 588 # we keep repo.vfs out of the map on purpose, there are too many dangers there
584 589 # (eg: .hg/hgrc)
585 590 assert repo.vfs not in vfsmap.values()
586 591
587 592 return vfsmap
588 593
589 594
590 595 def _emit2(repo, entries, totalfilesize):
591 596 """actually emit the stream bundle"""
592 597 vfsmap = _makemap(repo)
593 598 # we keep repo.vfs out of the map on purpose, there are too many dangers there
594 599 # (eg: .hg/hgrc),
595 600 #
596 601 # this assert is duplicated (from _makemap) as an author might think this is
597 602 # fine, while this is really not fine.
598 603 if repo.vfs in vfsmap.values():
599 604 raise error.ProgrammingError(
600 605 b'repo.vfs must not be added to vfsmap for security reasons'
601 606 )
602 607
603 608 progress = repo.ui.makeprogress(
604 609 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
605 610 )
606 611 progress.update(0)
607 612 with maketempcopies() as copy, progress:
608 613 # copy is delayed until we are in the try
609 614 entries = [_filterfull(e, copy, vfsmap) for e in entries]
610 615 yield None # this releases the lock on the repository
611 616 totalbytecount = 0
612 617
613 618 for src, name, ftype, data in entries:
614 619 vfs = vfsmap[src]
615 620 yield src
616 621 yield util.uvarintencode(len(name))
617 622 if ftype == _fileappend:
618 623 fp = vfs(name)
619 624 size = data
620 625 elif ftype == _filefull:
621 626 fp = open(data, b'rb')
622 627 size = util.fstat(fp).st_size
623 628 bytecount = 0
624 629 try:
625 630 yield util.uvarintencode(size)
626 631 yield name
627 632 if size <= 65536:
628 633 chunks = (fp.read(size),)
629 634 else:
630 635 chunks = util.filechunkiter(fp, limit=size)
631 636 for chunk in chunks:
632 637 bytecount += len(chunk)
633 638 totalbytecount += len(chunk)
634 639 progress.update(totalbytecount)
635 640 yield chunk
636 641 if bytecount != size:
637 642 # Would most likely be caused by a race due to `hg strip` or
638 643 # a revlog split
639 644 raise error.Abort(
640 645 _(
641 646 b'clone could only read %d bytes from %s, but '
642 647 b'expected %d bytes'
643 648 )
644 649 % (bytecount, name, size)
645 650 )
646 651 finally:
647 652 fp.close()
648 653
649 654
650 655 def _test_sync_point_walk_1(repo):
651 656 """a function for synchronisation during tests"""
652 657
653 658
654 659 def _test_sync_point_walk_2(repo):
655 660 """a function for synchronisation during tests"""
656 661
657 662
658 663 def _v2_walk(repo, includes, excludes, includeobsmarkers):
659 664 """emit a seris of files information useful to clone a repo
660 665
661 666 return (entries, totalfilesize)
662 667
663 668 entries is a list of tuple (vfs-key, file-path, file-type, size)
664 669
665 670 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
666 671 - `name`: file path of the file to copy (to be fed to the vfs)
667 672 - `file-type`: does this file need to be copied with the source lock?
668 673 - `size`: the size of the file (or None)
669 674 """
670 675 assert repo._currentlock(repo._lockref) is not None
671 676 entries = []
672 677 totalfilesize = 0
673 678
674 679 matcher = None
675 680 if includes or excludes:
676 681 matcher = narrowspec.match(repo.root, includes, excludes)
677 682
678 683 for rl_type, name, size in _walkstreamfiles(repo, matcher):
679 684 if size:
680 685 ft = _fileappend
681 686 if rl_type & store.FILEFLAGS_VOLATILE:
682 687 ft = _filefull
683 688 entries.append((_srcstore, name, ft, size))
684 689 totalfilesize += size
685 690 for name in _walkstreamfullstorefiles(repo):
686 691 if repo.svfs.exists(name):
687 692 totalfilesize += repo.svfs.lstat(name).st_size
688 693 entries.append((_srcstore, name, _filefull, None))
689 694 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
690 695 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
691 696 entries.append((_srcstore, b'obsstore', _filefull, None))
692 697 for name in cacheutil.cachetocopy(repo):
693 698 if repo.cachevfs.exists(name):
694 699 totalfilesize += repo.cachevfs.lstat(name).st_size
695 700 entries.append((_srccache, name, _filefull, None))
696 701 return entries, totalfilesize
697 702
698 703
699 704 def generatev2(repo, includes, excludes, includeobsmarkers):
700 705 """Emit content for version 2 of a streaming clone.
701 706
702 707 the data stream consists of the following entries:
703 708 1) A char representing the file destination (eg: store or cache)
704 709 2) A varint containing the length of the filename
705 710 3) A varint containing the length of file data
706 711 4) N bytes containing the filename (the internal, store-agnostic form)
707 712 5) N bytes containing the file data
708 713
709 714 Returns a 3-tuple of (file count, file size, data iterator).
710 715 """
711 716
712 717 with repo.lock():
713 718
714 719 repo.ui.debug(b'scanning\n')
715 720
716 721 entries, totalfilesize = _v2_walk(
717 722 repo,
718 723 includes=includes,
719 724 excludes=excludes,
720 725 includeobsmarkers=includeobsmarkers,
721 726 )
722 727
723 728 chunks = _emit2(repo, entries, totalfilesize)
724 729 first = next(chunks)
725 730 assert first is None
726 731 _test_sync_point_walk_1(repo)
727 732 _test_sync_point_walk_2(repo)
728 733
729 734 return len(entries), totalfilesize, chunks
730 735
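For reference, a single v2 entry as described in the docstring above can be decoded with the same helpers consumev2() uses; this is only a sketch (fp is assumed to be a binary file-like object):

def read_v2_entry(fp):
    src = util.readexactly(fp, 1)             # destination: b's' (store) or b'c' (cache)
    namelen = util.uvarintdecodestream(fp)    # varint length of the file name
    datalen = util.uvarintdecodestream(fp)    # varint length of the file data
    name = util.readexactly(fp, namelen)
    data = util.readexactly(fp, datalen)
    return src, name, data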
731 736
732 737 @contextlib.contextmanager
733 738 def nested(*ctxs):
734 739 this = ctxs[0]
735 740 rest = ctxs[1:]
736 741 with this:
737 742 if rest:
738 743 with nested(*rest):
739 744 yield
740 745 else:
741 746 yield
742 747
743 748
744 749 def consumev2(repo, fp, filecount, filesize):
745 750 """Apply the contents from a version 2 streaming clone.
746 751
747 752 Data is read from an object that only needs to provide a ``read(size)``
748 753 method.
749 754 """
750 755 with repo.lock():
751 756 repo.ui.status(
752 757 _(b'%d files to transfer, %s of data\n')
753 758 % (filecount, util.bytecount(filesize))
754 759 )
755 760
756 761 start = util.timer()
757 762 progress = repo.ui.makeprogress(
758 763 _(b'clone'), total=filesize, unit=_(b'bytes')
759 764 )
760 765 progress.update(0)
761 766
762 767 vfsmap = _makemap(repo)
763 768 # we keep repo.vfs out of the map on purpose, there are too many dangers
764 769 # there (eg: .hg/hgrc),
765 770 #
766 771 # this assert is duplicated (from _makemap) as an author might think this
767 772 # is fine, while this is really not fine.
768 773 if repo.vfs in vfsmap.values():
769 774 raise error.ProgrammingError(
770 775 b'repo.vfs must not be added to vfsmap for security reasons'
771 776 )
772 777
773 778 with repo.transaction(b'clone'):
774 779 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
775 780 with nested(*ctxs):
776 781 for i in range(filecount):
777 782 src = util.readexactly(fp, 1)
778 783 vfs = vfsmap[src]
779 784 namelen = util.uvarintdecodestream(fp)
780 785 datalen = util.uvarintdecodestream(fp)
781 786
782 787 name = util.readexactly(fp, namelen)
783 788
784 789 if repo.ui.debugflag:
785 790 repo.ui.debug(
786 791 b'adding [%s] %s (%s)\n'
787 792 % (src, name, util.bytecount(datalen))
788 793 )
789 794
790 795 with vfs(name, b'w') as ofp:
791 796 for chunk in util.filechunkiter(fp, limit=datalen):
792 797 progress.increment(step=len(chunk))
793 798 ofp.write(chunk)
794 799
795 800 # force @filecache properties to be reloaded from
796 801 # streamclone-ed file at next access
797 802 repo.invalidate(clearfilecache=True)
798 803
799 804 elapsed = util.timer() - start
800 805 if elapsed <= 0:
801 806 elapsed = 0.001
802 807 repo.ui.status(
803 808 _(b'transferred %s in %.1f seconds (%s/sec)\n')
804 809 % (
805 810 util.bytecount(progress.pos),
806 811 elapsed,
807 812 util.bytecount(progress.pos / elapsed),
808 813 )
809 814 )
810 815 progress.complete()
811 816
812 817
813 818 def applybundlev2(repo, fp, filecount, filesize, requirements):
814 819 from . import localrepo
815 820
816 821 missingreqs = [r for r in requirements if r not in repo.supported]
817 822 if missingreqs:
818 823 raise error.Abort(
819 824 _(b'unable to apply stream clone: unsupported format: %s')
820 825 % b', '.join(sorted(missingreqs))
821 826 )
822 827
823 828 consumev2(repo, fp, filecount, filesize)
824 829
825 830 repo.requirements = new_stream_clone_requirements(
826 831 repo.requirements,
827 832 requirements,
828 833 )
829 834 repo.svfs.options = localrepo.resolvestorevfsoptions(
830 835 repo.ui, repo.requirements, repo.features
831 836 )
832 837 scmutil.writereporequirements(repo)
833 838 nodemap.post_stream_cleanup(repo)
834 839
835 840
836 841 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
837 842 hardlink = [True]
838 843
839 844 def copy_used():
840 845 hardlink[0] = False
841 846 progress.topic = _(b'copying')
842 847
843 848 for k, path, size in entries:
844 849 src_vfs = src_vfs_map[k]
845 850 dst_vfs = dst_vfs_map[k]
846 851 src_path = src_vfs.join(path)
847 852 dst_path = dst_vfs.join(path)
848 853 # We cannot use dirname and makedirs of dst_vfs here because the store
849 854 # encoding confuses them. See issue 6581 for details.
850 855 dirname = os.path.dirname(dst_path)
851 856 if not os.path.exists(dirname):
852 857 util.makedirs(dirname)
853 858 dst_vfs.register_file(path)
854 859 # XXX we could use the #nb_bytes argument.
855 860 util.copyfile(
856 861 src_path,
857 862 dst_path,
858 863 hardlink=hardlink[0],
859 864 no_hardlink_cb=copy_used,
860 865 check_fs_hardlink=False,
861 866 )
862 867 progress.increment()
863 868 return hardlink[0]
864 869
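_copy_files above relies on util.copyfile's hardlink-with-fallback behaviour: it first tries to hardlink, and the no_hardlink_cb callback flips the shared flag so the final message reports "copied" instead of "linked". A generic stdlib-only sketch of that pattern (illustrative, not the actual implementation):

import os
import shutil

def link_or_copy(src_path, dst_path, state):
    try:
        os.link(src_path, dst_path)       # hardlink keeps local clones cheap
    except OSError:
        state['hardlink'] = False         # remember that we had to fall back
        shutil.copy2(src_path, dst_path)  # real copy when linking is impossible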
865 870
866 871 def local_copy(src_repo, dest_repo):
867 872 """copy all content from one local repository to another
868 873
869 874 This is useful for local clone"""
870 875 src_store_requirements = {
871 876 r
872 877 for r in src_repo.requirements
873 878 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
874 879 }
875 880 dest_store_requirements = {
876 881 r
877 882 for r in dest_repo.requirements
878 883 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
879 884 }
880 885 assert src_store_requirements == dest_store_requirements
881 886
882 887 with dest_repo.lock():
883 888 with src_repo.lock():
884 889
885 890 # bookmarks are not integrated into the streaming as they might use the
886 891 # `repo.vfs`, and there is too much sensitive data accessible
887 892 # through `repo.vfs` to expose it to streaming clone.
888 893 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
889 894 srcbookmarks = src_book_vfs.join(b'bookmarks')
890 895 bm_count = 0
891 896 if os.path.exists(srcbookmarks):
892 897 bm_count = 1
893 898
894 899 entries, totalfilesize = _v2_walk(
895 900 src_repo,
896 901 includes=None,
897 902 excludes=None,
898 903 includeobsmarkers=True,
899 904 )
900 905 src_vfs_map = _makemap(src_repo)
901 906 dest_vfs_map = _makemap(dest_repo)
902 907 progress = src_repo.ui.makeprogress(
903 908 topic=_(b'linking'),
904 909 total=len(entries) + bm_count,
905 910 unit=_(b'files'),
906 911 )
907 912 # copy files
908 913 #
909 914 # We could copy the full file while the source repository is locked
910 915 # and the other one without the lock. However, in the linking case,
911 916 # this would also require checks that nobody is appending any data
912 917 # to the files while we do the clone, so this is not done yet. We
913 918 # could do this blindly when copying files.
914 919 files = ((k, path, size) for k, path, ftype, size in entries)
915 920 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
916 921
917 922 # copy bookmarks over
918 923 if bm_count:
919 924 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
920 925 dstbookmarks = dst_book_vfs.join(b'bookmarks')
921 926 util.copyfile(srcbookmarks, dstbookmarks)
922 927 progress.complete()
923 928 if hardlink:
924 929 msg = b'linked %d files\n'
925 930 else:
926 931 msg = b'copied %d files\n'
927 932 src_repo.ui.debug(msg % (len(entries) + bm_count))
928 933
929 934 with dest_repo.transaction(b"localclone") as tr:
930 935 dest_repo.store.write(tr)
931 936
932 937 # clean up transaction files as they do not make sense
933 938 undo_files = [(dest_repo.svfs, b'undo.backupfiles')]
934 939 undo_files.extend(dest_repo.undofiles())
935 940 for undovfs, undofile in undo_files:
936 941 try:
937 942 undovfs.unlink(undofile)
938 943 except OSError as e:
939 944 if e.errno != errno.ENOENT:
940 945 msg = _(b'error removing %s: %s\n')
941 946 path = undovfs.join(undofile)
942 947 e_msg = stringutil.forcebytestr(e)
943 948 msg %= (path, e_msg)
944 949 dest_repo.ui.warn(msg)
@@ -1,3319 +1,3327 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16
17 17 import abc
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import gc
22 22 import hashlib
23 23 import io
24 24 import itertools
25 25 import locale
26 26 import mmap
27 27 import os
28 28 import pickle # provides util.pickle symbol
29 29 import re as remod
30 30 import shutil
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .node import hex
38 38 from .thirdparty import attr
39 39 from .pycompat import (
40 40 delattr,
41 41 getattr,
42 42 open,
43 43 setattr,
44 44 )
45 45 from hgdemandimport import tracing
46 46 from . import (
47 47 encoding,
48 48 error,
49 49 i18n,
50 50 policy,
51 51 pycompat,
52 52 urllibcompat,
53 53 )
54 54 from .utils import (
55 55 compression,
56 56 hashutil,
57 57 procutil,
58 58 stringutil,
59 59 )
60 60
61 61 if pycompat.TYPE_CHECKING:
62 62 from typing import (
63 63 Iterator,
64 64 List,
65 65 Optional,
66 66 Tuple,
67 67 )
68 68
69 69
70 70 base85 = policy.importmod('base85')
71 71 osutil = policy.importmod('osutil')
72 72
73 73 b85decode = base85.b85decode
74 74 b85encode = base85.b85encode
75 75
76 76 cookielib = pycompat.cookielib
77 77 httplib = pycompat.httplib
78 78 safehasattr = pycompat.safehasattr
79 79 socketserver = pycompat.socketserver
80 80 bytesio = io.BytesIO
81 81 # TODO deprecate stringio name, as it is a lie on Python 3.
82 82 stringio = bytesio
83 83 xmlrpclib = pycompat.xmlrpclib
84 84
85 85 httpserver = urllibcompat.httpserver
86 86 urlerr = urllibcompat.urlerr
87 87 urlreq = urllibcompat.urlreq
88 88
89 89 # workaround for win32mbcs
90 90 _filenamebytestr = pycompat.bytestr
91 91
92 92 if pycompat.iswindows:
93 93 from . import windows as platform
94 94 else:
95 95 from . import posix as platform
96 96
97 97 _ = i18n._
98 98
99 99 abspath = platform.abspath
100 100 bindunixsocket = platform.bindunixsocket
101 101 cachestat = platform.cachestat
102 102 checkexec = platform.checkexec
103 103 checklink = platform.checklink
104 104 copymode = platform.copymode
105 105 expandglobs = platform.expandglobs
106 106 getfsmountpoint = platform.getfsmountpoint
107 107 getfstype = platform.getfstype
108 108 get_password = platform.get_password
109 109 groupmembers = platform.groupmembers
110 110 groupname = platform.groupname
111 111 isexec = platform.isexec
112 112 isowner = platform.isowner
113 113 listdir = osutil.listdir
114 114 localpath = platform.localpath
115 115 lookupreg = platform.lookupreg
116 116 makedir = platform.makedir
117 117 nlinks = platform.nlinks
118 118 normpath = platform.normpath
119 119 normcase = platform.normcase
120 120 normcasespec = platform.normcasespec
121 121 normcasefallback = platform.normcasefallback
122 122 openhardlinks = platform.openhardlinks
123 123 oslink = platform.oslink
124 124 parsepatchoutput = platform.parsepatchoutput
125 125 pconvert = platform.pconvert
126 126 poll = platform.poll
127 127 posixfile = platform.posixfile
128 128 readlink = platform.readlink
129 129 rename = platform.rename
130 130 removedirs = platform.removedirs
131 131 samedevice = platform.samedevice
132 132 samefile = platform.samefile
133 133 samestat = platform.samestat
134 134 setflags = platform.setflags
135 135 split = platform.split
136 136 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
137 137 statisexec = platform.statisexec
138 138 statislink = platform.statislink
139 139 umask = platform.umask
140 140 unlink = platform.unlink
141 141 username = platform.username
142 142
143 143
144 144 def setumask(val):
145 145 # type: (int) -> None
146 146 '''updates the umask. used by chg server'''
147 147 if pycompat.iswindows:
148 148 return
149 149 os.umask(val)
150 150 global umask
151 151 platform.umask = umask = val & 0o777
152 152
153 153
154 154 # small compat layer
155 155 compengines = compression.compengines
156 156 SERVERROLE = compression.SERVERROLE
157 157 CLIENTROLE = compression.CLIENTROLE
158 158
159 159 try:
160 160 recvfds = osutil.recvfds
161 161 except AttributeError:
162 162 pass
163 163
164 164 # Python compatibility
165 165
166 166 _notset = object()
167 167
168 168
169 169 def bitsfrom(container):
170 170 bits = 0
171 171 for bit in container:
172 172 bits |= bit
173 173 return bits
174 174
175 175
176 176 # python 2.6 still has deprecation warnings enabled by default. We do not want
177 177 # to display anything to the standard user, so detect if we are running tests and
178 178 # only use python deprecation warnings in this case.
179 179 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
180 180 if _dowarn:
181 181 # explicitly unfilter our warning for python 2.7
182 182 #
183 183 # The option of setting PYTHONWARNINGS in the test runner was investigated.
184 184 # However, module name set through PYTHONWARNINGS was exactly matched, so
185 185 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
186 186 # makes the whole PYTHONWARNINGS thing useless for our usecase.
187 187 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
188 188 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
189 189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
190 190 if _dowarn:
191 191 # silence warning emitted by passing user string to re.sub()
192 192 warnings.filterwarnings(
193 193 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
194 194 )
195 195 warnings.filterwarnings(
196 196 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
197 197 )
198 198 # TODO: reinvent imp.is_frozen()
199 199 warnings.filterwarnings(
200 200 'ignore',
201 201 'the imp module is deprecated',
202 202 DeprecationWarning,
203 203 'mercurial',
204 204 )
205 205
206 206
207 207 def nouideprecwarn(msg, version, stacklevel=1):
208 208 """Issue an python native deprecation warning
209 209
210 210 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
211 211 """
212 212 if _dowarn:
213 213 msg += (
214 214 b"\n(compatibility will be dropped after Mercurial-%s,"
215 215 b" update your code.)"
216 216 ) % version
217 217 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
218 218 # on python 3 with chg, we will need to explicitly flush the output
219 219 sys.stderr.flush()
220 220
221 221
222 222 DIGESTS = {
223 223 b'md5': hashlib.md5,
224 224 b'sha1': hashutil.sha1,
225 225 b'sha512': hashlib.sha512,
226 226 }
227 227 # List of digest types from strongest to weakest
228 228 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
229 229
230 230 for k in DIGESTS_BY_STRENGTH:
231 231 assert k in DIGESTS
232 232
233 233
234 234 class digester:
235 235 """helper to compute digests.
236 236
237 237 This helper can be used to compute one or more digests given their name.
238 238
239 239 >>> d = digester([b'md5', b'sha1'])
240 240 >>> d.update(b'foo')
241 241 >>> [k for k in sorted(d)]
242 242 ['md5', 'sha1']
243 243 >>> d[b'md5']
244 244 'acbd18db4cc2f85cedef654fccc4a4d8'
245 245 >>> d[b'sha1']
246 246 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
247 247 >>> digester.preferred([b'md5', b'sha1'])
248 248 'sha1'
249 249 """
250 250
251 251 def __init__(self, digests, s=b''):
252 252 self._hashes = {}
253 253 for k in digests:
254 254 if k not in DIGESTS:
255 255 raise error.Abort(_(b'unknown digest type: %s') % k)
256 256 self._hashes[k] = DIGESTS[k]()
257 257 if s:
258 258 self.update(s)
259 259
260 260 def update(self, data):
261 261 for h in self._hashes.values():
262 262 h.update(data)
263 263
264 264 def __getitem__(self, key):
265 265 if key not in DIGESTS:
266 266 raise error.Abort(_(b'unknown digest type: %s') % key)
267 267 return hex(self._hashes[key].digest())
268 268
269 269 def __iter__(self):
270 270 return iter(self._hashes)
271 271
272 272 @staticmethod
273 273 def preferred(supported):
274 274 """returns the strongest digest type in both supported and DIGESTS."""
275 275
276 276 for k in DIGESTS_BY_STRENGTH:
277 277 if k in supported:
278 278 return k
279 279 return None
280 280
281 281
282 282 class digestchecker:
283 283 """file handle wrapper that additionally checks content against a given
284 284 size and digests.
285 285
286 286 d = digestchecker(fh, size, {'md5': '...'})
287 287
288 288 When multiple digests are given, all of them are validated.
289 289 """
290 290
291 291 def __init__(self, fh, size, digests):
292 292 self._fh = fh
293 293 self._size = size
294 294 self._got = 0
295 295 self._digests = dict(digests)
296 296 self._digester = digester(self._digests.keys())
297 297
298 298 def read(self, length=-1):
299 299 content = self._fh.read(length)
300 300 self._digester.update(content)
301 301 self._got += len(content)
302 302 return content
303 303
304 304 def validate(self):
305 305 if self._size != self._got:
306 306 raise error.Abort(
307 307 _(b'size mismatch: expected %d, got %d')
308 308 % (self._size, self._got)
309 309 )
310 310 for k, v in self._digests.items():
311 311 if v != self._digester[k]:
312 312 # i18n: first parameter is a digest name
313 313 raise error.Abort(
314 314 _(b'%s mismatch: expected %s, got %s')
315 315 % (k, v, self._digester[k])
316 316 )
317 317
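A short usage sketch for digestchecker (the file name, size, and md5 value below are placeholders): wrap a file handle, read it to the end, then validate.

fh = open('payload.bin', 'rb')
checked = digestchecker(fh, 1048576, {b'md5': b'<expected hex digest>'})
while True:
    chunk = checked.read(65536)   # every read is hashed and counted
    if not chunk:
        break
checked.validate()                # raises error.Abort on size or digest mismatch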
318 318
319 319 try:
320 320 buffer = buffer # pytype: disable=name-error
321 321 except NameError:
322 322
323 323 def buffer(sliceable, offset=0, length=None):
324 324 if length is not None:
325 325 return memoryview(sliceable)[offset : offset + length]
326 326 return memoryview(sliceable)[offset:]
327 327
328 328
329 329 _chunksize = 4096
330 330
331 331
332 332 class bufferedinputpipe:
333 333 """a manually buffered input pipe
334 334
335 335 Python will not let us use buffered IO and lazy reading with 'polling' at
336 336 the same time. We cannot probe the buffer state and select will not detect
337 337 that data are ready to read if they are already buffered.
338 338
339 339 This class lets us work around that by implementing its own buffering
340 340 (allowing efficient readline) while offering a way to know if the buffer is
341 341 empty from the output (allowing collaboration of the buffer with polling).
342 342
343 343 This class lives in the 'util' module because it makes use of the 'os'
344 344 module from the python stdlib.
345 345 """
346 346
347 347 def __new__(cls, fh):
348 348 # If we receive a fileobjectproxy, we need to use a variation of this
349 349 # class that notifies observers about activity.
350 350 if isinstance(fh, fileobjectproxy):
351 351 cls = observedbufferedinputpipe
352 352
353 353 return super(bufferedinputpipe, cls).__new__(cls)
354 354
355 355 def __init__(self, input):
356 356 self._input = input
357 357 self._buffer = []
358 358 self._eof = False
359 359 self._lenbuf = 0
360 360
361 361 @property
362 362 def hasbuffer(self):
363 363 """True is any data is currently buffered
364 364
365 365 This will be used externally as a pre-step for polling IO. If there is
366 366 already data then no polling should be set in place."""
367 367 return bool(self._buffer)
368 368
369 369 @property
370 370 def closed(self):
371 371 return self._input.closed
372 372
373 373 def fileno(self):
374 374 return self._input.fileno()
375 375
376 376 def close(self):
377 377 return self._input.close()
378 378
379 379 def read(self, size):
380 380 while (not self._eof) and (self._lenbuf < size):
381 381 self._fillbuffer()
382 382 return self._frombuffer(size)
383 383
384 384 def unbufferedread(self, size):
385 385 if not self._eof and self._lenbuf == 0:
386 386 self._fillbuffer(max(size, _chunksize))
387 387 return self._frombuffer(min(self._lenbuf, size))
388 388
389 389 def readline(self, *args, **kwargs):
390 390 if len(self._buffer) > 1:
391 391 # this should not happen because both read and readline end with a
392 392 # _frombuffer call that collapses it.
393 393 self._buffer = [b''.join(self._buffer)]
394 394 self._lenbuf = len(self._buffer[0])
395 395 lfi = -1
396 396 if self._buffer:
397 397 lfi = self._buffer[-1].find(b'\n')
398 398 while (not self._eof) and lfi < 0:
399 399 self._fillbuffer()
400 400 if self._buffer:
401 401 lfi = self._buffer[-1].find(b'\n')
402 402 size = lfi + 1
403 403 if lfi < 0: # end of file
404 404 size = self._lenbuf
405 405 elif len(self._buffer) > 1:
406 406 # we need to take previous chunks into account
407 407 size += self._lenbuf - len(self._buffer[-1])
408 408 return self._frombuffer(size)
409 409
410 410 def _frombuffer(self, size):
411 411 """return at most 'size' data from the buffer
412 412
413 413 The data are removed from the buffer."""
414 414 if size == 0 or not self._buffer:
415 415 return b''
416 416 buf = self._buffer[0]
417 417 if len(self._buffer) > 1:
418 418 buf = b''.join(self._buffer)
419 419
420 420 data = buf[:size]
421 421 buf = buf[len(data) :]
422 422 if buf:
423 423 self._buffer = [buf]
424 424 self._lenbuf = len(buf)
425 425 else:
426 426 self._buffer = []
427 427 self._lenbuf = 0
428 428 return data
429 429
430 430 def _fillbuffer(self, size=_chunksize):
431 431 """read data to the buffer"""
432 432 data = os.read(self._input.fileno(), size)
433 433 if not data:
434 434 self._eof = True
435 435 else:
436 436 self._lenbuf += len(data)
437 437 self._buffer.append(data)
438 438
439 439 return data
440 440
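The point of hasbuffer, as the class docstring explains, is that bytes already sitting in the Python-level buffer are invisible to select()/poll(). A minimal sketch of the intended pattern (pipe is assumed to be a bufferedinputpipe):

import select

def wait_for_line(pipe):
    if not pipe.hasbuffer:
        # only block in select() when nothing is buffered already
        select.select([pipe.fileno()], [], [])
    return pipe.readline()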
441 441
442 442 def mmapread(fp, size=None):
443 443 if size == 0:
444 444 # size of 0 to mmap.mmap() means "all data"
445 445 # rather than "zero bytes", so special case that.
446 446 return b''
447 447 elif size is None:
448 448 size = 0
449 449 fd = getattr(fp, 'fileno', lambda: fp)()
450 450 try:
451 451 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
452 452 except ValueError:
453 453 # Empty files cannot be mmapped, but mmapread should still work. Check
454 454 # if the file is empty, and if so, return an empty buffer.
455 455 if os.fstat(fd).st_size == 0:
456 456 return b''
457 457 raise
458 458
459 459
460 460 class fileobjectproxy:
461 461 """A proxy around file objects that tells a watcher when events occur.
462 462
463 463 This type is intended to only be used for testing purposes. Think hard
464 464 before using it in important code.
465 465 """
466 466
467 467 __slots__ = (
468 468 '_orig',
469 469 '_observer',
470 470 )
471 471
472 472 def __init__(self, fh, observer):
473 473 object.__setattr__(self, '_orig', fh)
474 474 object.__setattr__(self, '_observer', observer)
475 475
476 476 def __getattribute__(self, name):
477 477 ours = {
478 478 '_observer',
479 479 # IOBase
480 480 'close',
481 481 # closed is a property
482 482 'fileno',
483 483 'flush',
484 484 'isatty',
485 485 'readable',
486 486 'readline',
487 487 'readlines',
488 488 'seek',
489 489 'seekable',
490 490 'tell',
491 491 'truncate',
492 492 'writable',
493 493 'writelines',
494 494 # RawIOBase
495 495 'read',
496 496 'readall',
497 497 'readinto',
498 498 'write',
499 499 # BufferedIOBase
500 500 # raw is a property
501 501 'detach',
502 502 # read defined above
503 503 'read1',
504 504 # readinto defined above
505 505 # write defined above
506 506 }
507 507
508 508 # We only observe some methods.
509 509 if name in ours:
510 510 return object.__getattribute__(self, name)
511 511
512 512 return getattr(object.__getattribute__(self, '_orig'), name)
513 513
514 514 def __nonzero__(self):
515 515 return bool(object.__getattribute__(self, '_orig'))
516 516
517 517 __bool__ = __nonzero__
518 518
519 519 def __delattr__(self, name):
520 520 return delattr(object.__getattribute__(self, '_orig'), name)
521 521
522 522 def __setattr__(self, name, value):
523 523 return setattr(object.__getattribute__(self, '_orig'), name, value)
524 524
525 525 def __iter__(self):
526 526 return object.__getattribute__(self, '_orig').__iter__()
527 527
528 528 def _observedcall(self, name, *args, **kwargs):
529 529 # Call the original object.
530 530 orig = object.__getattribute__(self, '_orig')
531 531 res = getattr(orig, name)(*args, **kwargs)
532 532
533 533 # Call a method on the observer of the same name with arguments
534 534 # so it can react, log, etc.
535 535 observer = object.__getattribute__(self, '_observer')
536 536 fn = getattr(observer, name, None)
537 537 if fn:
538 538 fn(res, *args, **kwargs)
539 539
540 540 return res
541 541
542 542 def close(self, *args, **kwargs):
543 543 return object.__getattribute__(self, '_observedcall')(
544 544 'close', *args, **kwargs
545 545 )
546 546
547 547 def fileno(self, *args, **kwargs):
548 548 return object.__getattribute__(self, '_observedcall')(
549 549 'fileno', *args, **kwargs
550 550 )
551 551
552 552 def flush(self, *args, **kwargs):
553 553 return object.__getattribute__(self, '_observedcall')(
554 554 'flush', *args, **kwargs
555 555 )
556 556
557 557 def isatty(self, *args, **kwargs):
558 558 return object.__getattribute__(self, '_observedcall')(
559 559 'isatty', *args, **kwargs
560 560 )
561 561
562 562 def readable(self, *args, **kwargs):
563 563 return object.__getattribute__(self, '_observedcall')(
564 564 'readable', *args, **kwargs
565 565 )
566 566
567 567 def readline(self, *args, **kwargs):
568 568 return object.__getattribute__(self, '_observedcall')(
569 569 'readline', *args, **kwargs
570 570 )
571 571
572 572 def readlines(self, *args, **kwargs):
573 573 return object.__getattribute__(self, '_observedcall')(
574 574 'readlines', *args, **kwargs
575 575 )
576 576
577 577 def seek(self, *args, **kwargs):
578 578 return object.__getattribute__(self, '_observedcall')(
579 579 'seek', *args, **kwargs
580 580 )
581 581
582 582 def seekable(self, *args, **kwargs):
583 583 return object.__getattribute__(self, '_observedcall')(
584 584 'seekable', *args, **kwargs
585 585 )
586 586
587 587 def tell(self, *args, **kwargs):
588 588 return object.__getattribute__(self, '_observedcall')(
589 589 'tell', *args, **kwargs
590 590 )
591 591
592 592 def truncate(self, *args, **kwargs):
593 593 return object.__getattribute__(self, '_observedcall')(
594 594 'truncate', *args, **kwargs
595 595 )
596 596
597 597 def writable(self, *args, **kwargs):
598 598 return object.__getattribute__(self, '_observedcall')(
599 599 'writable', *args, **kwargs
600 600 )
601 601
602 602 def writelines(self, *args, **kwargs):
603 603 return object.__getattribute__(self, '_observedcall')(
604 604 'writelines', *args, **kwargs
605 605 )
606 606
607 607 def read(self, *args, **kwargs):
608 608 return object.__getattribute__(self, '_observedcall')(
609 609 'read', *args, **kwargs
610 610 )
611 611
612 612 def readall(self, *args, **kwargs):
613 613 return object.__getattribute__(self, '_observedcall')(
614 614 'readall', *args, **kwargs
615 615 )
616 616
617 617 def readinto(self, *args, **kwargs):
618 618 return object.__getattribute__(self, '_observedcall')(
619 619 'readinto', *args, **kwargs
620 620 )
621 621
622 622 def write(self, *args, **kwargs):
623 623 return object.__getattribute__(self, '_observedcall')(
624 624 'write', *args, **kwargs
625 625 )
626 626
627 627 def detach(self, *args, **kwargs):
628 628 return object.__getattribute__(self, '_observedcall')(
629 629 'detach', *args, **kwargs
630 630 )
631 631
632 632 def read1(self, *args, **kwargs):
633 633 return object.__getattribute__(self, '_observedcall')(
634 634 'read1', *args, **kwargs
635 635 )
636 636
637 637
638 638 class observedbufferedinputpipe(bufferedinputpipe):
639 639 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
640 640
641 641 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
642 642 bypass ``fileobjectproxy``. Because of this, we need to make
643 643 ``bufferedinputpipe`` aware of these operations.
644 644
645 645 This variation of ``bufferedinputpipe`` can notify observers about
646 646 ``os.read()`` events. It also re-publishes other events, such as
647 647 ``read()`` and ``readline()``.
648 648 """
649 649
650 650 def _fillbuffer(self):
651 651 res = super(observedbufferedinputpipe, self)._fillbuffer()
652 652
653 653 fn = getattr(self._input._observer, 'osread', None)
654 654 if fn:
655 655 fn(res, _chunksize)
656 656
657 657 return res
658 658
659 659 # We use different observer methods because the operation isn't
660 660 # performed on the actual file object but on us.
661 661 def read(self, size):
662 662 res = super(observedbufferedinputpipe, self).read(size)
663 663
664 664 fn = getattr(self._input._observer, 'bufferedread', None)
665 665 if fn:
666 666 fn(res, size)
667 667
668 668 return res
669 669
670 670 def readline(self, *args, **kwargs):
671 671 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
672 672
673 673 fn = getattr(self._input._observer, 'bufferedreadline', None)
674 674 if fn:
675 675 fn(res)
676 676
677 677 return res
678 678
679 679
680 680 PROXIED_SOCKET_METHODS = {
681 681 'makefile',
682 682 'recv',
683 683 'recvfrom',
684 684 'recvfrom_into',
685 685 'recv_into',
686 686 'send',
687 687 'sendall',
688 688 'sendto',
689 689 'setblocking',
690 690 'settimeout',
691 691 'gettimeout',
692 692 'setsockopt',
693 693 }
694 694
695 695
696 696 class socketproxy:
697 697 """A proxy around a socket that tells a watcher when events occur.
698 698
699 699 This is like ``fileobjectproxy`` except for sockets.
700 700
701 701 This type is intended to only be used for testing purposes. Think hard
702 702 before using it in important code.
703 703 """
704 704
705 705 __slots__ = (
706 706 '_orig',
707 707 '_observer',
708 708 )
709 709
710 710 def __init__(self, sock, observer):
711 711 object.__setattr__(self, '_orig', sock)
712 712 object.__setattr__(self, '_observer', observer)
713 713
714 714 def __getattribute__(self, name):
715 715 if name in PROXIED_SOCKET_METHODS:
716 716 return object.__getattribute__(self, name)
717 717
718 718 return getattr(object.__getattribute__(self, '_orig'), name)
719 719
720 720 def __delattr__(self, name):
721 721 return delattr(object.__getattribute__(self, '_orig'), name)
722 722
723 723 def __setattr__(self, name, value):
724 724 return setattr(object.__getattribute__(self, '_orig'), name, value)
725 725
726 726 def __nonzero__(self):
727 727 return bool(object.__getattribute__(self, '_orig'))
728 728
729 729 __bool__ = __nonzero__
730 730
731 731 def _observedcall(self, name, *args, **kwargs):
732 732 # Call the original object.
733 733 orig = object.__getattribute__(self, '_orig')
734 734 res = getattr(orig, name)(*args, **kwargs)
735 735
736 736 # Call a method on the observer of the same name with arguments
737 737 # so it can react, log, etc.
738 738 observer = object.__getattribute__(self, '_observer')
739 739 fn = getattr(observer, name, None)
740 740 if fn:
741 741 fn(res, *args, **kwargs)
742 742
743 743 return res
744 744
745 745 def makefile(self, *args, **kwargs):
746 746 res = object.__getattribute__(self, '_observedcall')(
747 747 'makefile', *args, **kwargs
748 748 )
749 749
750 750 # The file object may be used for I/O. So we turn it into a
751 751 # proxy using our observer.
752 752 observer = object.__getattribute__(self, '_observer')
753 753 return makeloggingfileobject(
754 754 observer.fh,
755 755 res,
756 756 observer.name,
757 757 reads=observer.reads,
758 758 writes=observer.writes,
759 759 logdata=observer.logdata,
760 760 logdataapis=observer.logdataapis,
761 761 )
762 762
763 763 def recv(self, *args, **kwargs):
764 764 return object.__getattribute__(self, '_observedcall')(
765 765 'recv', *args, **kwargs
766 766 )
767 767
768 768 def recvfrom(self, *args, **kwargs):
769 769 return object.__getattribute__(self, '_observedcall')(
770 770 'recvfrom', *args, **kwargs
771 771 )
772 772
773 773 def recvfrom_into(self, *args, **kwargs):
774 774 return object.__getattribute__(self, '_observedcall')(
775 775 'recvfrom_into', *args, **kwargs
776 776 )
777 777
778 778 def recv_into(self, *args, **kwargs):
779 779 return object.__getattribute__(self, '_observedcall')(
780 780 'recv_info', *args, **kwargs
781 781 )
782 782
783 783 def send(self, *args, **kwargs):
784 784 return object.__getattribute__(self, '_observedcall')(
785 785 'send', *args, **kwargs
786 786 )
787 787
788 788 def sendall(self, *args, **kwargs):
789 789 return object.__getattribute__(self, '_observedcall')(
790 790 'sendall', *args, **kwargs
791 791 )
792 792
793 793 def sendto(self, *args, **kwargs):
794 794 return object.__getattribute__(self, '_observedcall')(
795 795 'sendto', *args, **kwargs
796 796 )
797 797
798 798 def setblocking(self, *args, **kwargs):
799 799 return object.__getattribute__(self, '_observedcall')(
800 800 'setblocking', *args, **kwargs
801 801 )
802 802
803 803 def settimeout(self, *args, **kwargs):
804 804 return object.__getattribute__(self, '_observedcall')(
805 805 'settimeout', *args, **kwargs
806 806 )
807 807
808 808 def gettimeout(self, *args, **kwargs):
809 809 return object.__getattribute__(self, '_observedcall')(
810 810 'gettimeout', *args, **kwargs
811 811 )
812 812
813 813 def setsockopt(self, *args, **kwargs):
814 814 return object.__getattribute__(self, '_observedcall')(
815 815 'setsockopt', *args, **kwargs
816 816 )
817 817
818 818
819 819 class baseproxyobserver:
820 820 def __init__(self, fh, name, logdata, logdataapis):
821 821 self.fh = fh
822 822 self.name = name
823 823 self.logdata = logdata
824 824 self.logdataapis = logdataapis
825 825
826 826 def _writedata(self, data):
827 827 if not self.logdata:
828 828 if self.logdataapis:
829 829 self.fh.write(b'\n')
830 830 self.fh.flush()
831 831 return
832 832
833 833 # Simple case writes all data on a single line.
834 834 if b'\n' not in data:
835 835 if self.logdataapis:
836 836 self.fh.write(b': %s\n' % stringutil.escapestr(data))
837 837 else:
838 838 self.fh.write(
839 839 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
840 840 )
841 841 self.fh.flush()
842 842 return
843 843
844 844 # Data with newlines is written to multiple lines.
845 845 if self.logdataapis:
846 846 self.fh.write(b':\n')
847 847
848 848 lines = data.splitlines(True)
849 849 for line in lines:
850 850 self.fh.write(
851 851 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
852 852 )
853 853 self.fh.flush()
854 854
855 855
856 856 class fileobjectobserver(baseproxyobserver):
857 857 """Logs file object activity."""
858 858
859 859 def __init__(
860 860 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
861 861 ):
862 862 super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
863 863 self.reads = reads
864 864 self.writes = writes
865 865
866 866 def read(self, res, size=-1):
867 867 if not self.reads:
868 868 return
869 869 # Python 3 can return None from reads at EOF instead of empty strings.
870 870 if res is None:
871 871 res = b''
872 872
873 873 if size == -1 and res == b'':
874 874 # Suppress pointless read(-1) calls that return
875 875 # nothing. These happen _a lot_ on Python 3, and there
876 876 # doesn't seem to be a better workaround to have matching
877 877 # Python 2 and 3 behavior. :(
878 878 return
879 879
880 880 if self.logdataapis:
881 881 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
882 882
883 883 self._writedata(res)
884 884
885 885 def readline(self, res, limit=-1):
886 886 if not self.reads:
887 887 return
888 888
889 889 if self.logdataapis:
890 890 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
891 891
892 892 self._writedata(res)
893 893
894 894 def readinto(self, res, dest):
895 895 if not self.reads:
896 896 return
897 897
898 898 if self.logdataapis:
899 899 self.fh.write(
900 900 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
901 901 )
902 902
903 903 data = dest[0:res] if res is not None else b''
904 904
905 905 # _writedata() uses "in" operator and is confused by memoryview because
906 906 # characters are ints on Python 3.
907 907 if isinstance(data, memoryview):
908 908 data = data.tobytes()
909 909
910 910 self._writedata(data)
911 911
912 912 def write(self, res, data):
913 913 if not self.writes:
914 914 return
915 915
916 916 # Python 2 returns None from some write() calls. Python 3 (reasonably)
917 917 # returns the integer bytes written.
918 918 if res is None and data:
919 919 res = len(data)
920 920
921 921 if self.logdataapis:
922 922 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
923 923
924 924 self._writedata(data)
925 925
926 926 def flush(self, res):
927 927 if not self.writes:
928 928 return
929 929
930 930 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
931 931
932 932 # For observedbufferedinputpipe.
933 933 def bufferedread(self, res, size):
934 934 if not self.reads:
935 935 return
936 936
937 937 if self.logdataapis:
938 938 self.fh.write(
939 939 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
940 940 )
941 941
942 942 self._writedata(res)
943 943
944 944 def bufferedreadline(self, res):
945 945 if not self.reads:
946 946 return
947 947
948 948 if self.logdataapis:
949 949 self.fh.write(
950 950 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
951 951 )
952 952
953 953 self._writedata(res)
954 954
955 955
956 956 def makeloggingfileobject(
957 957 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
958 958 ):
959 959 """Turn a file object into a logging file object."""
960 960
961 961 observer = fileobjectobserver(
962 962 logh,
963 963 name,
964 964 reads=reads,
965 965 writes=writes,
966 966 logdata=logdata,
967 967 logdataapis=logdataapis,
968 968 )
969 969 return fileobjectproxy(fh, observer)
970 970
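A minimal usage sketch for the factory above, assuming this module is importable as ``mercurial.util``; the returned proxy forwards every call to the wrapped file object and mirrors read activity into the log handle:

    import io

    from mercurial import util

    log = io.BytesIO()
    fh = io.BytesIO(b'hello world\n')  # stand-in for a real file object
    proxy = util.makeloggingfileobject(log, fh, b'input', logdata=True)

    proxy.read(5)      # the observer logs something like "input> read(5) -> 5: hello"
    proxy.readline()   # the readline() call is logged as well
    print(log.getvalue())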
971 971
972 972 class socketobserver(baseproxyobserver):
973 973 """Logs socket activity."""
974 974
975 975 def __init__(
976 976 self,
977 977 fh,
978 978 name,
979 979 reads=True,
980 980 writes=True,
981 981 states=True,
982 982 logdata=False,
983 983 logdataapis=True,
984 984 ):
985 985 super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
986 986 self.reads = reads
987 987 self.writes = writes
988 988 self.states = states
989 989
990 990 def makefile(self, res, mode=None, bufsize=None):
991 991 if not self.states:
992 992 return
993 993
994 994 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
995 995
996 996 def recv(self, res, size, flags=0):
997 997 if not self.reads:
998 998 return
999 999
1000 1000 if self.logdataapis:
1001 1001 self.fh.write(
1002 1002 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
1003 1003 )
1004 1004 self._writedata(res)
1005 1005
1006 1006 def recvfrom(self, res, size, flags=0):
1007 1007 if not self.reads:
1008 1008 return
1009 1009
1010 1010 if self.logdataapis:
1011 1011 self.fh.write(
1012 1012 b'%s> recvfrom(%d, %d) -> %d'
1013 1013 % (self.name, size, flags, len(res[0]))
1014 1014 )
1015 1015
1016 1016 self._writedata(res[0])
1017 1017
1018 1018 def recvfrom_into(self, res, buf, size, flags=0):
1019 1019 if not self.reads:
1020 1020 return
1021 1021
1022 1022 if self.logdataapis:
1023 1023 self.fh.write(
1024 1024 b'%s> recvfrom_into(%d, %d) -> %d'
1025 1025 % (self.name, size, flags, res[0])
1026 1026 )
1027 1027
1028 1028 self._writedata(buf[0 : res[0]])
1029 1029
1030 1030 def recv_into(self, res, buf, size=0, flags=0):
1031 1031 if not self.reads:
1032 1032 return
1033 1033
1034 1034 if self.logdataapis:
1035 1035 self.fh.write(
1036 1036 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1037 1037 )
1038 1038
1039 1039 self._writedata(buf[0:res])
1040 1040
1041 1041 def send(self, res, data, flags=0):
1042 1042 if not self.writes:
1043 1043 return
1044 1044
1045 1045 self.fh.write(
1046 1046 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1047 1047 )
1048 1048 self._writedata(data)
1049 1049
1050 1050 def sendall(self, res, data, flags=0):
1051 1051 if not self.writes:
1052 1052 return
1053 1053
1054 1054 if self.logdataapis:
1055 1055 # Returns None on success. So don't bother reporting return value.
1056 1056 self.fh.write(
1057 1057 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1058 1058 )
1059 1059
1060 1060 self._writedata(data)
1061 1061
1062 1062 def sendto(self, res, data, flagsoraddress, address=None):
1063 1063 if not self.writes:
1064 1064 return
1065 1065
1066 1066 if address:
1067 1067 flags = flagsoraddress
1068 1068 else:
1069 1069 flags = 0
1070 1070
1071 1071 if self.logdataapis:
1072 1072 self.fh.write(
1073 1073 b'%s> sendto(%d, %d, %r) -> %d'
1074 1074 % (self.name, len(data), flags, address, res)
1075 1075 )
1076 1076
1077 1077 self._writedata(data)
1078 1078
1079 1079 def setblocking(self, res, flag):
1080 1080 if not self.states:
1081 1081 return
1082 1082
1083 1083 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1084 1084
1085 1085 def settimeout(self, res, value):
1086 1086 if not self.states:
1087 1087 return
1088 1088
1089 1089 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1090 1090
1091 1091 def gettimeout(self, res):
1092 1092 if not self.states:
1093 1093 return
1094 1094
1095 1095 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1096 1096
1097 1097 def setsockopt(self, res, level, optname, value):
1098 1098 if not self.states:
1099 1099 return
1100 1100
1101 1101 self.fh.write(
1102 1102 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1103 1103 % (self.name, level, optname, value, res)
1104 1104 )
1105 1105
1106 1106
1107 1107 def makeloggingsocket(
1108 1108 logh,
1109 1109 fh,
1110 1110 name,
1111 1111 reads=True,
1112 1112 writes=True,
1113 1113 states=True,
1114 1114 logdata=False,
1115 1115 logdataapis=True,
1116 1116 ):
1117 1117 """Turn a socket into a logging socket."""
1118 1118
1119 1119 observer = socketobserver(
1120 1120 logh,
1121 1121 name,
1122 1122 reads=reads,
1123 1123 writes=writes,
1124 1124 states=states,
1125 1125 logdata=logdata,
1126 1126 logdataapis=logdataapis,
1127 1127 )
1128 1128 return socketproxy(fh, observer)
1129 1129
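The socket variant follows the same pattern; a hedged sketch with a made-up endpoint (again assuming the ``mercurial.util`` import path):

    import io
    import socket

    from mercurial import util

    log = io.BytesIO()
    raw = socket.create_connection(('example.com', 80))  # hypothetical endpoint
    conn = util.makeloggingsocket(log, raw, b'conn', logdata=True)

    conn.sendall(b'HEAD / HTTP/1.0\r\n\r\n')  # logged roughly as "conn> sendall(19, 0)"
    conn.recv(64)                             # logged roughly as "conn> recv(64, 0) -> ..."
    print(log.getvalue())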
1130 1130
1131 1131 def version():
1132 1132 """Return version information if available."""
1133 1133 try:
1134 1134 from . import __version__
1135 1135
1136 1136 return __version__.version
1137 1137 except ImportError:
1138 1138 return b'unknown'
1139 1139
1140 1140
1141 1141 def versiontuple(v=None, n=4):
1142 1142 """Parses a Mercurial version string into an N-tuple.
1143 1143
1144 1144 The version string to be parsed is specified with the ``v`` argument.
1145 1145 If it isn't defined, the current Mercurial version string will be parsed.
1146 1146
1147 1147 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1148 1148 returned values:
1149 1149
1150 1150 >>> v = b'3.6.1+190-df9b73d2d444'
1151 1151 >>> versiontuple(v, 2)
1152 1152 (3, 6)
1153 1153 >>> versiontuple(v, 3)
1154 1154 (3, 6, 1)
1155 1155 >>> versiontuple(v, 4)
1156 1156 (3, 6, 1, '190-df9b73d2d444')
1157 1157
1158 1158 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1159 1159 (3, 6, 1, '190-df9b73d2d444+20151118')
1160 1160
1161 1161 >>> v = b'3.6'
1162 1162 >>> versiontuple(v, 2)
1163 1163 (3, 6)
1164 1164 >>> versiontuple(v, 3)
1165 1165 (3, 6, None)
1166 1166 >>> versiontuple(v, 4)
1167 1167 (3, 6, None, None)
1168 1168
1169 1169 >>> v = b'3.9-rc'
1170 1170 >>> versiontuple(v, 2)
1171 1171 (3, 9)
1172 1172 >>> versiontuple(v, 3)
1173 1173 (3, 9, None)
1174 1174 >>> versiontuple(v, 4)
1175 1175 (3, 9, None, 'rc')
1176 1176
1177 1177 >>> v = b'3.9-rc+2-02a8fea4289b'
1178 1178 >>> versiontuple(v, 2)
1179 1179 (3, 9)
1180 1180 >>> versiontuple(v, 3)
1181 1181 (3, 9, None)
1182 1182 >>> versiontuple(v, 4)
1183 1183 (3, 9, None, 'rc+2-02a8fea4289b')
1184 1184
1185 1185 >>> versiontuple(b'4.6rc0')
1186 1186 (4, 6, None, 'rc0')
1187 1187 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1188 1188 (4, 6, None, 'rc0+12-425d55e54f98')
1189 1189 >>> versiontuple(b'.1.2.3')
1190 1190 (None, None, None, '.1.2.3')
1191 1191 >>> versiontuple(b'12.34..5')
1192 1192 (12, 34, None, '..5')
1193 1193 >>> versiontuple(b'1.2.3.4.5.6')
1194 1194 (1, 2, 3, '.4.5.6')
1195 1195 """
1196 1196 if not v:
1197 1197 v = version()
1198 1198 m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
1199 1199 if not m:
1200 1200 vparts, extra = b'', v
1201 1201 elif m.group(2):
1202 1202 vparts, extra = m.groups()
1203 1203 else:
1204 1204 vparts, extra = m.group(1), None
1205 1205
1206 1206 assert vparts is not None # help pytype
1207 1207
1208 1208 vints = []
1209 1209 for i in vparts.split(b'.'):
1210 1210 try:
1211 1211 vints.append(int(i))
1212 1212 except ValueError:
1213 1213 break
1214 1214 # (3, 6) -> (3, 6, None)
1215 1215 while len(vints) < 3:
1216 1216 vints.append(None)
1217 1217
1218 1218 if n == 2:
1219 1219 return (vints[0], vints[1])
1220 1220 if n == 3:
1221 1221 return (vints[0], vints[1], vints[2])
1222 1222 if n == 4:
1223 1223 return (vints[0], vints[1], vints[2], extra)
1224 1224
1225 1225 raise error.ProgrammingError(b"invalid version part request: %d" % n)
1226 1226
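The doctests above cover the parsing corner cases; the typical consumer is a coarse version gate, sketched here (the 5.9 threshold is made up):

    from mercurial import util

    # versiontuple(n=2) yields e.g. (6, 3); a plain tuple comparison is the
    # usual way to gate on a minimum Mercurial version.
    has_new_api = util.versiontuple(n=2) >= (5, 9)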
1227 1227
1228 1228 def cachefunc(func):
1229 1229 '''cache the result of function calls'''
1230 1230 # XXX doesn't handle keywords args
1231 1231 if func.__code__.co_argcount == 0:
1232 1232 listcache = []
1233 1233
1234 1234 def f():
1235 1235 if len(listcache) == 0:
1236 1236 listcache.append(func())
1237 1237 return listcache[0]
1238 1238
1239 1239 return f
1240 1240 cache = {}
1241 1241 if func.__code__.co_argcount == 1:
1242 1242 # we gain a small amount of time because
1243 1243 # we don't need to pack/unpack the list
1244 1244 def f(arg):
1245 1245 if arg not in cache:
1246 1246 cache[arg] = func(arg)
1247 1247 return cache[arg]
1248 1248
1249 1249 else:
1250 1250
1251 1251 def f(*args):
1252 1252 if args not in cache:
1253 1253 cache[args] = func(*args)
1254 1254 return cache[args]
1255 1255
1256 1256 return f
1257 1257
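A small sketch of ``cachefunc`` in use (the wrapped function is hypothetical); repeated calls with the same positional arguments are served from the cache:

    from mercurial import util

    def _expensive(rev):
        print('computing %d' % rev)
        return rev * 2

    cached = util.cachefunc(_expensive)

    cached(7)   # prints "computing 7" and returns 14
    cached(7)   # cache hit: returns 14 without recomputing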
1258 1258
1259 1259 class cow:
1260 1260 """helper class to make copy-on-write easier
1261 1261
1262 1262 Call preparewrite before doing any writes.
1263 1263 """
1264 1264
1265 1265 def preparewrite(self):
1266 1266 """call this before writes, return self or a copied new object"""
1267 1267 if getattr(self, '_copied', 0):
1268 1268 self._copied -= 1
1269 1269 # Function cow.__init__ expects 1 arg(s), got 2 [wrong-arg-count]
1270 1270 return self.__class__(self) # pytype: disable=wrong-arg-count
1271 1271 return self
1272 1272
1273 1273 def copy(self):
1274 1274 """always do a cheap copy"""
1275 1275 self._copied = getattr(self, '_copied', 0) + 1
1276 1276 return self
1277 1277
1278 1278
1279 1279 class sortdict(collections.OrderedDict):
1280 1280 """a simple sorted dictionary
1281 1281
1282 1282 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1283 1283 >>> d2 = d1.copy()
1284 1284 >>> d2
1285 1285 sortdict([('a', 0), ('b', 1)])
1286 1286 >>> d2.update([(b'a', 2)])
1287 1287 >>> list(d2.keys()) # should still be in last-set order
1288 1288 ['b', 'a']
1289 1289 >>> d1.insert(1, b'a.5', 0.5)
1290 1290 >>> d1
1291 1291 sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
1292 1292 """
1293 1293
1294 1294 def __setitem__(self, key, value):
1295 1295 if key in self:
1296 1296 del self[key]
1297 1297 super(sortdict, self).__setitem__(key, value)
1298 1298
1299 1299 if pycompat.ispypy:
1300 1300 # __setitem__() isn't called as of PyPy 5.8.0
1301 1301 def update(self, src, **f):
1302 1302 if isinstance(src, dict):
1303 1303 src = src.items()
1304 1304 for k, v in src:
1305 1305 self[k] = v
1306 1306 for k in f:
1307 1307 self[k] = f[k]
1308 1308
1309 1309 def insert(self, position, key, value):
1310 1310 for (i, (k, v)) in enumerate(list(self.items())):
1311 1311 if i == position:
1312 1312 self[key] = value
1313 1313 if i >= position:
1314 1314 del self[k]
1315 1315 self[k] = v
1316 1316
1317 1317
1318 1318 class cowdict(cow, dict):
1319 1319 """copy-on-write dict
1320 1320
1321 1321 Be sure to call d = d.preparewrite() before writing to d.
1322 1322
1323 1323 >>> a = cowdict()
1324 1324 >>> a is a.preparewrite()
1325 1325 True
1326 1326 >>> b = a.copy()
1327 1327 >>> b is a
1328 1328 True
1329 1329 >>> c = b.copy()
1330 1330 >>> c is a
1331 1331 True
1332 1332 >>> a = a.preparewrite()
1333 1333 >>> b is a
1334 1334 False
1335 1335 >>> a is a.preparewrite()
1336 1336 True
1337 1337 >>> c = c.preparewrite()
1338 1338 >>> b is c
1339 1339 False
1340 1340 >>> b is b.preparewrite()
1341 1341 True
1342 1342 """
1343 1343
1344 1344
1345 1345 class cowsortdict(cow, sortdict):
1346 1346 """copy-on-write sortdict
1347 1347
1348 1348 Be sure to call d = d.preparewrite() before writing to d.
1349 1349 """
1350 1350
1351 1351
1352 1352 class transactional: # pytype: disable=ignored-metaclass
1353 1353 """Base class for making a transactional type into a context manager."""
1354 1354
1355 1355 __metaclass__ = abc.ABCMeta
1356 1356
1357 1357 @abc.abstractmethod
1358 1358 def close(self):
1359 1359 """Successfully closes the transaction."""
1360 1360
1361 1361 @abc.abstractmethod
1362 1362 def release(self):
1363 1363 """Marks the end of the transaction.
1364 1364
1365 1365 If the transaction has not been closed, it will be aborted.
1366 1366 """
1367 1367
1368 1368 def __enter__(self):
1369 1369 return self
1370 1370
1371 1371 def __exit__(self, exc_type, exc_val, exc_tb):
1372 1372 try:
1373 1373 if exc_type is None:
1374 1374 self.close()
1375 1375 finally:
1376 1376 self.release()
1377 1377
1378 1378
1379 1379 @contextlib.contextmanager
1380 1380 def acceptintervention(tr=None):
1381 1381 """A context manager that closes the transaction on InterventionRequired
1382 1382
1383 1383 If no transaction was provided, this simply runs the body and returns
1384 1384 """
1385 1385 if not tr:
1386 1386 yield
1387 1387 return
1388 1388 try:
1389 1389 yield
1390 1390 tr.close()
1391 1391 except error.InterventionRequired:
1392 1392 tr.close()
1393 1393 raise
1394 1394 finally:
1395 1395 tr.release()
1396 1396
1397 1397
1398 1398 @contextlib.contextmanager
1399 1399 def nullcontextmanager(enter_result=None):
1400 1400 yield enter_result
1401 1401
1402 1402
1403 1403 class _lrucachenode:
1404 1404 """A node in a doubly linked list.
1405 1405
1406 1406 Holds a reference to nodes on either side as well as a key-value
1407 1407 pair for the dictionary entry.
1408 1408 """
1409 1409
1410 1410 __slots__ = ('next', 'prev', 'key', 'value', 'cost')
1411 1411
1412 1412 def __init__(self):
1413 1413 self.next = self
1414 1414 self.prev = self
1415 1415
1416 1416 self.key = _notset
1417 1417 self.value = None
1418 1418 self.cost = 0
1419 1419
1420 1420 def markempty(self):
1421 1421 """Mark the node as emptied."""
1422 1422 self.key = _notset
1423 1423 self.value = None
1424 1424 self.cost = 0
1425 1425
1426 1426
1427 1427 class lrucachedict:
1428 1428 """Dict that caches most recent accesses and sets.
1429 1429
1430 1430 The dict consists of an actual backing dict - indexed by original
1431 1431 key - and a doubly linked circular list defining the order of entries in
1432 1432 the cache.
1433 1433
1434 1434 The head node is the newest entry in the cache. If the cache is full,
1435 1435 we recycle head.prev and make it the new head. Cache accesses result in
1436 1436 the node being moved to before the existing head and being marked as the
1437 1437 new head node.
1438 1438
1439 1439 Items in the cache can be inserted with an optional "cost" value. This is
1440 1440 simply an integer that is specified by the caller. The cache can be queried
1441 1441 for the total cost of all items presently in the cache.
1442 1442
1443 1443 The cache can also define a maximum cost. If a cache insertion would
1444 1444 cause the total cost of the cache to go beyond the maximum cost limit,
1445 1445 nodes will be evicted to make room for the new node. This can be used
1446 1446 to e.g. set a max memory limit and associate an estimated bytes size
1447 1447 cost to each item in the cache. By default, no maximum cost is enforced.
1448 1448 """
1449 1449
1450 1450 def __init__(self, max, maxcost=0):
1451 1451 self._cache = {}
1452 1452
1453 1453 self._head = _lrucachenode()
1454 1454 self._size = 1
1455 1455 self.capacity = max
1456 1456 self.totalcost = 0
1457 1457 self.maxcost = maxcost
1458 1458
1459 1459 def __len__(self):
1460 1460 return len(self._cache)
1461 1461
1462 1462 def __contains__(self, k):
1463 1463 return k in self._cache
1464 1464
1465 1465 def __iter__(self):
1466 1466 # We don't have to iterate in cache order, but why not.
1467 1467 n = self._head
1468 1468 for i in range(len(self._cache)):
1469 1469 yield n.key
1470 1470 n = n.next
1471 1471
1472 1472 def __getitem__(self, k):
1473 1473 node = self._cache[k]
1474 1474 self._movetohead(node)
1475 1475 return node.value
1476 1476
1477 1477 def insert(self, k, v, cost=0):
1478 1478 """Insert a new item in the cache with optional cost value."""
1479 1479 node = self._cache.get(k)
1480 1480 # Replace existing value and mark as newest.
1481 1481 if node is not None:
1482 1482 self.totalcost -= node.cost
1483 1483 node.value = v
1484 1484 node.cost = cost
1485 1485 self.totalcost += cost
1486 1486 self._movetohead(node)
1487 1487
1488 1488 if self.maxcost:
1489 1489 self._enforcecostlimit()
1490 1490
1491 1491 return
1492 1492
1493 1493 if self._size < self.capacity:
1494 1494 node = self._addcapacity()
1495 1495 else:
1496 1496 # Grab the last/oldest item.
1497 1497 node = self._head.prev
1498 1498
1499 1499 # At capacity. Kill the old entry.
1500 1500 if node.key is not _notset:
1501 1501 self.totalcost -= node.cost
1502 1502 del self._cache[node.key]
1503 1503
1504 1504 node.key = k
1505 1505 node.value = v
1506 1506 node.cost = cost
1507 1507 self.totalcost += cost
1508 1508 self._cache[k] = node
1509 1509 # And mark it as newest entry. No need to adjust order since it
1510 1510 # is already self._head.prev.
1511 1511 self._head = node
1512 1512
1513 1513 if self.maxcost:
1514 1514 self._enforcecostlimit()
1515 1515
1516 1516 def __setitem__(self, k, v):
1517 1517 self.insert(k, v)
1518 1518
1519 1519 def __delitem__(self, k):
1520 1520 self.pop(k)
1521 1521
1522 1522 def pop(self, k, default=_notset):
1523 1523 try:
1524 1524 node = self._cache.pop(k)
1525 1525 except KeyError:
1526 1526 if default is _notset:
1527 1527 raise
1528 1528 return default
1529 1529
1530 1530 assert node is not None # help pytype
1531 1531 value = node.value
1532 1532 self.totalcost -= node.cost
1533 1533 node.markempty()
1534 1534
1535 1535 # Temporarily mark as newest item before re-adjusting head to make
1536 1536 # this node the oldest item.
1537 1537 self._movetohead(node)
1538 1538 self._head = node.next
1539 1539
1540 1540 return value
1541 1541
1542 1542 # Additional dict methods.
1543 1543
1544 1544 def get(self, k, default=None):
1545 1545 try:
1546 1546 return self.__getitem__(k)
1547 1547 except KeyError:
1548 1548 return default
1549 1549
1550 1550 def peek(self, k, default=_notset):
1551 1551 """Get the specified item without moving it to the head
1552 1552
1553 1553 Unlike get(), this doesn't mutate the internal state. But be aware
1554 1554 that it doesn't mean peek() is thread safe.
1555 1555 """
1556 1556 try:
1557 1557 node = self._cache[k]
1558 1558 assert node is not None # help pytype
1559 1559 return node.value
1560 1560 except KeyError:
1561 1561 if default is _notset:
1562 1562 raise
1563 1563 return default
1564 1564
1565 1565 def clear(self):
1566 1566 n = self._head
1567 1567 while n.key is not _notset:
1568 1568 self.totalcost -= n.cost
1569 1569 n.markempty()
1570 1570 n = n.next
1571 1571
1572 1572 self._cache.clear()
1573 1573
1574 1574 def copy(self, capacity=None, maxcost=0):
1575 1575 """Create a new cache as a copy of the current one.
1576 1576
1577 1577 By default, the new cache has the same capacity as the existing one.
1578 1578 But, the cache capacity can be changed as part of performing the
1579 1579 copy.
1580 1580
1581 1581 Items in the copy have an insertion/access order matching this
1582 1582 instance.
1583 1583 """
1584 1584
1585 1585 capacity = capacity or self.capacity
1586 1586 maxcost = maxcost or self.maxcost
1587 1587 result = lrucachedict(capacity, maxcost=maxcost)
1588 1588
1589 1589 # We copy entries by iterating in oldest-to-newest order so the copy
1590 1590 # has the correct ordering.
1591 1591
1592 1592 # Find the first non-empty entry.
1593 1593 n = self._head.prev
1594 1594 while n.key is _notset and n is not self._head:
1595 1595 n = n.prev
1596 1596
1597 1597 # We could potentially skip the first N items when decreasing capacity.
1598 1598 # But let's keep it simple unless it is a performance problem.
1599 1599 for i in range(len(self._cache)):
1600 1600 result.insert(n.key, n.value, cost=n.cost)
1601 1601 n = n.prev
1602 1602
1603 1603 return result
1604 1604
1605 1605 def popoldest(self):
1606 1606 """Remove the oldest item from the cache.
1607 1607
1608 1608 Returns the (key, value) describing the removed cache entry.
1609 1609 """
1610 1610 if not self._cache:
1611 1611 return
1612 1612
1613 1613 # Walk the linked list backwards starting at tail node until we hit
1614 1614 # a non-empty node.
1615 1615 n = self._head.prev
1616 1616
1617 1617 assert n is not None # help pytype
1618 1618
1619 1619 while n.key is _notset:
1620 1620 n = n.prev
1621 1621
1622 1622 assert n is not None # help pytype
1623 1623
1624 1624 key, value = n.key, n.value
1625 1625
1626 1626 # And remove it from the cache and mark it as empty.
1627 1627 del self._cache[n.key]
1628 1628 self.totalcost -= n.cost
1629 1629 n.markempty()
1630 1630
1631 1631 return key, value
1632 1632
1633 1633 def _movetohead(self, node):
1634 1634 """Mark a node as the newest, making it the new head.
1635 1635
1636 1636 When a node is accessed, it becomes the freshest entry in the LRU
1637 1637 list, which is denoted by self._head.
1638 1638
1639 1639 Visually, let's make ``N`` the new head node (* denotes head):
1640 1640
1641 1641 previous/oldest <-> head <-> next/next newest
1642 1642
1643 1643 ----<->--- A* ---<->-----
1644 1644 | |
1645 1645 E <-> D <-> N <-> C <-> B
1646 1646
1647 1647 To:
1648 1648
1649 1649 ----<->--- N* ---<->-----
1650 1650 | |
1651 1651 E <-> D <-> C <-> B <-> A
1652 1652
1653 1653 This requires the following moves:
1654 1654
1655 1655 C.next = D (node.prev.next = node.next)
1656 1656 D.prev = C (node.next.prev = node.prev)
1657 1657 E.next = N (head.prev.next = node)
1658 1658 N.prev = E (node.prev = head.prev)
1659 1659 N.next = A (node.next = head)
1660 1660 A.prev = N (head.prev = node)
1661 1661 """
1662 1662 head = self._head
1663 1663 # C.next = D
1664 1664 node.prev.next = node.next
1665 1665 # D.prev = C
1666 1666 node.next.prev = node.prev
1667 1667 # N.prev = E
1668 1668 node.prev = head.prev
1669 1669 # N.next = A
1670 1670 # It is tempting to do just "head" here, however if node is
1671 1671 # adjacent to head, this will do bad things.
1672 1672 node.next = head.prev.next
1673 1673 # E.next = N
1674 1674 node.next.prev = node
1675 1675 # A.prev = N
1676 1676 node.prev.next = node
1677 1677
1678 1678 self._head = node
1679 1679
1680 1680 def _addcapacity(self):
1681 1681 """Add a node to the circular linked list.
1682 1682
1683 1683 The new node is inserted before the head node.
1684 1684 """
1685 1685 head = self._head
1686 1686 node = _lrucachenode()
1687 1687 head.prev.next = node
1688 1688 node.prev = head.prev
1689 1689 node.next = head
1690 1690 head.prev = node
1691 1691 self._size += 1
1692 1692 return node
1693 1693
1694 1694 def _enforcecostlimit(self):
1695 1695 # This should run after an insertion. It should only be called if total
1696 1696 # cost limits are being enforced.
1697 1697 # The most recently inserted node is never evicted.
1698 1698 if len(self) <= 1 or self.totalcost <= self.maxcost:
1699 1699 return
1700 1700
1701 1701 # This is logically equivalent to calling popoldest() until we
1702 1702 # free up enough cost. We don't do that since popoldest() needs
1703 1703 # to walk the linked list and doing this in a loop would be
1704 1704 # quadratic. So we find the first non-empty node and then
1705 1705 # walk nodes until we free up enough capacity.
1706 1706 #
1707 1707 # If we only removed the minimum number of nodes to free enough
1708 1708 # cost at insert time, chances are high that the next insert would
1709 1709 # also require pruning. This would effectively constitute quadratic
1710 1710 # behavior for insert-heavy workloads. To mitigate this, we set a
1711 1711 # target cost that is a percentage of the max cost. This will tend
1712 1712 # to free more nodes when the high water mark is reached, which
1713 1713 # lowers the chances of needing to prune on the subsequent insert.
1714 1714 targetcost = int(self.maxcost * 0.75)
1715 1715
1716 1716 n = self._head.prev
1717 1717 while n.key is _notset:
1718 1718 n = n.prev
1719 1719
1720 1720 while len(self) > 1 and self.totalcost > targetcost:
1721 1721 del self._cache[n.key]
1722 1722 self.totalcost -= n.cost
1723 1723 n.markempty()
1724 1724 n = n.prev
1725 1725
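A usage sketch for ``lrucachedict``, exercising both the entry-count capacity and the optional cost ceiling (keys and costs are made up):

    from mercurial import util

    d = util.lrucachedict(4, maxcost=100)
    d.insert(b'a', b'value-a', cost=60)
    d.insert(b'b', b'value-b', cost=60)   # total cost 120 > 100: b'a' is evicted
    assert b'a' not in d and d[b'b'] == b'value-b'

    d2 = util.lrucachedict(2)
    d2[b'x'] = 1
    d2[b'y'] = 2
    d2[b'z'] = 3                          # capacity 2: oldest key b'x' is recycled
    assert b'x' not in d2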
1726 1726
1727 1727 def lrucachefunc(func):
1728 1728 '''cache most recent results of function calls'''
1729 1729 cache = {}
1730 1730 order = collections.deque()
1731 1731 if func.__code__.co_argcount == 1:
1732 1732
1733 1733 def f(arg):
1734 1734 if arg not in cache:
1735 1735 if len(cache) > 20:
1736 1736 del cache[order.popleft()]
1737 1737 cache[arg] = func(arg)
1738 1738 else:
1739 1739 order.remove(arg)
1740 1740 order.append(arg)
1741 1741 return cache[arg]
1742 1742
1743 1743 else:
1744 1744
1745 1745 def f(*args):
1746 1746 if args not in cache:
1747 1747 if len(cache) > 20:
1748 1748 del cache[order.popleft()]
1749 1749 cache[args] = func(*args)
1750 1750 else:
1751 1751 order.remove(args)
1752 1752 order.append(args)
1753 1753 return cache[args]
1754 1754
1755 1755 return f
1756 1756
1757 1757
1758 1758 class propertycache:
1759 1759 def __init__(self, func):
1760 1760 self.func = func
1761 1761 self.name = func.__name__
1762 1762
1763 1763 def __get__(self, obj, type=None):
1764 1764 result = self.func(obj)
1765 1765 self.cachevalue(obj, result)
1766 1766 return result
1767 1767
1768 1768 def cachevalue(self, obj, value):
1769 1769 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1770 1770 obj.__dict__[self.name] = value
1771 1771
1772 1772
1773 1773 def clearcachedproperty(obj, prop):
1774 1774 '''clear a cached property value, if one has been set'''
1775 1775 prop = pycompat.sysstr(prop)
1776 1776 if prop in obj.__dict__:
1777 1777 del obj.__dict__[prop]
1778 1778
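A sketch of ``propertycache`` together with ``clearcachedproperty`` (the class and attribute below are hypothetical); the decorated method runs once and its result then shadows the descriptor in the instance ``__dict__``:

    from mercurial import util

    class repoinfo:
        @util.propertycache
        def expensive(self):
            print('computed once')
            return 42

    r = repoinfo()
    r.expensive                                # prints "computed once", yields 42
    r.expensive                                # served from r.__dict__
    util.clearcachedproperty(r, 'expensive')   # drops the cached value again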
1779 1779
1780 1780 def increasingchunks(source, min=1024, max=65536):
1781 1781 """return no less than min bytes per chunk while data remains,
1782 1782 doubling min after each chunk until it reaches max"""
1783 1783
1784 1784 def log2(x):
1785 1785 if not x:
1786 1786 return 0
1787 1787 i = 0
1788 1788 while x:
1789 1789 x >>= 1
1790 1790 i += 1
1791 1791 return i - 1
1792 1792
1793 1793 buf = []
1794 1794 blen = 0
1795 1795 for chunk in source:
1796 1796 buf.append(chunk)
1797 1797 blen += len(chunk)
1798 1798 if blen >= min:
1799 1799 if min < max:
1800 1800 min = min << 1
1801 1801 nmin = 1 << log2(blen)
1802 1802 if nmin > min:
1803 1803 min = nmin
1804 1804 if min > max:
1805 1805 min = max
1806 1806 yield b''.join(buf)
1807 1807 blen = 0
1808 1808 buf = []
1809 1809 if buf:
1810 1810 yield b''.join(buf)
1811 1811
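A sketch showing how the generator above coalesces many tiny chunks into progressively larger ones (the input stream is made up):

    from mercurial import util

    source = (b'x' * 100 for _ in range(100))   # 100 chunks of 100 bytes each
    sizes = [len(c) for c in util.increasingchunks(source, min=1024)]
    print(sizes)   # chunk sizes roughly double until the input is exhausted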
1812 1812
1813 1813 def always(fn):
1814 1814 return True
1815 1815
1816 1816
1817 1817 def never(fn):
1818 1818 return False
1819 1819
1820 1820
1821 1821 def nogc(func):
1822 1822 """disable garbage collector
1823 1823
1824 1824 Python's garbage collector triggers a GC each time a certain number of
1825 1825 container objects (the number being defined by gc.get_threshold()) are
1826 1826 allocated even when marked not to be tracked by the collector. Tracking has
1827 1827 no effect on when GCs are triggered, only on what objects the GC looks
1828 1828 into. As a workaround, disable GC while building complex (huge)
1829 1829 containers.
1830 1830
1831 1831 This garbage collector issue has been fixed in 2.7. But it still affects
1832 1832 CPython's performance.
1833 1833 """
1834 1834
1835 1835 def wrapper(*args, **kwargs):
1836 1836 gcenabled = gc.isenabled()
1837 1837 gc.disable()
1838 1838 try:
1839 1839 return func(*args, **kwargs)
1840 1840 finally:
1841 1841 if gcenabled:
1842 1842 gc.enable()
1843 1843
1844 1844 return wrapper
1845 1845
1846 1846
1847 1847 if pycompat.ispypy:
1848 1848 # PyPy runs slower with gc disabled
1849 1849 nogc = lambda x: x
1850 1850
1851 1851
1852 1852 def pathto(root, n1, n2):
1853 1853 # type: (bytes, bytes, bytes) -> bytes
1854 1854 """return the relative path from one place to another.
1855 1855 root should use os.sep to separate directories
1856 1856 n1 should use os.sep to separate directories
1857 1857 n2 should use "/" to separate directories
1858 1858 returns an os.sep-separated path.
1859 1859
1860 1860 If n1 is a relative path, it's assumed it's
1861 1861 relative to root.
1862 1862 n2 should always be relative to root.
1863 1863 """
1864 1864 if not n1:
1865 1865 return localpath(n2)
1866 1866 if os.path.isabs(n1):
1867 1867 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1868 1868 return os.path.join(root, localpath(n2))
1869 1869 n2 = b'/'.join((pconvert(root), n2))
1870 1870 a, b = splitpath(n1), n2.split(b'/')
1871 1871 a.reverse()
1872 1872 b.reverse()
1873 1873 while a and b and a[-1] == b[-1]:
1874 1874 a.pop()
1875 1875 b.pop()
1876 1876 b.reverse()
1877 1877 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1878 1878
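A quick worked example of the path arithmetic above on a POSIX layout (the paths are made up):

    from mercurial import util

    # Climb out of a/b, then descend into c/d.
    print(util.pathto(b'/repo', b'a/b', b'c/d'))   # -> b'../../c/d'
    print(util.pathto(b'/repo', b'', b'c/d'))      # empty n1 -> b'c/d'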
1879 1879
1880 1880 def checksignature(func, depth=1):
1881 1881 '''wrap a function with code to check for calling errors'''
1882 1882
1883 1883 def check(*args, **kwargs):
1884 1884 try:
1885 1885 return func(*args, **kwargs)
1886 1886 except TypeError:
1887 1887 if len(traceback.extract_tb(sys.exc_info()[2])) == depth:
1888 1888 raise error.SignatureError
1889 1889 raise
1890 1890
1891 1891 return check
1892 1892
1893 1893
1894 1894 # a whitelist of known filesystems where hardlinks work reliably
1895 1895 _hardlinkfswhitelist = {
1896 1896 b'apfs',
1897 1897 b'btrfs',
1898 1898 b'ext2',
1899 1899 b'ext3',
1900 1900 b'ext4',
1901 1901 b'hfs',
1902 1902 b'jfs',
1903 1903 b'NTFS',
1904 1904 b'reiserfs',
1905 1905 b'tmpfs',
1906 1906 b'ufs',
1907 1907 b'xfs',
1908 1908 b'zfs',
1909 1909 }
1910 1910
1911 1911
1912 1912 def copyfile(
1913 1913 src,
1914 1914 dest,
1915 1915 hardlink=False,
1916 1916 copystat=False,
1917 1917 checkambig=False,
1918 1918 nb_bytes=None,
1919 1919 no_hardlink_cb=None,
1920 1920 check_fs_hardlink=True,
1921 1921 ):
1922 1922 """copy a file, preserving mode and optionally other stat info like
1923 1923 atime/mtime
1924 1924
1925 1925 checkambig argument is used with filestat, and is useful only if
1926 1926 destination file is guarded by any lock (e.g. repo.lock or
1927 1927 repo.wlock).
1928 1928
1929 1929 copystat and checkambig should be exclusive.
1930 1930
1931 1931 nb_bytes: if set only copy the first `nb_bytes` of the source file.
1932 1932 """
1933 1933 assert not (copystat and checkambig)
1934 1934 oldstat = None
1935 1935 if os.path.lexists(dest):
1936 1936 if checkambig:
1937 1937 oldstat = checkambig and filestat.frompath(dest)
1938 1938 unlink(dest)
1939 1939 if hardlink and check_fs_hardlink:
1940 1940 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1941 1941 # unless we are confident that dest is on a whitelisted filesystem.
1942 1942 try:
1943 1943 fstype = getfstype(os.path.dirname(dest))
1944 1944 except OSError:
1945 1945 fstype = None
1946 1946 if fstype not in _hardlinkfswhitelist:
1947 1947 if no_hardlink_cb is not None:
1948 1948 no_hardlink_cb()
1949 1949 hardlink = False
1950 1950 if hardlink:
1951 1951 try:
1952 1952 oslink(src, dest)
1953 1953 if nb_bytes is not None:
1954 1954 m = "the `nb_bytes` argument is incompatible with `hardlink`"
1955 1955 raise error.ProgrammingError(m)
1956 1956 return
1957 1957 except (IOError, OSError) as exc:
1958 1958 if exc.errno != errno.EEXIST and no_hardlink_cb is not None:
1959 1959 no_hardlink_cb()
1960 1960 # fall back to normal copy
1961 1961 if os.path.islink(src):
1962 1962 os.symlink(os.readlink(src), dest)
1963 1963 # copytime is ignored for symlinks, but in general copytime isn't needed
1964 1964 # for them anyway
1965 1965 if nb_bytes is not None:
1966 1966 m = "cannot use `nb_bytes` on a symlink"
1967 1967 raise error.ProgrammingError(m)
1968 1968 else:
1969 1969 try:
1970 1970 shutil.copyfile(src, dest)
1971 1971 if copystat:
1972 1972 # copystat also copies mode
1973 1973 shutil.copystat(src, dest)
1974 1974 else:
1975 1975 shutil.copymode(src, dest)
1976 1976 if oldstat and oldstat.stat:
1977 1977 newstat = filestat.frompath(dest)
1978 1978 if newstat.isambig(oldstat):
1979 1979 # stat of copied file is ambiguous to original one
1980 1980 advanced = (
1981 1981 oldstat.stat[stat.ST_MTIME] + 1
1982 1982 ) & 0x7FFFFFFF
1983 1983 os.utime(dest, (advanced, advanced))
1984 1984 # We could do something smarter using `copy_file_range` call or similar
1985 1985 if nb_bytes is not None:
1986 1986 with open(dest, mode='r+') as f:
1987 1987 f.truncate(nb_bytes)
1988 1988 except shutil.Error as inst:
1989 1989 raise error.Abort(stringutil.forcebytestr(inst))
1990 1990
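A hedged sketch of the copy helper above: ask for a hardlink and let the callback report when a plain copy has to be used instead (paths are hypothetical):

    from mercurial import util

    def _nolink():
        print('hardlink not possible, falling back to a copy')

    # On a non-whitelisted filesystem (e.g. CIFS) or across devices the
    # callback fires and the file is copied instead of hardlinked.
    util.copyfile(b'/repo/.hg/store/data.i', b'/backup/data.i',
                  hardlink=True, no_hardlink_cb=_nolink)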
1991 1991
1992 1992 def copyfiles(src, dst, hardlink=None, progress=None):
1993 1993 """Copy a directory tree using hardlinks if possible."""
1994 1994 num = 0
1995 1995
1996 1996 def settopic():
1997 1997 if progress:
1998 1998 progress.topic = _(b'linking') if hardlink else _(b'copying')
1999 1999
2000 2000 if os.path.isdir(src):
2001 2001 if hardlink is None:
2002 2002 hardlink = (
2003 2003 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
2004 2004 )
2005 2005 settopic()
2006 2006 os.mkdir(dst)
2007 2007 for name, kind in listdir(src):
2008 2008 srcname = os.path.join(src, name)
2009 2009 dstname = os.path.join(dst, name)
2010 2010 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
2011 2011 num += n
2012 2012 else:
2013 2013 if hardlink is None:
2014 2014 hardlink = (
2015 2015 os.stat(os.path.dirname(src)).st_dev
2016 2016 == os.stat(os.path.dirname(dst)).st_dev
2017 2017 )
2018 2018 settopic()
2019 2019
2020 2020 if hardlink:
2021 2021 try:
2022 2022 oslink(src, dst)
2023 2023 except (IOError, OSError) as exc:
2024 2024 if exc.errno != errno.EEXIST:
2025 2025 hardlink = False
2026 2026 # XXX maybe try to relink if the file exists?
2027 2027 shutil.copy(src, dst)
2028 2028 else:
2029 2029 shutil.copy(src, dst)
2030 2030 num += 1
2031 2031 if progress:
2032 2032 progress.increment()
2033 2033
2034 2034 return hardlink, num
2035 2035
2036 2036
2037 2037 _winreservednames = {
2038 2038 b'con',
2039 2039 b'prn',
2040 2040 b'aux',
2041 2041 b'nul',
2042 2042 b'com1',
2043 2043 b'com2',
2044 2044 b'com3',
2045 2045 b'com4',
2046 2046 b'com5',
2047 2047 b'com6',
2048 2048 b'com7',
2049 2049 b'com8',
2050 2050 b'com9',
2051 2051 b'lpt1',
2052 2052 b'lpt2',
2053 2053 b'lpt3',
2054 2054 b'lpt4',
2055 2055 b'lpt5',
2056 2056 b'lpt6',
2057 2057 b'lpt7',
2058 2058 b'lpt8',
2059 2059 b'lpt9',
2060 2060 }
2061 2061 _winreservedchars = b':*?"<>|'
2062 2062
2063 2063
2064 2064 def checkwinfilename(path):
2065 2065 # type: (bytes) -> Optional[bytes]
2066 2066 r"""Check that the base-relative path is a valid filename on Windows.
2067 2067 Returns None if the path is ok, or a UI string describing the problem.
2068 2068
2069 2069 >>> checkwinfilename(b"just/a/normal/path")
2070 2070 >>> checkwinfilename(b"foo/bar/con.xml")
2071 2071 "filename contains 'con', which is reserved on Windows"
2072 2072 >>> checkwinfilename(b"foo/con.xml/bar")
2073 2073 "filename contains 'con', which is reserved on Windows"
2074 2074 >>> checkwinfilename(b"foo/bar/xml.con")
2075 2075 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2076 2076 "filename contains 'AUX', which is reserved on Windows"
2077 2077 >>> checkwinfilename(b"foo/bar/bla:.txt")
2078 2078 "filename contains ':', which is reserved on Windows"
2079 2079 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2080 2080 "filename contains '\\x07', which is invalid on Windows"
2081 2081 >>> checkwinfilename(b"foo/bar/bla ")
2082 2082 "filename ends with ' ', which is not allowed on Windows"
2083 2083 >>> checkwinfilename(b"../bar")
2084 2084 >>> checkwinfilename(b"foo\\")
2085 2085 "filename ends with '\\', which is invalid on Windows"
2086 2086 >>> checkwinfilename(b"foo\\/bar")
2087 2087 "directory name ends with '\\', which is invalid on Windows"
2088 2088 """
2089 2089 if path.endswith(b'\\'):
2090 2090 return _(b"filename ends with '\\', which is invalid on Windows")
2091 2091 if b'\\/' in path:
2092 2092 return _(b"directory name ends with '\\', which is invalid on Windows")
2093 2093 for n in path.replace(b'\\', b'/').split(b'/'):
2094 2094 if not n:
2095 2095 continue
2096 2096 for c in _filenamebytestr(n):
2097 2097 if c in _winreservedchars:
2098 2098 return (
2099 2099 _(
2100 2100 b"filename contains '%s', which is reserved "
2101 2101 b"on Windows"
2102 2102 )
2103 2103 % c
2104 2104 )
2105 2105 if ord(c) <= 31:
2106 2106 return _(
2107 2107 b"filename contains '%s', which is invalid on Windows"
2108 2108 ) % stringutil.escapestr(c)
2109 2109 base = n.split(b'.')[0]
2110 2110 if base and base.lower() in _winreservednames:
2111 2111 return (
2112 2112 _(b"filename contains '%s', which is reserved on Windows")
2113 2113 % base
2114 2114 )
2115 2115 t = n[-1:]
2116 2116 if t in b'. ' and n not in b'..':
2117 2117 return (
2118 2118 _(
2119 2119 b"filename ends with '%s', which is not allowed "
2120 2120 b"on Windows"
2121 2121 )
2122 2122 % t
2123 2123 )
2124 2124
2125 2125
2126 2126 timer = getattr(time, "perf_counter", None)
2127 2127
2128 2128 if pycompat.iswindows:
2129 2129 checkosfilename = checkwinfilename
2130 2130 if not timer:
2131 2131 timer = time.clock
2132 2132 else:
2133 2133 # mercurial.windows doesn't have platform.checkosfilename
2134 2134 checkosfilename = platform.checkosfilename # pytype: disable=module-attr
2135 2135 if not timer:
2136 2136 timer = time.time
2137 2137
2138 2138
2139 2139 def makelock(info, pathname):
2140 2140 """Create a lock file atomically if possible
2141 2141
2142 2142 This may leave a stale lock file if symlink isn't supported and signal
2143 2143 interrupt is enabled.
2144 2144 """
2145 2145 try:
2146 2146 return os.symlink(info, pathname)
2147 2147 except OSError as why:
2148 2148 if why.errno == errno.EEXIST:
2149 2149 raise
2150 2150 except AttributeError: # no symlink in os
2151 2151 pass
2152 2152
2153 2153 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2154 2154 ld = os.open(pathname, flags)
2155 2155 os.write(ld, info)
2156 2156 os.close(ld)
2157 2157
2158 2158
2159 2159 def readlock(pathname):
2160 2160 # type: (bytes) -> bytes
2161 2161 try:
2162 2162 return readlink(pathname)
2163 2163 except OSError as why:
2164 2164 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2165 2165 raise
2166 2166 except AttributeError: # no symlink in os
2167 2167 pass
2168 2168 with posixfile(pathname, b'rb') as fp:
2169 2169 return fp.read()
2170 2170
2171 2171
2172 2172 def fstat(fp):
2173 2173 '''stat file object that may not have fileno method.'''
2174 2174 try:
2175 2175 return os.fstat(fp.fileno())
2176 2176 except AttributeError:
2177 2177 return os.stat(fp.name)
2178 2178
2179 2179
2180 2180 # File system features
2181 2181
2182 2182
2183 2183 def fscasesensitive(path):
2184 2184 # type: (bytes) -> bool
2185 2185 """
2186 2186 Return true if the given path is on a case-sensitive filesystem
2187 2187
2188 2188 Requires a path (like /foo/.hg) ending with a foldable final
2189 2189 directory component.
2190 2190 """
2191 2191 s1 = os.lstat(path)
2192 2192 d, b = os.path.split(path)
2193 2193 b2 = b.upper()
2194 2194 if b == b2:
2195 2195 b2 = b.lower()
2196 2196 if b == b2:
2197 2197 return True # no evidence against case sensitivity
2198 2198 p2 = os.path.join(d, b2)
2199 2199 try:
2200 2200 s2 = os.lstat(p2)
2201 2201 if s2 == s1:
2202 2202 return False
2203 2203 return True
2204 2204 except OSError:
2205 2205 return True
2206 2206
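A sketch of probing case sensitivity with the helper above; the path is hypothetical and must name an existing entry with a foldable final component, as the docstring requires:

    from mercurial import util

    if util.fscasesensitive(b'/repo/.hg'):
        print('case-sensitive filesystem')
    else:
        print('case-insensitive filesystem (e.g. default macOS or Windows volumes)')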
2207 2207
2208 2208 _re2_input = lambda x: x
2209 2209 try:
2210 2210 import re2 # pytype: disable=import-error
2211 2211
2212 2212 _re2 = None
2213 2213 except ImportError:
2214 2214 _re2 = False
2215 2215
2216 2216
2217 2217 class _re:
2218 2218 def _checkre2(self):
2219 2219 global _re2
2220 2220 global _re2_input
2221 2221
2222 2222 check_pattern = br'\[([^\[]+)\]'
2223 2223 check_input = b'[ui]'
2224 2224 try:
2225 2225 # check if match works, see issue3964
2226 2226 _re2 = bool(re2.match(check_pattern, check_input))
2227 2227 except ImportError:
2228 2228 _re2 = False
2229 2229 except TypeError:
2230 2230 # the `pyre-2` project provides a re2 module that accepts bytes
2231 2231 # the `fb-re2` project provides a re2 module that accepts sysstr
2232 2232 check_pattern = pycompat.sysstr(check_pattern)
2233 2233 check_input = pycompat.sysstr(check_input)
2234 2234 _re2 = bool(re2.match(check_pattern, check_input))
2235 2235 _re2_input = pycompat.sysstr
2236 2236
2237 2237 def compile(self, pat, flags=0):
2238 2238 """Compile a regular expression, using re2 if possible
2239 2239
2240 2240 For best performance, use only re2-compatible regexp features. The
2241 2241 only flags from the re module that are re2-compatible are
2242 2242 IGNORECASE and MULTILINE."""
2243 2243 if _re2 is None:
2244 2244 self._checkre2()
2245 2245 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2246 2246 if flags & remod.IGNORECASE:
2247 2247 pat = b'(?i)' + pat
2248 2248 if flags & remod.MULTILINE:
2249 2249 pat = b'(?m)' + pat
2250 2250 try:
2251 2251 return re2.compile(_re2_input(pat))
2252 2252 except re2.error:
2253 2253 pass
2254 2254 return remod.compile(pat, flags)
2255 2255
2256 2256 @propertycache
2257 2257 def escape(self):
2258 2258 """Return the version of escape corresponding to self.compile.
2259 2259
2260 2260 This is imperfect because whether re2 or re is used for a particular
2261 2261 function depends on the flags, etc, but it's the best we can do.
2262 2262 """
2263 2263 global _re2
2264 2264 if _re2 is None:
2265 2265 self._checkre2()
2266 2266 if _re2:
2267 2267 return re2.escape
2268 2268 else:
2269 2269 return remod.escape
2270 2270
2271 2271
2272 2272 re = _re()
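A short sketch of the compile front-end just instantiated; callers do not need to know whether re2 is available (the pattern is made up):

    import re as remod

    from mercurial import util

    pat = util.re.compile(br'^[0-9a-f]{12,40}$', remod.IGNORECASE)
    print(bool(pat.match(b'0123456789ABCDEF0123')))   # True with either regex engine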
2273 2273
2274 2274 _fspathcache = {}
2275 2275
2276 2276
2277 2277 def fspath(name, root):
2278 2278 # type: (bytes, bytes) -> bytes
2279 2279 """Get name in the case stored in the filesystem
2280 2280
2281 2281 The name should be relative to root, and be normcase-ed for efficiency.
2282 2282
2283 2283 Note that this function is unnecessary, and should not be
2284 2284 called, for case-sensitive filesystems (simply because it's expensive).
2285 2285
2286 2286 The root should be normcase-ed, too.
2287 2287 """
2288 2288
2289 2289 def _makefspathcacheentry(dir):
2290 2290 return {normcase(n): n for n in os.listdir(dir)}
2291 2291
2292 2292 seps = pycompat.ossep
2293 2293 if pycompat.osaltsep:
2294 2294 seps = seps + pycompat.osaltsep
2295 2295 # Protect backslashes. This gets silly very quickly.
2296 2296 seps.replace(b'\\', b'\\\\')
2297 2297 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2298 2298 dir = os.path.normpath(root)
2299 2299 result = []
2300 2300 for part, sep in pattern.findall(name):
2301 2301 if sep:
2302 2302 result.append(sep)
2303 2303 continue
2304 2304
2305 2305 if dir not in _fspathcache:
2306 2306 _fspathcache[dir] = _makefspathcacheentry(dir)
2307 2307 contents = _fspathcache[dir]
2308 2308
2309 2309 found = contents.get(part)
2310 2310 if not found:
2311 2311 # retry "once per directory" per "dirstate.walk" which
2312 2312 # may take place for each patch of "hg qpush", for example
2313 2313 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2314 2314 found = contents.get(part)
2315 2315
2316 2316 result.append(found or part)
2317 2317 dir = os.path.join(dir, part)
2318 2318
2319 2319 return b''.join(result)
2320 2320
2321 2321
2322 2322 def checknlink(testfile):
2323 2323 # type: (bytes) -> bool
2324 2324 '''check whether hardlink count reporting works properly'''
2325 2325
2326 2326 # testfile may be open, so we need a separate file for checking to
2327 2327 # work around issue2543 (or testfile may get lost on Samba shares)
2328 2328 f1, f2, fp = None, None, None
2329 2329 try:
2330 2330 fd, f1 = pycompat.mkstemp(
2331 2331 prefix=b'.%s-' % os.path.basename(testfile),
2332 2332 suffix=b'1~',
2333 2333 dir=os.path.dirname(testfile),
2334 2334 )
2335 2335 os.close(fd)
2336 2336 f2 = b'%s2~' % f1[:-2]
2337 2337
2338 2338 oslink(f1, f2)
2339 2339 # nlinks() may behave differently for files on Windows shares if
2340 2340 # the file is open.
2341 2341 fp = posixfile(f2)
2342 2342 return nlinks(f2) > 1
2343 2343 except OSError:
2344 2344 return False
2345 2345 finally:
2346 2346 if fp is not None:
2347 2347 fp.close()
2348 2348 for f in (f1, f2):
2349 2349 try:
2350 2350 if f is not None:
2351 2351 os.unlink(f)
2352 2352 except OSError:
2353 2353 pass
2354 2354
2355 2355
2356 2356 def endswithsep(path):
2357 2357 # type: (bytes) -> bool
2358 2358 '''Check path ends with os.sep or os.altsep.'''
2359 2359 return bool( # help pytype
2360 2360 path.endswith(pycompat.ossep)
2361 2361 or pycompat.osaltsep
2362 2362 and path.endswith(pycompat.osaltsep)
2363 2363 )
2364 2364
2365 2365
2366 2366 def splitpath(path):
2367 2367 # type: (bytes) -> List[bytes]
2368 2368 """Split path by os.sep.
2369 2369 Note that this function does not use os.altsep because it is meant as
2370 2370 a simple alternative to "xxx.split(os.sep)".
2371 2371 It is recommended to use os.path.normpath() before using this
2372 2372 function if needed."""
2373 2373 return path.split(pycompat.ossep)
2374 2374
2375 2375
2376 2376 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2377 2377 """Create a temporary file with the same contents from name
2378 2378
2379 2379 The permission bits are copied from the original file.
2380 2380
2381 2381 If the temporary file is going to be truncated immediately, you
2382 2382 can use emptyok=True as an optimization.
2383 2383
2384 2384 Returns the name of the temporary file.
2385 2385 """
2386 2386 d, fn = os.path.split(name)
2387 2387 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2388 2388 os.close(fd)
2389 2389 # Temporary files are created with mode 0600, which is usually not
2390 2390 # what we want. If the original file already exists, just copy
2391 2391 # its mode. Otherwise, manually obey umask.
2392 2392 copymode(name, temp, createmode, enforcewritable)
2393 2393
2394 2394 if emptyok:
2395 2395 return temp
2396 2396 try:
2397 2397 try:
2398 2398 ifp = posixfile(name, b"rb")
2399 2399 except IOError as inst:
2400 2400 if inst.errno == errno.ENOENT:
2401 2401 return temp
2402 2402 if not getattr(inst, 'filename', None):
2403 2403 inst.filename = name
2404 2404 raise
2405 2405 ofp = posixfile(temp, b"wb")
2406 2406 for chunk in filechunkiter(ifp):
2407 2407 ofp.write(chunk)
2408 2408 ifp.close()
2409 2409 ofp.close()
2410 2410 except: # re-raises
2411 2411 try:
2412 2412 os.unlink(temp)
2413 2413 except OSError:
2414 2414 pass
2415 2415 raise
2416 2416 return temp
2417 2417
2418 2418
2419 2419 class filestat:
2420 2420 """help to exactly detect change of a file
2421 2421
2422 2422 The 'stat' attribute is the result of 'os.stat()' if the specified
2423 2423 'path' exists. Otherwise, it is None. This lets callers avoid a
2424 2424 separate 'exists()' check before using this class.
2425 2425 """
2426 2426
2427 2427 def __init__(self, stat):
2428 2428 self.stat = stat
2429 2429
2430 2430 @classmethod
2431 2431 def frompath(cls, path):
2432 2432 try:
2433 2433 stat = os.stat(path)
2434 2434 except OSError as err:
2435 2435 if err.errno != errno.ENOENT:
2436 2436 raise
2437 2437 stat = None
2438 2438 return cls(stat)
2439 2439
2440 2440 @classmethod
2441 2441 def fromfp(cls, fp):
2442 2442 stat = os.fstat(fp.fileno())
2443 2443 return cls(stat)
2444 2444
2445 2445 __hash__ = object.__hash__
2446 2446
2447 2447 def __eq__(self, old):
2448 2448 try:
2449 2449 # if ambiguity between stat of new and old file is
2450 2450 # avoided, comparison of size, ctime and mtime is enough
2451 2451 # to exactly detect change of a file regardless of platform
2452 2452 return (
2453 2453 self.stat.st_size == old.stat.st_size
2454 2454 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2455 2455 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2456 2456 )
2457 2457 except AttributeError:
2458 2458 pass
2459 2459 try:
2460 2460 return self.stat is None and old.stat is None
2461 2461 except AttributeError:
2462 2462 return False
2463 2463
2464 2464 def isambig(self, old):
2465 2465 """Examine whether new (= self) stat is ambiguous against old one
2466 2466
2467 2467 "S[N]" below means stat of a file at N-th change:
2468 2468
2469 2469 - S[n-1].ctime < S[n].ctime: can detect change of a file
2470 2470 - S[n-1].ctime == S[n].ctime
2471 2471 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2472 2472 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2473 2473 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2474 2474 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2475 2475
2476 2476 Case (*2) above means that a file was changed twice or more within
2477 2477 the same second (= S[n-1].ctime), so comparing timestamps alone
2478 2478 is ambiguous.
2479 2479
2480 2480 The basic idea to avoid such ambiguity is "advance mtime by 1 sec,
2481 2481 if the timestamp is ambiguous".
2482 2482
2483 2483 But advancing mtime only in case (*2) doesn't work as
2484 2484 expected, because naturally advanced S[n].mtime in case (*1)
2485 2485 might be equal to manually advanced S[n-1 or earlier].mtime.
2486 2486
2487 2487 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2488 2488 treated as ambiguous regardless of mtime, to avoid overlooking
2489 2489 changes because of collisions between such mtimes.
2490 2490
2491 2491 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2492 2492 S[n].mtime", even if size of a file isn't changed.
2493 2493 """
2494 2494 try:
2495 2495 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2496 2496 except AttributeError:
2497 2497 return False
2498 2498
2499 2499 def avoidambig(self, path, old):
2500 2500 """Change file stat of specified path to avoid ambiguity
2501 2501
2502 2502 'old' should be previous filestat of 'path'.
2503 2503
2504 2504 This skips avoiding ambiguity if the process doesn't have
2505 2505 appropriate privileges for 'path', and returns False in that
2506 2506 case.
2507 2507
2508 2508 Otherwise, this returns True, as "ambiguity is avoided".
2509 2509 """
2510 2510 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2511 2511 try:
2512 2512 os.utime(path, (advanced, advanced))
2513 2513 except OSError as inst:
2514 2514 if inst.errno == errno.EPERM:
2515 2515 # utime() on the file created by another user causes EPERM,
2516 2516 # if a process doesn't have appropriate privileges
2517 2517 return False
2518 2518 raise
2519 2519 return True
2520 2520
2521 2521 def __ne__(self, other):
2522 2522 return not self == other
2523 2523
2524 2524
2525 2525 class atomictempfile:
2526 2526 """writable file object that atomically updates a file
2527 2527
2528 2528 All writes will go to a temporary copy of the original file. Call
2529 2529 close() when you are done writing, and atomictempfile will rename
2530 2530 the temporary copy to the original name, making the changes
2531 2531 visible. If the object is destroyed without being closed, all your
2532 2532 writes are discarded.
2533 2533
2534 2534 checkambig argument of constructor is used with filestat, and is
2535 2535 useful only if target file is guarded by any lock (e.g. repo.lock
2536 2536 or repo.wlock).
2537 2537 """
2538 2538
2539 2539 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2540 2540 self.__name = name # permanent name
2541 2541 self._tempname = mktempcopy(
2542 2542 name,
2543 2543 emptyok=(b'w' in mode),
2544 2544 createmode=createmode,
2545 2545 enforcewritable=(b'w' in mode),
2546 2546 )
2547 2547
2548 2548 self._fp = posixfile(self._tempname, mode)
2549 2549 self._checkambig = checkambig
2550 2550
2551 2551 # delegated methods
2552 2552 self.read = self._fp.read
2553 2553 self.write = self._fp.write
2554 2554 self.seek = self._fp.seek
2555 2555 self.tell = self._fp.tell
2556 2556 self.fileno = self._fp.fileno
2557 2557
2558 2558 def close(self):
2559 2559 if not self._fp.closed:
2560 2560 self._fp.close()
2561 2561 filename = localpath(self.__name)
2562 2562 oldstat = self._checkambig and filestat.frompath(filename)
2563 2563 if oldstat and oldstat.stat:
2564 2564 rename(self._tempname, filename)
2565 2565 newstat = filestat.frompath(filename)
2566 2566 if newstat.isambig(oldstat):
2567 2567 # stat of changed file is ambiguous to original one
2568 2568 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2569 2569 os.utime(filename, (advanced, advanced))
2570 2570 else:
2571 2571 rename(self._tempname, filename)
2572 2572
2573 2573 def discard(self):
2574 2574 if not self._fp.closed:
2575 2575 try:
2576 2576 os.unlink(self._tempname)
2577 2577 except OSError:
2578 2578 pass
2579 2579 self._fp.close()
2580 2580
2581 2581 def __del__(self):
2582 2582 if safehasattr(self, '_fp'): # constructor actually did something
2583 2583 self.discard()
2584 2584
2585 2585 def __enter__(self):
2586 2586 return self
2587 2587
2588 2588 def __exit__(self, exctype, excvalue, traceback):
2589 2589 if exctype is not None:
2590 2590 self.discard()
2591 2591 else:
2592 2592 self.close()
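# Editorial sketch (not part of the original module): atomictempfile used as
# a context manager. The filename is hypothetical; on clean exit the temporary
# copy is renamed over the target, while an escaping exception discards it.
def _example_atomictempfile():
    with atomictempfile(b'example-output.txt') as fp:
        fp.write(b'first line\n')
        fp.write(b'second line\n')
    # only now does b'example-output.txt' see the new content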
2593 2593
2594 2594
2595 def tryrmdir(f):
2596 try:
2597 removedirs(f)
2598 except OSError as e:
2599 if e.errno != errno.ENOENT and e.errno != errno.ENOTEMPTY:
2600 raise
2601
2602
2595 2603 def unlinkpath(f, ignoremissing=False, rmdir=True):
2596 2604 # type: (bytes, bool, bool) -> None
2597 2605 """unlink and remove the directory if it is empty"""
2598 2606 if ignoremissing:
2599 2607 tryunlink(f)
2600 2608 else:
2601 2609 unlink(f)
2602 2610 if rmdir:
2603 2611 # try removing directories that might now be empty
2604 2612 try:
2605 2613 removedirs(os.path.dirname(f))
2606 2614 except OSError:
2607 2615 pass
2608 2616
2609 2617
2610 2618 def tryunlink(f):
2611 2619 # type: (bytes) -> None
2612 2620 """Attempt to remove a file, ignoring ENOENT errors."""
2613 2621 try:
2614 2622 unlink(f)
2615 2623 except OSError as e:
2616 2624 if e.errno != errno.ENOENT:
2617 2625 raise
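# Editorial sketch (not part of the original module): the difference between
# the two removal helpers above, using a hypothetical path.
def _example_unlink_helpers():
    # remove the file if present and prune directories left empty afterwards
    unlinkpath(b'build/tmp/output.bin', ignoremissing=True)
    # lower-level variant: only the file is removed, and only ENOENT is ignored
    tryunlink(b'build/tmp/output.bin')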
2618 2626
2619 2627
2620 2628 def makedirs(name, mode=None, notindexed=False):
2621 2629 # type: (bytes, Optional[int], bool) -> None
2622 2630 """recursive directory creation with parent mode inheritance
2623 2631
2624 2632 Newly created directories are marked as "not to be indexed by
2625 2633 the content indexing service", if ``notindexed`` is specified
2626 2634 for "write" mode access.
2627 2635 """
2628 2636 try:
2629 2637 makedir(name, notindexed)
2630 2638 except OSError as err:
2631 2639 if err.errno == errno.EEXIST:
2632 2640 return
2633 2641 if err.errno != errno.ENOENT or not name:
2634 2642 raise
2635 2643 parent = os.path.dirname(abspath(name))
2636 2644 if parent == name:
2637 2645 raise
2638 2646 makedirs(parent, mode, notindexed)
2639 2647 try:
2640 2648 makedir(name, notindexed)
2641 2649 except OSError as err:
2642 2650 # Catch EEXIST to handle races
2643 2651 if err.errno == errno.EEXIST:
2644 2652 return
2645 2653 raise
2646 2654 if mode is not None:
2647 2655 os.chmod(name, mode)
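# Editorial sketch (not part of the original module): recursive directory
# creation with an explicit mode; an already existing path is not an error.
# The path is hypothetical.
def _example_makedirs():
    makedirs(b'build/cache/objects', mode=0o755)
    makedirs(b'build/cache/objects', mode=0o755)  # second call returns early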
2648 2656
2649 2657
2650 2658 def readfile(path):
2651 2659 # type: (bytes) -> bytes
2652 2660 with open(path, b'rb') as fp:
2653 2661 return fp.read()
2654 2662
2655 2663
2656 2664 def writefile(path, text):
2657 2665 # type: (bytes, bytes) -> None
2658 2666 with open(path, b'wb') as fp:
2659 2667 fp.write(text)
2660 2668
2661 2669
2662 2670 def appendfile(path, text):
2663 2671 # type: (bytes, bytes) -> None
2664 2672 with open(path, b'ab') as fp:
2665 2673 fp.write(text)
2666 2674
2667 2675
2668 2676 class chunkbuffer:
2669 2677 """Allow arbitrary sized chunks of data to be efficiently read from an
2670 2678 iterator over chunks of arbitrary size."""
2671 2679
2672 2680 def __init__(self, in_iter):
2673 2681 """in_iter is the iterator that's iterating over the input chunks."""
2674 2682
2675 2683 def splitbig(chunks):
2676 2684 for chunk in chunks:
2677 2685 if len(chunk) > 2 ** 20:
2678 2686 pos = 0
2679 2687 while pos < len(chunk):
2680 2688 end = pos + 2 ** 18
2681 2689 yield chunk[pos:end]
2682 2690 pos = end
2683 2691 else:
2684 2692 yield chunk
2685 2693
2686 2694 self.iter = splitbig(in_iter)
2687 2695 self._queue = collections.deque()
2688 2696 self._chunkoffset = 0
2689 2697
2690 2698 def read(self, l=None):
2691 2699 """Read L bytes of data from the iterator of chunks of data.
2692 2700 Returns less than L bytes if the iterator runs dry.
2693 2701
2694 2702 If size parameter is omitted, read everything"""
2695 2703 if l is None:
2696 2704 return b''.join(self.iter)
2697 2705
2698 2706 left = l
2699 2707 buf = []
2700 2708 queue = self._queue
2701 2709 while left > 0:
2702 2710 # refill the queue
2703 2711 if not queue:
2704 2712 target = 2 ** 18
2705 2713 for chunk in self.iter:
2706 2714 queue.append(chunk)
2707 2715 target -= len(chunk)
2708 2716 if target <= 0:
2709 2717 break
2710 2718 if not queue:
2711 2719 break
2712 2720
2713 2721 # The easy way to do this would be to queue.popleft(), modify the
2714 2722 # chunk (if necessary), then queue.appendleft(). However, for cases
2715 2723 # where we read partial chunk content, this incurs 2 dequeue
2716 2724 # mutations and creates a new str for the remaining chunk in the
2717 2725 # queue. Our code below avoids this overhead.
2718 2726
2719 2727 chunk = queue[0]
2720 2728 chunkl = len(chunk)
2721 2729 offset = self._chunkoffset
2722 2730
2723 2731 # Use full chunk.
2724 2732 if offset == 0 and left >= chunkl:
2725 2733 left -= chunkl
2726 2734 queue.popleft()
2727 2735 buf.append(chunk)
2728 2736 # self._chunkoffset remains at 0.
2729 2737 continue
2730 2738
2731 2739 chunkremaining = chunkl - offset
2732 2740
2733 2741 # Use all of unconsumed part of chunk.
2734 2742 if left >= chunkremaining:
2735 2743 left -= chunkremaining
2736 2744 queue.popleft()
2737 2745 # offset == 0 is enabled by block above, so this won't merely
2738 2746 # copy via ``chunk[0:]``.
2739 2747 buf.append(chunk[offset:])
2740 2748 self._chunkoffset = 0
2741 2749
2742 2750 # Partial chunk needed.
2743 2751 else:
2744 2752 buf.append(chunk[offset : offset + left])
2745 2753 self._chunkoffset += left
2746 2754 left -= chunkremaining
2747 2755
2748 2756 return b''.join(buf)
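# Editorial sketch (not part of the original module): reads of an arbitrary
# size are served from input chunks of an unrelated size, and a read past the
# end simply returns whatever is left.
def _example_chunkbuffer():
    buf = chunkbuffer(iter([b'abc', b'defgh']))
    assert buf.read(4) == b'abcd'   # spans both input chunks
    assert buf.read(10) == b'efgh'  # shorter than requested: input ran dry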
2749 2757
2750 2758
2751 2759 def filechunkiter(f, size=131072, limit=None):
2752 2760 """Create a generator that produces the data in the file size
2753 2761 (default 131072) bytes at a time, up to optional limit (default is
2754 2762 to read all data). Chunks may be less than size bytes if the
2755 2763 chunk is the last chunk in the file, or the file is a socket or
2756 2764 some other type of file that sometimes reads less data than is
2757 2765 requested."""
2758 2766 assert size >= 0
2759 2767 assert limit is None or limit >= 0
2760 2768 while True:
2761 2769 if limit is None:
2762 2770 nbytes = size
2763 2771 else:
2764 2772 nbytes = min(limit, size)
2765 2773 s = nbytes and f.read(nbytes)
2766 2774 if not s:
2767 2775 break
2768 2776 if limit:
2769 2777 limit -= len(s)
2770 2778 yield s
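# Editorial sketch (not part of the original module): iterating a file-like
# object in fixed-size chunks; only the final chunk is short.
def _example_filechunkiter():
    import io

    src = io.BytesIO(b'x' * 300000)
    sizes = [len(chunk) for chunk in filechunkiter(src, size=131072)]
    assert sizes == [131072, 131072, 37856]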
2771 2779
2772 2780
2773 2781 class cappedreader:
2774 2782 """A file object proxy that allows reading up to N bytes.
2775 2783
2776 2784 Given a source file object, instances of this type allow reading up to
2777 2785 N bytes from that source file object. Attempts to read past the allowed
2778 2786 limit are treated as EOF.
2779 2787
2780 2788 It is assumed that I/O is not performed on the original file object
2781 2789 in addition to I/O that is performed by this instance. If there is,
2782 2790 state tracking will get out of sync and unexpected results will ensue.
2783 2791 """
2784 2792
2785 2793 def __init__(self, fh, limit):
2786 2794 """Allow reading up to <limit> bytes from <fh>."""
2787 2795 self._fh = fh
2788 2796 self._left = limit
2789 2797
2790 2798 def read(self, n=-1):
2791 2799 if not self._left:
2792 2800 return b''
2793 2801
2794 2802 if n < 0:
2795 2803 n = self._left
2796 2804
2797 2805 data = self._fh.read(min(n, self._left))
2798 2806 self._left -= len(data)
2799 2807 assert self._left >= 0
2800 2808
2801 2809 return data
2802 2810
2803 2811 def readinto(self, b):
2804 2812 res = self.read(len(b))
2805 2813 if res is None:
2806 2814 return None
2807 2815
2808 2816 b[0 : len(res)] = res
2809 2817 return len(res)
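# Editorial sketch (not part of the original module): reads through the proxy
# stop at the configured limit even though the underlying stream has more.
def _example_cappedreader():
    import io

    capped = cappedreader(io.BytesIO(b'abcdefgh'), 5)
    assert capped.read(3) == b'abc'
    assert capped.read() == b'de'  # only 2 of the 5 allowed bytes remain
    assert capped.read() == b''    # limit reached: behaves like EOF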
2810 2818
2811 2819
2812 2820 def unitcountfn(*unittable):
2813 2821 '''return a function that renders a readable count of some quantity'''
2814 2822
2815 2823 def go(count):
2816 2824 for multiplier, divisor, format in unittable:
2817 2825 if abs(count) >= divisor * multiplier:
2818 2826 return format % (count / float(divisor))
2819 2827 return unittable[-1][2] % count
2820 2828
2821 2829 return go
2822 2830
2823 2831
2824 2832 def processlinerange(fromline, toline):
2825 2833 # type: (int, int) -> Tuple[int, int]
2826 2834 """Check that linerange <fromline>:<toline> makes sense and return a
2827 2835 0-based range.
2828 2836
2829 2837 >>> processlinerange(10, 20)
2830 2838 (9, 20)
2831 2839 >>> processlinerange(2, 1)
2832 2840 Traceback (most recent call last):
2833 2841 ...
2834 2842 ParseError: line range must be positive
2835 2843 >>> processlinerange(0, 5)
2836 2844 Traceback (most recent call last):
2837 2845 ...
2838 2846 ParseError: fromline must be strictly positive
2839 2847 """
2840 2848 if toline - fromline < 0:
2841 2849 raise error.ParseError(_(b"line range must be positive"))
2842 2850 if fromline < 1:
2843 2851 raise error.ParseError(_(b"fromline must be strictly positive"))
2844 2852 return fromline - 1, toline
2845 2853
2846 2854
2847 2855 bytecount = unitcountfn(
2848 2856 (100, 1 << 30, _(b'%.0f GB')),
2849 2857 (10, 1 << 30, _(b'%.1f GB')),
2850 2858 (1, 1 << 30, _(b'%.2f GB')),
2851 2859 (100, 1 << 20, _(b'%.0f MB')),
2852 2860 (10, 1 << 20, _(b'%.1f MB')),
2853 2861 (1, 1 << 20, _(b'%.2f MB')),
2854 2862 (100, 1 << 10, _(b'%.0f KB')),
2855 2863 (10, 1 << 10, _(b'%.1f KB')),
2856 2864 (1, 1 << 10, _(b'%.2f KB')),
2857 2865 (1, 1, _(b'%.0f bytes')),
2858 2866 )
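# Editorial sketch (not part of the original module): the unit table above
# picks the coarsest unit that still renders roughly three significant
# digits. Expected outputs assume the default, untranslated messages.
def _example_bytecount():
    assert bytecount(0) == b'0 bytes'
    assert bytecount(1234) == b'1.21 KB'
    assert bytecount(1 << 20) == b'1.00 MB'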
2859 2867
2860 2868
2861 2869 class transformingwriter:
2862 2870 """Writable file wrapper to transform data by function"""
2863 2871
2864 2872 def __init__(self, fp, encode):
2865 2873 self._fp = fp
2866 2874 self._encode = encode
2867 2875
2868 2876 def close(self):
2869 2877 self._fp.close()
2870 2878
2871 2879 def flush(self):
2872 2880 self._fp.flush()
2873 2881
2874 2882 def write(self, data):
2875 2883 return self._fp.write(self._encode(data))
2876 2884
2877 2885
2878 2886 # Matches a single EOL which can either be a CRLF where repeated CR
2879 2887 # are removed or a LF. We do not care about old Macintosh files, so a
2880 2888 # stray CR is an error.
2881 2889 _eolre = remod.compile(br'\r*\n')
2882 2890
2883 2891
2884 2892 def tolf(s):
2885 2893 # type: (bytes) -> bytes
2886 2894 return _eolre.sub(b'\n', s)
2887 2895
2888 2896
2889 2897 def tocrlf(s):
2890 2898 # type: (bytes) -> bytes
2891 2899 return _eolre.sub(b'\r\n', s)
2892 2900
2893 2901
2894 2902 def _crlfwriter(fp):
2895 2903 return transformingwriter(fp, tocrlf)
2896 2904
2897 2905
2898 2906 if pycompat.oslinesep == b'\r\n':
2899 2907 tonativeeol = tocrlf
2900 2908 fromnativeeol = tolf
2901 2909 nativeeolwriter = _crlfwriter
2902 2910 else:
2903 2911 tonativeeol = pycompat.identity
2904 2912 fromnativeeol = pycompat.identity
2905 2913 nativeeolwriter = pycompat.identity
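# Editorial sketch (not part of the original module): the line-ending helpers
# above; tonativeeol/fromnativeeol resolve to one of these depending on the
# platform's os.linesep.
def _example_eol_helpers():
    assert tolf(b'one\r\ntwo\r\n') == b'one\ntwo\n'
    assert tocrlf(b'one\ntwo\n') == b'one\r\ntwo\r\n'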
2906 2914
2907 2915
2908 2916 # TODO delete since the workaround variant for Python 2 is no longer needed.
2909 2917 def iterfile(fp):
2910 2918 return fp
2911 2919
2912 2920
2913 2921 def iterlines(iterator):
2914 2922 # type: (Iterator[bytes]) -> Iterator[bytes]
2915 2923 for chunk in iterator:
2916 2924 for line in chunk.splitlines():
2917 2925 yield line
2918 2926
2919 2927
2920 2928 def expandpath(path):
2921 2929 # type: (bytes) -> bytes
2922 2930 return os.path.expanduser(os.path.expandvars(path))
2923 2931
2924 2932
2925 2933 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2926 2934 """Return the result of interpolating items in the mapping into string s.
2927 2935
2928 2936 prefix is a single character string, or a two character string with
2929 2937 a backslash as the first character if the prefix needs to be escaped in
2930 2938 a regular expression.
2931 2939
2932 2940 fn is an optional function that will be applied to the replacement text
2933 2941 just before replacement.
2934 2942
2935 2943 escape_prefix is an optional flag that allows using doubled prefix for
2936 2944 its escaping.
2937 2945 """
2938 2946 fn = fn or (lambda s: s)
2939 2947 patterns = b'|'.join(mapping.keys())
2940 2948 if escape_prefix:
2941 2949 patterns += b'|' + prefix
2942 2950 if len(prefix) > 1:
2943 2951 prefix_char = prefix[1:]
2944 2952 else:
2945 2953 prefix_char = prefix
2946 2954 mapping[prefix_char] = prefix_char
2947 2955 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2948 2956 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
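# Editorial sketch (not part of the original module): simple '$name'
# substitution with interpolate(); the prefix is regex-escaped because '$'
# is special. The mapping keys and values are illustrative.
def _example_interpolate():
    mapping = {b'user': b'alice', b'repo': b'wide'}
    msg = b'pushed to $repo by $user'
    assert interpolate(br'\$', mapping, msg) == b'pushed to wide by alice'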
2949 2957
2950 2958
2951 2959 timecount = unitcountfn(
2952 2960 (1, 1e3, _(b'%.0f s')),
2953 2961 (100, 1, _(b'%.1f s')),
2954 2962 (10, 1, _(b'%.2f s')),
2955 2963 (1, 1, _(b'%.3f s')),
2956 2964 (100, 0.001, _(b'%.1f ms')),
2957 2965 (10, 0.001, _(b'%.2f ms')),
2958 2966 (1, 0.001, _(b'%.3f ms')),
2959 2967 (100, 0.000001, _(b'%.1f us')),
2960 2968 (10, 0.000001, _(b'%.2f us')),
2961 2969 (1, 0.000001, _(b'%.3f us')),
2962 2970 (100, 0.000000001, _(b'%.1f ns')),
2963 2971 (10, 0.000000001, _(b'%.2f ns')),
2964 2972 (1, 0.000000001, _(b'%.3f ns')),
2965 2973 )
2966 2974
2967 2975
2968 2976 @attr.s
2969 2977 class timedcmstats:
2970 2978 """Stats information produced by the timedcm context manager on entering."""
2971 2979
2972 2980 # the starting value of the timer as a float (meaning and resolution
2973 2981 # are platform dependent, see util.timer)
2974 2982 start = attr.ib(default=attr.Factory(lambda: timer()))
2975 2983 # the number of seconds as a floating point value; starts at 0, updated when
2976 2984 # the context is exited.
2977 2985 elapsed = attr.ib(default=0)
2978 2986 # the number of nested timedcm context managers.
2979 2987 level = attr.ib(default=1)
2980 2988
2981 2989 def __bytes__(self):
2982 2990 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
2983 2991
2984 2992 __str__ = encoding.strmethod(__bytes__)
2985 2993
2986 2994
2987 2995 @contextlib.contextmanager
2988 2996 def timedcm(whencefmt, *whenceargs):
2989 2997 """A context manager that produces timing information for a given context.
2990 2998
2991 2999 On entering a timedcmstats instance is produced.
2992 3000
2993 3001 This context manager is reentrant.
2994 3002
2995 3003 """
2996 3004 # track nested context managers
2997 3005 timedcm._nested += 1
2998 3006 timing_stats = timedcmstats(level=timedcm._nested)
2999 3007 try:
3000 3008 with tracing.log(whencefmt, *whenceargs):
3001 3009 yield timing_stats
3002 3010 finally:
3003 3011 timing_stats.elapsed = timer() - timing_stats.start
3004 3012 timedcm._nested -= 1
3005 3013
3006 3014
3007 3015 timedcm._nested = 0
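# Editorial sketch (not part of the original module): timing a block of code
# with timedcm; the label is illustrative and elapsed is filled in on exit.
def _example_timedcm():
    import time

    with timedcm(b'example sleep') as stats:
        time.sleep(0.01)
    assert stats.elapsed > 0
    # timecount(stats.elapsed) renders it with a readable unit, e.g. b'10.30 ms'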
3008 3016
3009 3017
3010 3018 def timed(func):
3011 3019 """Report the execution time of a function call to stderr.
3012 3020
3013 3021 During development, use as a decorator when you need to measure
3014 3022 the cost of a function, e.g. as follows:
3015 3023
3016 3024 @util.timed
3017 3025 def foo(a, b, c):
3018 3026 pass
3019 3027 """
3020 3028
3021 3029 def wrapper(*args, **kwargs):
3022 3030 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3023 3031 result = func(*args, **kwargs)
3024 3032 stderr = procutil.stderr
3025 3033 stderr.write(
3026 3034 b'%s%s: %s\n'
3027 3035 % (
3028 3036 b' ' * time_stats.level * 2,
3029 3037 pycompat.bytestr(func.__name__),
3030 3038 time_stats,
3031 3039 )
3032 3040 )
3033 3041 return result
3034 3042
3035 3043 return wrapper
3036 3044
3037 3045
3038 3046 _sizeunits = (
3039 3047 (b'm', 2 ** 20),
3040 3048 (b'k', 2 ** 10),
3041 3049 (b'g', 2 ** 30),
3042 3050 (b'kb', 2 ** 10),
3043 3051 (b'mb', 2 ** 20),
3044 3052 (b'gb', 2 ** 30),
3045 3053 (b'b', 1),
3046 3054 )
3047 3055
3048 3056
3049 3057 def sizetoint(s):
3050 3058 # type: (bytes) -> int
3051 3059 """Convert a space specifier to a byte count.
3052 3060
3053 3061 >>> sizetoint(b'30')
3054 3062 30
3055 3063 >>> sizetoint(b'2.2kb')
3056 3064 2252
3057 3065 >>> sizetoint(b'6M')
3058 3066 6291456
3059 3067 """
3060 3068 t = s.strip().lower()
3061 3069 try:
3062 3070 for k, u in _sizeunits:
3063 3071 if t.endswith(k):
3064 3072 return int(float(t[: -len(k)]) * u)
3065 3073 return int(t)
3066 3074 except ValueError:
3067 3075 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3068 3076
3069 3077
3070 3078 class hooks:
3071 3079 """A collection of hook functions that can be used to extend a
3072 3080 function's behavior. Hooks are called in lexicographic order,
3073 3081 based on the names of their sources."""
3074 3082
3075 3083 def __init__(self):
3076 3084 self._hooks = []
3077 3085
3078 3086 def add(self, source, hook):
3079 3087 self._hooks.append((source, hook))
3080 3088
3081 3089 def __call__(self, *args):
3082 3090 self._hooks.sort(key=lambda x: x[0])
3083 3091 results = []
3084 3092 for source, hook in self._hooks:
3085 3093 results.append(hook(*args))
3086 3094 return results
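# Editorial sketch (not part of the original module): hook functions run in
# lexicographic order of their source names, regardless of insertion order.
def _example_hooks():
    registered = hooks()
    registered.add(b'z-extension', lambda v: v * 2)
    registered.add(b'a-extension', lambda v: v + 1)
    assert registered(3) == [4, 6]  # b'a-extension' runs first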
3087 3095
3088 3096
3089 3097 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3090 3098 """Yields lines for a nicely formatted stacktrace.
3091 3099 Skips the 'skip' last entries, then returns the last 'depth' entries.
3092 3100 Each file+linenumber is formatted according to fileline.
3093 3101 Each line is formatted according to line.
3094 3102 If line is None, it yields:
3095 3103 length of longest filepath+line number,
3096 3104 filepath+linenumber,
3097 3105 function
3098 3106
3099 3107 Not to be used in production code, but very convenient while developing.
3100 3108 """
3101 3109 entries = [
3102 3110 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3103 3111 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3104 3112 ][-depth:]
3105 3113 if entries:
3106 3114 fnmax = max(len(entry[0]) for entry in entries)
3107 3115 for fnln, func in entries:
3108 3116 if line is None:
3109 3117 yield (fnmax, fnln, func)
3110 3118 else:
3111 3119 yield line % (fnmax, fnln, func)
3112 3120
3113 3121
3114 3122 def debugstacktrace(
3115 3123 msg=b'stacktrace',
3116 3124 skip=0,
3117 3125 f=procutil.stderr,
3118 3126 otherf=procutil.stdout,
3119 3127 depth=0,
3120 3128 prefix=b'',
3121 3129 ):
3122 3130 """Writes a message to f (stderr) with a nicely formatted stacktrace.
3123 3131 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3124 3132 By default it will flush stdout first.
3125 3133 It can be used everywhere and intentionally does not require an ui object.
3126 3134 Not be used in production code but very convenient while developing.
3127 3135 """
3128 3136 if otherf:
3129 3137 otherf.flush()
3130 3138 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3131 3139 for line in getstackframes(skip + 1, depth=depth):
3132 3140 f.write(prefix + line)
3133 3141 f.flush()
3134 3142
3135 3143
3136 3144 # convenient shortcut
3137 3145 dst = debugstacktrace
3138 3146
3139 3147
3140 3148 def safename(f, tag, ctx, others=None):
3141 3149 """
3142 3150 Generate a name that it is safe to rename f to in the given context.
3143 3151
3144 3152 f: filename to rename
3145 3153 tag: a string tag that will be included in the new name
3146 3154 ctx: a context, in which the new name must not exist
3147 3155 others: a set of other filenames that the new name must not be in
3148 3156
3149 3157 Returns a file name of the form oldname~tag[~number] which does not exist
3150 3158 in the provided context and is not in the set of other names.
3151 3159 """
3152 3160 if others is None:
3153 3161 others = set()
3154 3162
3155 3163 fn = b'%s~%s' % (f, tag)
3156 3164 if fn not in ctx and fn not in others:
3157 3165 return fn
3158 3166 for n in itertools.count(1):
3159 3167 fn = b'%s~%s~%s' % (f, tag, n)
3160 3168 if fn not in ctx and fn not in others:
3161 3169 return fn
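# Editorial sketch (not part of the original module): 'ctx' only needs to
# support the 'in' operator here, so a plain set stands in for a changectx;
# the file names are illustrative.
def _example_safename():
    taken = {b'file.txt~orig', b'file.txt~orig~1'}
    assert safename(b'file.txt', b'orig', taken) == b'file.txt~orig~2'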
3162 3170
3163 3171
3164 3172 def readexactly(stream, n):
3165 3173 '''read n bytes from stream.read and abort if less was available'''
3166 3174 s = stream.read(n)
3167 3175 if len(s) < n:
3168 3176 raise error.Abort(
3169 3177 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
3170 3178 % (len(s), n)
3171 3179 )
3172 3180 return s
3173 3181
3174 3182
3175 3183 def uvarintencode(value):
3176 3184 """Encode an unsigned integer value to a varint.
3177 3185
3178 3186 A varint is a variable length integer of 1 or more bytes. Each byte
3179 3187 except the last has the most significant bit set. The lower 7 bits of
3180 3188 each byte store the 2's complement representation, least significant group
3181 3189 first.
3182 3190
3183 3191 >>> uvarintencode(0)
3184 3192 '\\x00'
3185 3193 >>> uvarintencode(1)
3186 3194 '\\x01'
3187 3195 >>> uvarintencode(127)
3188 3196 '\\x7f'
3189 3197 >>> uvarintencode(1337)
3190 3198 '\\xb9\\n'
3191 3199 >>> uvarintencode(65536)
3192 3200 '\\x80\\x80\\x04'
3193 3201 >>> uvarintencode(-1)
3194 3202 Traceback (most recent call last):
3195 3203 ...
3196 3204 ProgrammingError: negative value for uvarint: -1
3197 3205 """
3198 3206 if value < 0:
3199 3207 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3200 3208 bits = value & 0x7F
3201 3209 value >>= 7
3202 3210 bytes = []
3203 3211 while value:
3204 3212 bytes.append(pycompat.bytechr(0x80 | bits))
3205 3213 bits = value & 0x7F
3206 3214 value >>= 7
3207 3215 bytes.append(pycompat.bytechr(bits))
3208 3216
3209 3217 return b''.join(bytes)
3210 3218
3211 3219
3212 3220 def uvarintdecodestream(fh):
3213 3221 """Decode an unsigned variable length integer from a stream.
3214 3222
3215 3223 The passed argument is anything that has a ``.read(N)`` method.
3216 3224
3217 3225 >>> try:
3218 3226 ... from StringIO import StringIO as BytesIO
3219 3227 ... except ImportError:
3220 3228 ... from io import BytesIO
3221 3229 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3222 3230 0
3223 3231 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3224 3232 1
3225 3233 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3226 3234 127
3227 3235 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3228 3236 1337
3229 3237 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3230 3238 65536
3231 3239 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3232 3240 Traceback (most recent call last):
3233 3241 ...
3234 3242 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3235 3243 """
3236 3244 result = 0
3237 3245 shift = 0
3238 3246 while True:
3239 3247 byte = ord(readexactly(fh, 1))
3240 3248 result |= (byte & 0x7F) << shift
3241 3249 if not (byte & 0x80):
3242 3250 return result
3243 3251 shift += 7
3244 3252
3245 3253
3246 3254 # Passing the '' locale means that the locale should be set according to the
3247 3255 # user settings (environment variables).
3248 3256 # Python sometimes avoids setting the global locale settings. When interfacing
3249 3257 # with C code (e.g. the curses module or the Subversion bindings), the global
3250 3258 # locale settings must be initialized correctly. Python 2 does not initialize
3251 3259 # the global locale settings on interpreter startup. Python 3 sometimes
3252 3260 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3253 3261 # explicitly initialize it to get consistent behavior if it's not already
3254 3262 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3255 3263 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3256 3264 # if we can remove this code.
3257 3265 @contextlib.contextmanager
3258 3266 def with_lc_ctype():
3259 3267 oldloc = locale.setlocale(locale.LC_CTYPE, None)
3260 3268 if oldloc == 'C':
3261 3269 try:
3262 3270 try:
3263 3271 locale.setlocale(locale.LC_CTYPE, '')
3264 3272 except locale.Error:
3265 3273 # The likely case is that the locale from the environment
3266 3274 # variables is unknown.
3267 3275 pass
3268 3276 yield
3269 3277 finally:
3270 3278 locale.setlocale(locale.LC_CTYPE, oldloc)
3271 3279 else:
3272 3280 yield
3273 3281
3274 3282
3275 3283 def _estimatememory():
3276 3284 # type: () -> Optional[int]
3277 3285 """Provide an estimate for the available system memory in Bytes.
3278 3286
3279 3287 If no estimate can be provided on the platform, returns None.
3280 3288 """
3281 3289 if pycompat.sysplatform.startswith(b'win'):
3282 3290 # On Windows, use the GlobalMemoryStatusEx kernel function directly.
3283 3291 from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
3284 3292 from ctypes import ( # these names live in ctypes itself, not ctypes.wintypes
3285 3293 Structure,
3286 3294 byref,
3287 3295 sizeof,
3288 3296 windll,
3289 3297 )
3290 3298
3291 3299 class MEMORYSTATUSEX(Structure):
3292 3300 _fields_ = [
3293 3301 ('dwLength', DWORD),
3294 3302 ('dwMemoryLoad', DWORD),
3295 3303 ('ullTotalPhys', DWORDLONG),
3296 3304 ('ullAvailPhys', DWORDLONG),
3297 3305 ('ullTotalPageFile', DWORDLONG),
3298 3306 ('ullAvailPageFile', DWORDLONG),
3299 3307 ('ullTotalVirtual', DWORDLONG),
3300 3308 ('ullAvailVirtual', DWORDLONG),
3301 3309 ('ullExtendedVirtual', DWORDLONG),
3302 3310 ]
3303 3311
3304 3312 x = MEMORYSTATUSEX()
3305 3313 x.dwLength = sizeof(x)
3306 3314 windll.kernel32.GlobalMemoryStatusEx(byref(x))
3307 3315 return x.ullAvailPhys
3308 3316
3309 3317 # On newer Unix-like systems and Mac OSX, the sysconf interface
3310 3318 # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
3311 3319 # seems to be implemented on most systems.
3312 3320 try:
3313 3321 pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
3314 3322 pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
3315 3323 return pagesize * pages
3316 3324 except OSError: # sysconf can fail
3317 3325 pass
3318 3326 except KeyError: # unknown parameter
3319 3327 pass