##// END OF EJS Templates
typing: narrow the scope of some recent disabled import warnings...
Matt Harbison -
r52624:92845af3 default
parent child Browse files
Show More
@@ -1,761 +1,761
1 1 # chgserver.py - command server extension for cHg
2 2 #
3 3 # Copyright 2011 Yuya Nishihara <yuya@tcha.org>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """command server extension for cHg
9 9
10 10 'S' channel (read/write)
11 11 propagate ui.system() request to client
12 12
13 13 'attachio' command
14 14 attach client's stdio passed by sendmsg()
15 15
16 16 'chdir' command
17 17 change current directory
18 18
19 19 'setenv' command
20 20 replace os.environ completely
21 21
22 22 'setumask' command (DEPRECATED)
23 23 'setumask2' command
24 24 set umask
25 25
26 26 'validate' command
27 27 reload the config and check if the server is up to date
28 28
29 29 Config
30 30 ------
31 31
32 32 ::
33 33
34 34 [chgserver]
35 35 # how long (in seconds) should an idle chg server exit
36 36 idletimeout = 3600
37 37
38 38 # whether to skip config or env change checks
39 39 skiphash = False
40 40 """
41 41
42 42
43 43 import inspect
44 44 import os
45 45 import re
46 46 import socket
47 47 import stat
48 48 import struct
49 49 import time
50 50
51 51 from typing import (
52 52 Optional,
53 53 )
54 54
55 55 from .i18n import _
56 56 from .node import hex
57 57
58 58 from . import (
59 59 commandserver,
60 60 encoding,
61 61 error,
62 62 extensions,
63 63 pycompat,
64 64 util,
65 65 )
66 66
67 67 from .utils import (
68 68 hashutil,
69 69 procutil,
70 70 stringutil,
71 71 )
72 72
73 73
def _hashlist(items):
    """Return the sha1 hex digest of the pprint serialization of ``items``."""
    serialized = stringutil.pprint(items)
    digest = hashutil.sha1(serialized).digest()
    return hex(digest)
77 77
78 78
# sensitive config sections affecting confighash; a change in any of these
# invalidates the running server (see _confighash below)
_configsections = [
    b'alias',  # affects global state commands.table
    b'diff-tools',  # affects whether gui or not in extdiff's uisetup
    b'eol',  # uses setconfig('eol', ...)
    b'extdiff',  # uisetup will register new commands
    b'extensions',
    b'fastannotate',  # affects annotate command and adds fastannonate cmd
    b'merge-tools',  # affects whether gui or not in extdiff's uisetup
    b'schemes',  # extsetup will update global hg.schemes
]

# individual (section, item) pairs that are sensitive but whose whole
# section is not
_configsectionitems = [
    (b'commands', b'show.aliasprefix'),  # show.py reads it in extsetup
]
94 94
# sensitive environment variables affecting confighash (verbose regex;
# whitespace inside the pattern is insignificant because of re.X)
_envre = re.compile(
    br'''\A(?:
                    CHGHG
                    |HG(?:DEMANDIMPORT|EMITWARNINGS|MODULEPOLICY|PROF|RCPATH)?
                    |HG(?:ENCODING|PLAIN).*
                    |LANG(?:UAGE)?
                    |LC_.*
                    |LD_.*
                    |PATH
                    |PYTHON.*
                    |TERM(?:INFO)?
                    |TZ
                    )\Z''',
    re.X,
)
111 111
112 112
def _confighash(ui):
    """return a quick hash for detecting config/env changes

    confighash is the hash of sensitive config items and environment variables.

    for chgserver, it is designed that once confighash changes, the server is
    not qualified to serve its client and should redirect the client to a new
    server. different from mtimehash, confighash change will not mark the
    server outdated and exit since the user can have different configs at the
    same time.
    """
    # collect whole sections first, then individual sensitive items
    sensitive = [ui.configitems(section) for section in _configsections]
    for section, item in _configsectionitems:
        sensitive.append(ui.config(section, item))
    sectionhash = _hashlist(sensitive)
    # If $CHGHG is set, the change to $HG should not trigger a new chg server
    ignored = {b'HG'} if b'CHGHG' in encoding.environ else set()
    envitems = []
    for key, value in encoding.environ.items():
        if _envre.match(key) and key not in ignored:
            envitems.append((key, value))
    envhash = _hashlist(sorted(envitems))
    return sectionhash[:6] + envhash[:6]
142 142
143 143
def _getmtimepaths(ui):
    """get a list of paths that should be checked to detect change

    The list will include:
    - extensions (will not cover all files for complex extensions)
    - mercurial/__version__.py
    - python binary
    """
    modules = [m for n, m in extensions.extensions(ui)]
    try:
        # __version__ only exists in installed/built trees
        from . import __version__  # pytype: disable=import-error

        modules.append(__version__)
    except ImportError:
        pass
    files = []
    if pycompat.sysexecutable:
        files.append(pycompat.sysexecutable)
    for m in modules:
        try:
            # inspect.getabsfile() raises TypeError for built-in modules,
            # which have no source file; those are simply skipped
            files.append(pycompat.fsencode(inspect.getabsfile(m)))
        except TypeError:
            pass
    return sorted(set(files))
168 168
169 169
def _mtimehash(paths):
    """return a quick hash for detecting file changes

    mtimehash calls stat on given paths and calculate a hash based on size and
    mtime of each file. mtimehash does not read file content because reading is
    expensive. therefore it's not 100% reliable for detecting content changes.
    it's possible to return different hashes for same file contents.
    it's also possible to return a same hash for different file contents for
    some carefully crafted situation.

    for chgserver, it is designed that once mtimehash changes, the server is
    considered outdated immediately and should no longer provide service.

    mtimehash is not included in confighash because we only know the paths of
    extensions after importing them (there is imp.find_module but that faces
    race conditions). We need to calculate confighash without importing.
    """

    def trystat(path):
        try:
            st = os.stat(path)
        except OSError:
            # could be ENOENT, EPERM etc. not fatal in any case
            return None
        return (st[stat.ST_MTIME], st.st_size)

    return _hashlist(pycompat.maplist(trystat, paths))[:12]
197 197
198 198
class hashstate:
    """a structure storing confighash, mtimehash, paths used for mtimehash"""

    def __init__(self, confighash, mtimehash, mtimepaths):
        self.confighash = confighash
        self.mtimehash = mtimehash
        self.mtimepaths = mtimepaths

    @staticmethod
    def fromui(ui, mtimepaths=None):
        """Build a hashstate from ``ui``, computing mtimepaths if not given."""
        mtimepaths = _getmtimepaths(ui) if mtimepaths is None else mtimepaths
        state = hashstate(_confighash(ui), _mtimehash(mtimepaths), mtimepaths)
        ui.log(
            b'cmdserver',
            b'confighash = %s mtimehash = %s\n',
            state.confighash,
            state.mtimehash,
        )
        return state
220 220
221 221
def _newchgui(srcui, csystem, attachio):
    """Return a copy of ``srcui`` whose system()/pager requests are proxied
    to the chg client through ``csystem`` (the 'S' channel callable)."""

    class chgui(srcui.__class__):
        def __init__(self, src=None):
            super(chgui, self).__init__(src)
            if src:
                # inherit the channel from the copied ui when it has one
                self._csystem = getattr(src, '_csystem', csystem)
            else:
                self._csystem = csystem

        def _runsystem(self, cmd, environ, cwd, out):
            # fallback to the original system method if
            #  a. the output stream is not stdout (e.g. stderr, cStringIO),
            #  b. or stdout is redirected by protectfinout(),
            # because the chg client is not aware of these situations and
            # will behave differently (i.e. write to stdout).
            if (
                out is not self.fout
                or not hasattr(self.fout, 'fileno')
                or self.fout.fileno() != procutil.stdout.fileno()
                or self._finoutredirected
            ):
                return procutil.system(cmd, environ=environ, cwd=cwd, out=out)
            # flush buffered output before the client writes to the tty
            self.flush()
            return self._csystem(cmd, procutil.shellenviron(environ), cwd)

        def _runpager(self, cmd, env=None):
            self._csystem(
                cmd,
                procutil.shellenviron(env),
                type=b'pager',
                cmdtable={b'attachio': attachio},
            )
            return True

    return chgui(srcui)
257 257
258 258
def _loadnewui(srcui, args, cdebug):
    """Reload configuration into fresh (ui, lui) objects for one request.

    Reuses ``srcui``'s stdio and environ, re-applies early command-line
    options from ``args``, copies dynamic (non-file) config values, and
    loads working-directory/repository configuration.
    """
    from . import dispatch  # avoid cycle

    newui = srcui.__class__.load()
    for a in ['fin', 'fout', 'ferr', 'environ']:
        setattr(newui, a, getattr(srcui, a))
    if hasattr(srcui, '_csystem'):
        newui._csystem = srcui._csystem

    # command line args
    options = dispatch._earlyparseopts(newui, args)
    dispatch._parseconfig(newui, options[b'config'])

    # stolen from tortoisehg.util.copydynamicconfig()
    for section, name, value in srcui.walkconfig():
        source = srcui.configsource(section, name)
        if b':' in source or source == b'--config' or source.startswith(b'$'):
            # path:line or command line, or environ
            continue
        newui.setconfig(section, name, value, source)

    # load wd and repo config, copied from dispatch.py
    cwd = options[b'cwd']
    cwd = cwd and os.path.realpath(cwd) or None
    rpath = options[b'repository']
    path, newlui = dispatch._getlocal(newui, rpath, wd=cwd)

    extensions.populateui(newui)
    commandserver.setuplogging(newui, fp=cdebug)
    if newui is not newlui:
        extensions.populateui(newlui)
        commandserver.setuplogging(newlui, fp=cdebug)

    return (newui, newlui)
293 293
294 294
class channeledsystem:
    """Propagate ui.system() request in the following format:

    payload length (unsigned int),
    type, '\0',
    cmd, '\0',
    cwd, '\0',
    envkey, '=', val, '\0',
    ...
    envkey, '=', val

    if type == 'system', waits for:

    exitcode length (unsigned int),
    exitcode (int)

    if type == 'pager', repetitively waits for a command name ending with '\n'
    and executes it defined by cmdtable, or exits the loop if the command name
    is empty.
    """

    def __init__(self, in_, out, channel):
        self.in_ = in_
        self.out = out
        self.channel = channel

    def __call__(self, cmd, environ, cwd=None, type=b'system', cmdtable=None):
        # serialize the request as NUL-separated fields preceded by a
        # channel/length header (big-endian char + unsigned int)
        args = [type, cmd, util.abspath(cwd or b'.')]
        args.extend(b'%s=%s' % (k, v) for k, v in environ.items())
        data = b'\0'.join(args)
        self.out.write(struct.pack(b'>cI', self.channel, len(data)))
        self.out.write(data)
        self.out.flush()

        if type == b'system':
            # the response must be exactly a 4-byte big-endian exit code
            length = self.in_.read(4)
            (length,) = struct.unpack(b'>I', length)
            if length != 4:
                raise error.Abort(_(b'invalid response'))
            (rc,) = struct.unpack(b'>i', self.in_.read(4))
            return rc
        elif type == b'pager':
            # serve cmdtable commands until an empty command name arrives
            while True:
                cmd = self.in_.readline()[:-1]
                if not cmd:
                    break
                if cmdtable and cmd in cmdtable:
                    cmdtable[cmd]()
                else:
                    raise error.Abort(_(b'unexpected command: %s') % cmd)
        else:
            raise error.ProgrammingError(b'invalid S channel type: %s' % type)
347 347
348 348
# stdio channels attached from the client; entries are native str names
# used with getattr()/setattr() on the server and ui objects
_iochannels = [
    # server.ch, ui.fp, mode
    ('cin', 'fin', 'rb'),
    ('cout', 'fout', 'wb'),
    ('cerr', 'ferr', 'wb'),
]
355 355
356 356
class chgcmdserver(commandserver.server):
    """Command server speaking the chg protocol over a unix socket.

    On top of the base command server, it can attach to the client's stdio
    file descriptors (passed via SCM_RIGHTS), mutate process state (cwd,
    environ, umask, process title) on behalf of the client, and validate
    that this server is still up to date w.r.t. config/extension changes.
    """

    def __init__(
        self, ui, repo, fin, fout, sock, prereposetups, hashstate, baseaddress
    ):
        super(chgcmdserver, self).__init__(
            _newchgui(ui, channeledsystem(fin, fout, b'S'), self.attachio),
            repo,
            fin,
            fout,
            prereposetups,
        )
        self.clientsock = sock
        self._ioattached = False
        self._oldios = []  # original (self.ch, ui.fp, fd) before "attachio"
        self.hashstate = hashstate
        self.baseaddress = baseaddress
        if hashstate is not None:
            # 'validate' only makes sense when hashes are tracked; copy the
            # table to avoid mutating the class-level capabilities dict
            self.capabilities = self.capabilities.copy()
            self.capabilities[b'validate'] = chgcmdserver.validate

    def cleanup(self):
        super(chgcmdserver, self).cleanup()
        # dispatch._runcatch() does not flush outputs if exception is not
        # handled by dispatch._dispatch()
        self.ui.flush()
        self._restoreio()
        self._ioattached = False

    def attachio(self):
        """Attach to client's stdio passed via unix domain socket; all
        channels except cresult will no longer be used
        """
        # tell client to sendmsg() with 1-byte payload, which makes it
        # distinctive from "attachio\n" command consumed by client.read()
        self.clientsock.sendall(struct.pack(b'>cI', b'I', 1))

        data, ancdata, msg_flags, address = self.clientsock.recvmsg(1, 256)
        assert len(ancdata) == 1
        cmsg_level, cmsg_type, cmsg_data = ancdata[0]
        assert cmsg_level == socket.SOL_SOCKET
        assert cmsg_type == socket.SCM_RIGHTS
        # memoryview.cast() was added in typeshed 61600d68772a, but pytype
        # still complains
        # pytype: disable=attribute-error
        clientfds = memoryview(cmsg_data).cast('i').tolist()
        # pytype: enable=attribute-error
        self.ui.log(b'chgserver', b'received fds: %r\n', clientfds)

        ui = self.ui
        ui.flush()
        self._saveio()
        for fd, (cn, fn, mode) in zip(clientfds, _iochannels):
            assert fd > 0
            fp = getattr(ui, fn)
            os.dup2(fd, fp.fileno())
            os.close(fd)
            if self._ioattached:
                continue
            # reset buffering mode when client is first attached. as we want
            # to see output immediately on pager, the mode stays unchanged
            # when client re-attached. ferr is unchanged because it should
            # be unbuffered no matter if it is a tty or not.
            #
            # fix: fn is a native str (see _iochannels), so the previous
            # comparison against b'ferr' could never match and ferr was
            # incorrectly re-wrapped like the other channels.
            if fn == 'ferr':
                newfp = fp
            else:
                # On Python 3, the standard library doesn't offer line-buffered
                # binary streams, so wrap/unwrap it.
                if fp.isatty():
                    newfp = procutil.make_line_buffered(fp)
                else:
                    newfp = procutil.unwrap_line_buffered(fp)
            if newfp is not fp:
                setattr(ui, fn, newfp)
            setattr(self, cn, newfp)

        self._ioattached = True
        self.cresult.write(struct.pack(b'>i', len(clientfds)))

    def _saveio(self):
        """Duplicate current stdio fds so _restoreio() can put them back."""
        if self._oldios:
            return
        ui = self.ui
        for cn, fn, _mode in _iochannels:
            ch = getattr(self, cn)
            fp = getattr(ui, fn)
            fd = os.dup(fp.fileno())
            self._oldios.append((ch, fp, fd))

    def _restoreio(self):
        """Restore the stdio fds/objects saved by _saveio()."""
        if not self._oldios:
            return
        nullfd = os.open(os.devnull, os.O_WRONLY)
        ui = self.ui
        for (ch, fp, fd), (cn, fn, mode) in zip(self._oldios, _iochannels):
            try:
                if 'w' in mode:
                    # Discard buffered data which couldn't be flushed because
                    # of EPIPE. The data should belong to the current session
                    # and should never persist.
                    os.dup2(nullfd, fp.fileno())
                    fp.flush()
                os.dup2(fd, fp.fileno())
                os.close(fd)
            except OSError as err:
                # According to issue6330, running chg on heavy loaded systems
                # can lead to EBUSY. [man dup2] indicates that, on Linux,
                # EBUSY comes from a race condition between open() and dup2().
                # However it's not clear why open() race occurred for
                # newfd=stdin/out/err.
                self.ui.log(
                    b'chgserver',
                    b'got %s while duplicating %s\n',
                    stringutil.forcebytestr(err),
                    fn,
                )
            setattr(self, cn, ch)
            setattr(ui, fn, fp)
        os.close(nullfd)
        del self._oldios[:]

    def validate(self):
        """Reload the config and check if the server is up to date

        Read a list of '\0' separated arguments.
        Write a non-empty list of '\0' separated instruction strings or '\0'
        if the list is empty.
        An instruction string could be either:
            - "unlink $path", the client should unlink the path to stop the
              outdated server.
            - "redirect $path", the client should attempt to connect to $path
              first. If it does not work, start a new server. It implies
              "reconnect".
            - "exit $n", the client should exit directly with code n.
              This may happen if we cannot parse the config.
            - "reconnect", the client should close the connection and
              reconnect.
        If neither "reconnect" nor "redirect" is included in the instruction
        list, the client can continue with this server after completing all
        the instructions.
        """
        args = self._readlist()
        errorraised = False
        detailed_exit_code = 255
        try:
            self.ui, lui = _loadnewui(self.ui, args, self.cdebug)
        except error.RepoError as inst:
            # RepoError can be raised while trying to read shared source
            # configuration
            self.ui.error(_(b"abort: %s\n") % stringutil.forcebytestr(inst))
            if inst.hint:
                self.ui.error(_(b"(%s)\n") % inst.hint)
            errorraised = True
        except error.Error as inst:
            if inst.detailed_exit_code is not None:
                detailed_exit_code = inst.detailed_exit_code
            self.ui.error(inst.format())
            errorraised = True

        if errorraised:
            self.ui.flush()
            exit_code = 255
            if self.ui.configbool(b'ui', b'detailed-exit-code'):
                exit_code = detailed_exit_code
            self.cresult.write(b'exit %d' % exit_code)
            return
        newhash = hashstate.fromui(lui, self.hashstate.mtimepaths)
        insts = []
        if newhash.mtimehash != self.hashstate.mtimehash:
            addr = _hashaddress(self.baseaddress, self.hashstate.confighash)
            insts.append(b'unlink %s' % addr)
            # mtimehash is empty if one or more extensions fail to load.
            # to be compatible with hg, still serve the client this time.
            if self.hashstate.mtimehash:
                insts.append(b'reconnect')
        if newhash.confighash != self.hashstate.confighash:
            addr = _hashaddress(self.baseaddress, newhash.confighash)
            insts.append(b'redirect %s' % addr)
        self.ui.log(b'chgserver', b'validate: %s\n', stringutil.pprint(insts))
        self.cresult.write(b'\0'.join(insts) or b'\0')

    def chdir(self):
        """Change current directory

        Note that the behavior of --cwd option is bit different from this.
        It does not affect --config parameter.
        """
        path = self._readstr()
        if not path:
            return
        self.ui.log(b'chgserver', b"chdir to '%s'\n", path)
        os.chdir(path)

    def setumask(self):
        """Change umask (DEPRECATED)"""
        # BUG: this does not follow the message frame structure, but kept for
        # backward compatibility with old chg clients for some time
        self._setumask(self._read(4))

    def setumask2(self):
        """Change umask"""
        data = self._readstr()
        if len(data) != 4:
            raise ValueError(b'invalid mask length in setumask2 request')
        self._setumask(data)

    def _setumask(self, data):
        # data is a 4-byte big-endian unsigned int mask
        mask = struct.unpack(b'>I', data)[0]
        self.ui.log(b'chgserver', b'setumask %r\n', mask)
        util.setumask(mask)

    def runcommand(self):
        # pager may be attached within the runcommand session, which should
        # be detached at the end of the session. otherwise the pager wouldn't
        # receive EOF.
        globaloldios = self._oldios
        self._oldios = []
        try:
            return super(chgcmdserver, self).runcommand()
        finally:
            self._restoreio()
            self._oldios = globaloldios

    def setenv(self):
        """Clear and update os.environ

        Note that not all variables can make an effect on the running process.
        """
        l = self._readlist()
        try:
            newenv = dict(s.split(b'=', 1) for s in l)
        except ValueError:
            raise ValueError(b'unexpected value in setenv request')
        self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys()))

        encoding.environ.clear()
        encoding.environ.update(newenv)

    capabilities = commandserver.server.capabilities.copy()
    capabilities.update(
        {
            b'attachio': attachio,
            b'chdir': chdir,
            b'runcommand': runcommand,
            b'setenv': setenv,
            b'setumask': setumask,
            b'setumask2': setumask2,
        }
    )

    # setprocname is only offered where the platform supports it
    if hasattr(procutil, 'setprocname'):

        def setprocname(self):
            """Change process title"""
            name = self._readstr()
            self.ui.log(b'chgserver', b'setprocname: %r\n', name)
            procutil.setprocname(name)

        capabilities[b'setprocname'] = setprocname
615 615
616 616
617 617 def _tempaddress(address):
618 618 return b'%s.%d.tmp' % (address, os.getpid())
619 619
620 620
621 621 def _hashaddress(address, hashstr):
622 622 # if the basename of address contains '.', use only the left part. this
623 623 # makes it possible for the client to pass 'server.tmp$PID' and follow by
624 624 # an atomic rename to avoid locking when spawning new servers.
625 625 dirname, basename = os.path.split(address)
626 626 basename = basename.split(b'.', 1)[0]
627 627 return b'%s-%s' % (os.path.join(dirname, basename), hashstr)
628 628
629 629
class chgunixservicehandler:
    """Set of operations for chg services"""

    pollinterval = 1  # [sec]

    # populated by _inithashstate(); None until bindsocket() is called
    _hashstate: Optional[hashstate]
    _baseaddress: Optional[bytes]
    _realaddress: Optional[bytes]

    def __init__(self, ui):
        self.ui = ui

        self._hashstate = None
        self._baseaddress = None
        self._realaddress = None

        self._idletimeout = ui.configint(b'chgserver', b'idletimeout')
        self._lastactive = time.time()

    def bindsocket(self, sock, address):
        """Bind ``sock`` to the hash-suffixed form of ``address``."""
        self._inithashstate(address)
        self._checkextensions()
        self._bind(sock)
        self._createsymlink()
        # no "listening at" message should be printed to simulate hg behavior

    def _inithashstate(self, address):
        # compute confighash/mtimehash unless chgserver.skiphash disables it
        self._baseaddress = address
        if self.ui.configbool(b'chgserver', b'skiphash'):
            self._hashstate = None
            self._realaddress = address
            return
        self._hashstate = hashstate.fromui(self.ui)
        self._realaddress = _hashaddress(address, self._hashstate.confighash)

    def _checkextensions(self):
        if not self._hashstate:
            return
        if extensions.notloaded():
            # one or more extensions failed to load. mtimehash becomes
            # meaningless because we do not know the paths of those extensions.
            # set mtimehash to an illegal hash value to invalidate the server.
            self._hashstate.mtimehash = b''

    def _bind(self, sock):
        # use a unique temp address so we can stat the file and do ownership
        # check later
        tempaddress = _tempaddress(self._realaddress)
        util.bindunixsocket(sock, tempaddress)
        self._socketstat = os.stat(tempaddress)
        sock.listen(socket.SOMAXCONN)
        # rename will replace the old socket file if exists atomically. the
        # old server will detect ownership change and exit.
        util.rename(tempaddress, self._realaddress)

    def _createsymlink(self):
        # expose the hash-suffixed socket under the plain base address too
        if self._baseaddress == self._realaddress:
            return
        tempaddress = _tempaddress(self._baseaddress)
        os.symlink(os.path.basename(self._realaddress), tempaddress)
        util.rename(tempaddress, self._baseaddress)

    def _issocketowner(self):
        # compare inode/mtime against the stat captured in _bind()
        try:
            st = os.stat(self._realaddress)
            return (
                st.st_ino == self._socketstat.st_ino
                and st[stat.ST_MTIME] == self._socketstat[stat.ST_MTIME]
            )
        except OSError:
            return False

    def unlinksocket(self, address):
        if not self._issocketowner():
            return
        # it is possible to have a race condition here that we may
        # remove another server's socket file. but that's okay
        # since that server will detect and exit automatically and
        # the client will start a new server on demand.
        util.tryunlink(self._realaddress)

    def shouldexit(self):
        """Return True when the service loop should stop (ownership lost
        or idle timeout exceeded)."""
        if not self._issocketowner():
            self.ui.log(
                b'chgserver', b'%s is not owned, exiting.\n', self._realaddress
            )
            return True
        if time.time() - self._lastactive > self._idletimeout:
            self.ui.log(b'chgserver', b'being idle too long. exiting.\n')
            return True
        return False

    def newconnection(self):
        # called by the service loop whenever a client connects
        self._lastactive = time.time()

    def createcmdserver(self, repo, conn, fin, fout, prereposetups):
        return chgcmdserver(
            self.ui,
            repo,
            fin,
            fout,
            conn,
            prereposetups,
            self._hashstate,
            self._baseaddress,
        )
736 736
737 737
def chgunixservice(ui, repo, opts):
    """Entry point: create the chg unix-socket forking service.

    ``repo`` is intentionally dropped because one chg server serves
    multiple repositories.
    """
    # CHGINTERNALMARK is set by chg client. It is an indication of things are
    # started by chg so other code can do things accordingly, like disabling
    # demandimport or detecting chg client started by chg client. When executed
    # here, CHGINTERNALMARK is no longer useful and hence dropped to make
    # environ cleaner.
    if b'CHGINTERNALMARK' in encoding.environ:
        del encoding.environ[b'CHGINTERNALMARK']
    # Python3.7+ "coerces" the LC_CTYPE environment variable to a UTF-8 one if
    # it thinks the current value is "C". This breaks the hash computation and
    # causes chg to restart loop.
    if b'CHGORIG_LC_CTYPE' in encoding.environ:
        encoding.environ[b'LC_CTYPE'] = encoding.environ[b'CHGORIG_LC_CTYPE']
        del encoding.environ[b'CHGORIG_LC_CTYPE']
    elif b'CHG_CLEAR_LC_CTYPE' in encoding.environ:
        if b'LC_CTYPE' in encoding.environ:
            del encoding.environ[b'LC_CTYPE']
        del encoding.environ[b'CHG_CLEAR_LC_CTYPE']

    if repo:
        # one chgserver can serve multiple repos. drop repo information
        ui.setconfig(b'bundle', b'mainreporoot', b'', b'repo')
    h = chgunixservicehandler(ui)
    return commandserver.unixforkingservice(ui, repo=None, opts=opts, handler=h)
@@ -1,159 +1,159
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10 import sys
11 11 import typing
12 12
13 13 if typing.TYPE_CHECKING:
14 14 from typing import (
15 15 Dict,
16 16 Optional,
17 17 Tuple,
18 18 )
19 19
# Rules for how modules can be loaded. Values are:
#
#    c - require C extensions
#    rust+c - require Rust and C extensions
#    rust+c-allow - allow Rust and C extensions with fallback to pure Python
#                   for each
#    allow - allow pure Python implementation when C loading fails
#    cffi - required cffi versions (implemented within pure module)
#    cffi-allow - allow pure Python implementation if cffi version is missing
#    py - only load pure Python modules
#
# By default, fall back to the pure modules so the in-place build can
# run without recompiling the C extensions. This will be overridden by
# __modulepolicy__ generated by setup.py.
policy: bytes = b'allow'
# policy -> (versioned package, pure package); None means "no such option"
_packageprefs: "Dict[bytes, Tuple[Optional[str], Optional[str]]]" = {
    # policy: (versioned package, pure package)
    b'c': ('cext', None),
    b'allow': ('cext', 'pure'),
    b'cffi': ('cffi', None),
    b'cffi-allow': ('cffi', 'pure'),
    b'py': (None, 'pure'),
    # For now, rust policies impact importrust only
    b'rust+c': ('cext', None),
    b'rust+c-allow': ('cext', 'pure'),
}
46 46
# __modulepolicy__ is generated by setup.py; absent in a plain checkout
try:
    from . import __modulepolicy__  # pytype: disable=import-error

    policy: bytes = __modulepolicy__.modulepolicy
except ImportError:
    pass

# PyPy doesn't load C extensions.
#
# The canonical way to do this is to test platform.python_implementation().
# But we don't import platform and don't bloat for it here.
if '__pypy__' in sys.builtin_module_names:
    policy: bytes = b'cffi'

# Environment variable can always force settings.
if 'HGMODULEPOLICY' in os.environ:
    policy: bytes = os.environ['HGMODULEPOLICY'].encode('utf-8')
64 64
65 65
def _importfrom(pkgname: str, modname: str):
    """Import and return ``.<pkgname>.<modname>`` relative to this package.

    Raises ImportError if the submodule is missing from the package.
    """
    # from .<pkgname> import <modname> (where . is looked through this module)
    fakelocals = {}
    pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
    try:
        fakelocals[modname] = mod = getattr(pkg, modname)
    except AttributeError:
        raise ImportError('cannot import name %s' % modname)
    # force import; fakelocals[modname] may be replaced with the real module
    getattr(mod, '__doc__', None)
    return fakelocals[modname]
77 77
78 78
# keep in sync with "version" in C modules
_cextversions: "Dict[Tuple[str, str], int]" = {
    ('cext', 'base85'): 1,
    ('cext', 'bdiff'): 3,
    ('cext', 'mpatch'): 1,
    ('cext', 'osutil'): 4,
    ('cext', 'parsers'): 21,
}

# map import request to other package or module
_modredirects: "Dict[Tuple[str, str], Tuple[str, str]]" = {
    ('cext', 'charencode'): ('cext', 'parsers'),
    ('cffi', 'base85'): ('pure', 'base85'),
    ('cffi', 'charencode'): ('pure', 'charencode'),
    ('cffi', 'parsers'): ('pure', 'parsers'),
}
95 95
96 96
def _checkmod(pkgname: str, modname: str, mod) -> None:
    """Raise ImportError unless ``mod`` reports the expected C API version."""
    expected = _cextversions.get((pkgname, modname))
    actual = getattr(mod, 'version', None)
    if actual == expected:
        return
    raise ImportError(
        'cannot import module %s.%s '
        '(expected version: %d, actual: %r)' % (pkgname, modname, expected, actual)
    )
106 106
107 107
def importmod(modname: str):
    """Import module according to policy and check API version"""
    try:
        verpkg, purepkg = _packageprefs[policy]
    except KeyError:
        raise ImportError('invalid HGMODULEPOLICY %r' % policy)
    assert verpkg or purepkg
    if verpkg:
        pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
        try:
            mod = _importfrom(pn, mn)
            if pn == verpkg:
                # only version-check modules actually served by verpkg
                _checkmod(pn, mn, mod)
            return mod
        except ImportError:
            if not purepkg:
                raise
    # fall back to the pure implementation (possibly redirected)
    pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
    return _importfrom(pn, mn)
127 127
128 128
def _isrustpermissive() -> bool:
    """Assuming the policy is a Rust one, tell if it's permissive."""
    return policy[-6:] == b'-allow'
132 132
133 133
def importrust(modname: str, member: "Optional[str]" = None, default=None):
    """Import Rust module according to policy and availability.

    If policy isn't a Rust one, this returns `default`.

    If either the module or its member is not available, this returns `default`
    if policy is permissive and raises `ImportError` if not.
    """
    if not policy.startswith(b'rust'):
        return default

    try:
        mod = _importfrom('rustext', modname)
    except ImportError:
        if _isrustpermissive():
            return default
        raise
    if member is None:
        return mod

    try:
        return getattr(mod, member)
    except AttributeError:
        if _isrustpermissive():
            return default
        raise ImportError("Cannot import name %s" % member)
@@ -1,3384 +1,3384
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16
17 17 import abc
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import gc
22 22 import hashlib
23 23 import io
24 24 import itertools
25 25 import locale
26 26 import mmap
27 27 import os
28 28 import pickle # provides util.pickle symbol
29 29 import re as remod
30 30 import shutil
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import typing
36 36 import warnings
37 37
38 38 from typing import (
39 39 Any,
40 40 BinaryIO,
41 41 Callable,
42 42 Iterable,
43 43 Iterator,
44 44 List,
45 45 Optional,
46 46 Tuple,
47 47 )
48 48
49 49 from .node import hex
50 50 from .thirdparty import attr
51 51
52 52 # Force pytype to use the non-vendored package
53 53 if typing.TYPE_CHECKING:
54 54 # noinspection PyPackageRequirements
55 55 import attr
56 56
57 57 from .pycompat import (
58 58 open,
59 59 )
60 60 from hgdemandimport import tracing
61 61 from . import (
62 62 encoding,
63 63 error,
64 64 i18n,
65 65 policy,
66 66 pycompat,
67 67 typelib,
68 68 urllibcompat,
69 69 )
70 70 from .utils import (
71 71 compression,
72 72 hashutil,
73 73 procutil,
74 74 stringutil,
75 75 )
76 76
77 77 # keeps pyflakes happy
78 78 assert [
79 79 Iterable,
80 80 Iterator,
81 81 List,
82 82 Optional,
83 83 Tuple,
84 84 ]
85 85
86 86
# implementations selected at runtime (C / Rust / pure Python) according
# to HGMODULEPOLICY
base85 = policy.importmod('base85')
osutil = policy.importmod('osutil')

b85decode = base85.b85decode
b85encode = base85.b85encode

# flat aliases re-exported for historical callers of this module
cookielib = pycompat.cookielib
httplib = pycompat.httplib
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = io.BytesIO
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq
105 105
# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

# pick the platform implementation module once at import time; everything
# below re-exports it under stable flat names
if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

abspath = platform.abspath
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
get_password = platform.get_password
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
# prefer the policy-provided statfiles implementation when present
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username
159 159
160 160
def setumask(val: int) -> None:
    """Update the process umask; used by the chg server (no-op on Windows)."""
    global umask
    if pycompat.iswindows:
        return
    os.umask(val)
    # keep the module-level and platform-level aliases in sync with the
    # permission bits of the new mask
    masked = val & 0o777
    umask = masked
    platform.umask = masked
168 168
169 169
# small compat layer
compengines = compression.compengines
SERVERROLE = compression.SERVERROLE
CLIENTROLE = compression.CLIENTROLE

# Python compatibility

# unique sentinel object used to detect "argument not supplied"
_notset = object()
178 178
179 179
def bitsfrom(container):
    """Return the bitwise OR of every value in ``container`` (0 if empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
185 185
186 186
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
# re-suppress specific warnings that the blanket 'default' filters above
# would otherwise surface
if _dowarn:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(
        'ignore', 'bad escape', DeprecationWarning, 'mercurial'
    )
    warnings.filterwarnings(
        'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
    )
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(
        'ignore',
        'the imp module is deprecated',
        DeprecationWarning,
        'mercurial',
    )
216 216
217 217
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python-native deprecation warning.

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    text = msg + (
        b"\n(compatibility will be dropped after Mercurial-%s,"
        b" update your code.)"
    ) % version
    warnings.warn(pycompat.sysstr(text), DeprecationWarning, stacklevel + 1)
    # on python 3 with chg, we will need to explicitly flush the output
    sys.stderr.flush()
231 231
232 232
# supported digest algorithms, keyed by their (bytes) wire name
DIGESTS = {
    b'md5': hashlib.md5,
    b'sha1': hashutil.sha1,
    b'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']

# sanity check: every ranked digest must have an implementation
# NOTE(review): the loop variable ``k`` stays bound at module scope after
# this loop finishes.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
243 243
244 244
class digester:
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=b''):
        # one live hasher per requested digest name
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_(b'unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every underlying hasher."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the current hex digest for ``key``."""
        if key not in DIGESTS:
            # BUGFIX: previously interpolated ``k`` — a stale loop variable
            # leaked from the module-level DIGESTS_BY_STRENGTH check — so the
            # error message named the wrong digest type.
            raise error.Abort(_(b'unknown digest type: %s') % key)
        return hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
291 291
292 292
class digestchecker:
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, feeding the data to the digester."""
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        """Abort unless the observed size and all digests match."""
        if self._got != self._size:
            raise error.Abort(
                _(b'size mismatch: expected %d, got %d')
                % (self._size, self._got)
            )
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise error.Abort(
                    _(b'%s mismatch: expected %s, got %s')
                    % (name, expected, actual)
                )
328 328
329 329
# Python 3 has no ``buffer`` builtin; fall back to a zero-copy
# memoryview-based shim with the same call signature.
try:
    buffer = buffer  # pytype: disable=name-error
except NameError:

    def buffer(sliceable, offset=0, length=None):
        # view of ``sliceable`` from ``offset``, capped at ``length`` bytes
        # when a length is given
        if length is not None:
            return memoryview(sliceable)[offset : offset + length]
        return memoryview(sliceable)[offset:]


# default read size used by bufferedinputpipe._fillbuffer()
_chunksize = 4096
341 341
342 342
class bufferedinputpipe:
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        # wrapped file-like object; actual reads bypass it and use
        # os.read() on its file descriptor (see _fillbuffer)
        self._input = input
        # pending chunks of bytes, oldest first
        self._buffer = []
        # set once os.read() returns b'' (end of stream)
        self._eof = False
        # total number of bytes currently held in _buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # keep filling until we can satisfy the request or hit EOF
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        # at most one OS read, then serve whatever ended up buffered
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [b''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first newline in the newest chunk, -1 if absent
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0:  # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            # collapse all pending chunks into one before slicing
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data) :]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data
451 451
452 452
def has_mmap_populate():
    """Report whether :mod:`mmap` exposes ``MAP_POPULATE`` on this platform."""
    try:
        mmap.MAP_POPULATE
    except AttributeError:
        return False
    return True
455 455
456 456
def mmapread(fp, size=None, pre_populate=True):
    """Read a file's content using mmap.

    It is the caller's responsibility to check that the file system is
    mmap-safe (see `vfs.is_mmap_safe`).

    In some cases a plain bytes object is returned instead of a mapping.

    When `pre_populate` is True (the default) and the system supports it, the
    mapped pages are faulted in eagerly.  This slows down the initial mapping
    but avoids potentially crippling page faults on later access.  Pass
    `pre_populate=False` if that is not the desired behavior.
    """
    if size == 0:
        # mmap.mmap() interprets a length of 0 as "map everything", so the
        # "read zero bytes" request must be special-cased here.
        return b''
    length = 0 if size is None else size
    fd = getattr(fp, 'fileno', lambda: fp)()
    flags = mmap.MAP_PRIVATE
    if pre_populate:
        # MAP_POPULATE is Linux-specific; silently skip it elsewhere
        flags |= getattr(mmap, 'MAP_POPULATE', 0)
    try:
        return mmap.mmap(fd, length, flags=flags, prot=mmap.PROT_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work.
        # Check if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return b''
        raise
489 489
490 490
class fileobjectproxy:
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    # no instance __dict__: every attribute access funnels through the
    # __getattribute__/__setattr__ overrides below
    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, fh, observer):
        # bypass our own __setattr__, which delegates to the wrapped object
        object.__setattr__(self, '_orig', fh)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        # names served by the proxy itself; everything else is delegated
        # to the wrapped file object
        ours = {
            '_observer',
            # IOBase
            'close',
            # closed if a property
            'fileno',
            'flush',
            'isatty',
            'readable',
            'readline',
            'readlines',
            'seek',
            'seekable',
            'tell',
            'truncate',
            'writable',
            'writelines',
            # RawIOBase
            'read',
            'readall',
            'readinto',
            'write',
            # BufferedIOBase
            # raw is a property
            'detach',
            # read defined above
            'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, '_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    # Every wrapped I/O method below routes through _observedcall so the
    # observer sees the result and the arguments of each operation.

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'close', *args, **kwargs
        )

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'fileno', *args, **kwargs
        )

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'flush', *args, **kwargs
        )

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'isatty', *args, **kwargs
        )

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readable', *args, **kwargs
        )

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readline', *args, **kwargs
        )

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readlines', *args, **kwargs
        )

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seek', *args, **kwargs
        )

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'seekable', *args, **kwargs
        )

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'tell', *args, **kwargs
        )

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'truncate', *args, **kwargs
        )

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writable', *args, **kwargs
        )

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'writelines', *args, **kwargs
        )

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read', *args, **kwargs
        )

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readall', *args, **kwargs
        )

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'readinto', *args, **kwargs
        )

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'write', *args, **kwargs
        )

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'detach', *args, **kwargs
        )

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'read1', *args, **kwargs
        )
667 667
668 668
class observedbufferedinputpipe(bufferedinputpipe):
    """Observer-aware variant of ``bufferedinputpipe``.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``, so the proxy's observer would never see
    them.  This subclass re-publishes those ``os.read()`` events, and also
    the higher-level ``read()`` / ``readline()`` calls (which happen on
    this object rather than on the wrapped file).
    """

    def _notify(self, hook, *args):
        # Forward the event to the proxy's observer, if it defines the hook.
        fn = getattr(self._input._observer, hook, None)
        if fn:
            fn(*args)

    def _fillbuffer(self, size=_chunksize):
        data = super()._fillbuffer(size=size)
        self._notify('osread', data, size)
        return data

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        data = super().read(size)
        self._notify('bufferedread', data, size)
        return data

    def readline(self, *args, **kwargs):
        data = super().readline(*args, **kwargs)
        self._notify('bufferedreadline', data)
        return data
709 709
710 710
# socket methods intercepted by socketproxy; everything else is delegated
# directly to the wrapped socket object
PROXIED_SOCKET_METHODS = {
    'makefile',
    'recv',
    'recvfrom',
    'recvfrom_into',
    'recv_into',
    'send',
    'sendall',
    'sendto',
    'setblocking',
    'settimeout',
    'gettimeout',
    'setsockopt',
}


class socketproxy:
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    # no instance __dict__: attribute access funnels through the
    # __getattribute__/__setattr__ overrides below
    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, sock, observer):
        # bypass our own __setattr__, which delegates to the wrapped socket
        object.__setattr__(self, '_orig', sock)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, '_observedcall')(
            'makefile', *args, **kwargs
        )

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, '_observer')
        return makeloggingfileobject(
            observer.fh,
            res,
            observer.name,
            reads=observer.reads,
            writes=observer.writes,
            logdata=observer.logdata,
            logdataapis=observer.logdataapis,
        )

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv', *args, **kwargs
        )

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom', *args, **kwargs
        )

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom_into', *args, **kwargs
        )

    def recv_into(self, *args, **kwargs):
        # BUGFIX: this previously dispatched to 'recv_info', a method that
        # does not exist on socket objects, so any recv_into() call through
        # the proxy raised AttributeError instead of reading into the buffer.
        return object.__getattribute__(self, '_observedcall')(
            'recv_into', *args, **kwargs
        )

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'send', *args, **kwargs
        )

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendall', *args, **kwargs
        )

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendto', *args, **kwargs
        )

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setblocking', *args, **kwargs
        )

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'settimeout', *args, **kwargs
        )

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'gettimeout', *args, **kwargs
        )

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setsockopt', *args, **kwargs
        )
848 848
849 849
class baseproxyobserver:
    """Shared state and payload-logging helper for proxy observers."""

    def __init__(self, fh, name, logdata, logdataapis):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis

    def _writedata(self, data):
        """Append ``data`` to the log, honoring the logdata/logdataapis flags."""
        if not self.logdata:
            # Payload logging disabled: just terminate the API line.
            if self.logdataapis:
                self.fh.write(b'\n')
                self.fh.flush()
            return

        if b'\n' in data:
            # Payload containing newlines: one log line per data line.
            if self.logdataapis:
                self.fh.write(b':\n')
            for piece in data.splitlines(True):
                self.fh.write(
                    b'%s> %s\n' % (self.name, stringutil.escapestr(piece))
                )
            self.fh.flush()
            return

        # Simple case: the whole payload fits on a single log line.
        if self.logdataapis:
            self.fh.write(b': %s\n' % stringutil.escapestr(data))
        else:
            self.fh.write(
                b'%s> %s\n' % (self.name, stringutil.escapestr(data))
            )
        self.fh.flush()
885 885
886 886
class fileobjectobserver(baseproxyobserver):
    """Logs file object activity.

    Each hook receives the operation's result ``res`` followed by the
    original call arguments; hooks are invoked by
    ``fileobjectproxy._observedcall``.
    """

    def __init__(
        self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
    ):
        super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
        # per-direction switches: skip logging of reads and/or writes
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = b''

        if size == -1 and res == b'':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
            )

        # res is the number of bytes actually read into dest (or None)
        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
            )

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedreadline() -> %d' % (self.name, len(res))
            )

        self._writedata(res)
985 985
986 986
def makeloggingfileobject(
    logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
):
    """Turn a file object into a logging file object.

    Activity on ``fh`` is logged to ``logh``, prefixed with ``name``.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
1001 1001
1002 1002
class socketobserver(baseproxyobserver):
    """Logs socket activity."""

    def __init__(
        self,
        fh,
        name,
        reads=True,
        writes=True,
        states=True,
        logdata=False,
        logdataapis=True,
    ):
        super().__init__(fh, name, logdata, logdataapis)
        # Per-category toggles controlling what gets logged.
        self.reads = reads
        self.writes = writes
        self.states = states

    def makefile(self, res, mode=None, bufsize=None):
        if self.states:
            self.fh.write(
                b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize)
            )

    def recv(self, res, size, flags=0):
        if self.reads:
            if self.logdataapis:
                self.fh.write(
                    b'%s> recv(%d, %d) -> %d'
                    % (self.name, size, flags, len(res))
                )
            self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if self.reads:
            if self.logdataapis:
                self.fh.write(
                    b'%s> recvfrom(%d, %d) -> %d'
                    % (self.name, size, flags, len(res[0]))
                )
            # res is a (data, address) tuple; only the payload is logged.
            self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if self.reads:
            if self.logdataapis:
                self.fh.write(
                    b'%s> recvfrom_into(%d, %d) -> %d'
                    % (self.name, size, flags, res[0])
                )
            # res[0] is the number of bytes received into ``buf``.
            self._writedata(buf[0 : res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if self.reads:
            if self.logdataapis:
                self.fh.write(
                    b'%s> recv_into(%d, %d) -> %d'
                    % (self.name, size, flags, res)
                )
            self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if self.writes:
            self.fh.write(
                b'%s> send(%d, %d) -> %d'
                % (self.name, len(data), flags, len(res))
            )
            self._writedata(data)

    def sendall(self, res, data, flags=0):
        if self.writes:
            if self.logdataapis:
                # Returns None on success. So don't bother reporting return value.
                self.fh.write(
                    b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
                )
            self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        # The 3rd positional argument is flags only when an address follows.
        flags = flagsoraddress if address else 0

        if self.logdataapis:
            self.fh.write(
                b'%s> sendto(%d, %d, %r) -> %d'
                % (self.name, len(data), flags, address, res)
            )

        self._writedata(data)

    def setblocking(self, res, flag):
        if self.states:
            self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if self.states:
            self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if self.states:
            self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if self.states:
            self.fh.write(
                b'%s> setsockopt(%r, %r, %r) -> %r\n'
                % (self.name, level, optname, value, res)
            )
1136 1136
1137 1137
def makeloggingsocket(
    logh,
    fh,
    name,
    reads=True,
    writes=True,
    states=True,
    logdata=False,
    logdataapis=True,
):
    """Turn a socket into a logging socket."""

    # Wrap the socket in a proxy whose observer writes traffic/state logs
    # to ``logh``.
    return socketproxy(
        fh,
        socketobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            states=states,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
1160 1160
1161 1161
def version():
    """Return version information if available."""
    try:
        from . import __version__  # pytype: disable=import-error
    except ImportError:
        # No generated version module (e.g. running from a source checkout).
        return b'unknown'
    return __version__.version
1170 1170
1171 1171
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    # Up to three dotted numeric components, optionally separated from the
    # remainder by '+' or '-'.
    m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
    if m is None:
        vparts, extra = b'', v
    else:
        vparts = m.group(1)
        extra = m.group(2) if m.group(2) else None

    assert vparts is not None  # help pytype

    vints = []
    for piece in vparts.split(b'.'):
        try:
            vints.append(int(piece))
        except ValueError:
            break
    # Pad with None so e.g. (3, 6) becomes (3, 6, None).
    vints.extend([None] * (3 - len(vints)))

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

    raise error.ProgrammingError(b"invalid version part request: %d" % n)
1257 1257
1258 1258
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount

    if argcount == 0:
        # Single memoized value, computed lazily on first call.
        memo = []

        def f():
            if not memo:
                memo.append(func())
            return memo[0]

        return f

    cache = {}
    if argcount == 1:
        # Avoid packing/unpacking an args tuple for the common 1-arg case.
        def f(arg):
            try:
                return cache[arg]
            except KeyError:
                cache[arg] = func(arg)
                return cache[arg]

    else:

        def f(*args):
            try:
                return cache[args]
            except KeyError:
                cache[args] = func(*args)
                return cache[args]

    return f
1288 1288
1289 1289
class cow:
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            self._copied = pending - 1
            # Function cow.__init__ expects 1 arg(s), got 2 [wrong-arg-count]
            return self.__class__(self)  # pytype: disable=wrong-arg-count
        return self

    def copy(self):
        """always do a cheap copy"""
        # Just count outstanding shares; the real copy happens lazily in
        # preparewrite().
        self._copied = getattr(self, '_copied', 0) + 1
        return self
1308 1308
1309 1309
class sortdict(collections.OrderedDict):
    """a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> list(d2.items())
    [('a', 0), ('b', 1)]
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    >>> d1.insert(1, b'a.5', 0.5)
    >>> list(d1.items())
    [('a', 0), ('a.5', 0.5), ('b', 1)]
    """

    def __setitem__(self, key, value):
        # Drop any existing entry first so a re-set key moves to the end.
        if key in self:
            del self[key]
        super().__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src, **f):
            if isinstance(src, dict):
                src = src.items()
            for k, v in src:
                self[k] = v
            for k, v in f.items():
                self[k] = v

    def insert(self, position, key, value):
        # Walk a snapshot of the items, splicing the new pair in at
        # ``position`` and re-appending every entry at or after it so the
        # relative order of existing entries is preserved.
        for idx, (k, v) in enumerate(list(self.items())):
            if idx == position:
                self[key] = value
            if idx >= position:
                del self[k]
                self[k] = v
1347 1347
1348 1348
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
1374 1374
1375 1375
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
1381 1381
1382 1382
class transactional:  # pytype: disable=ignored-metaclass
    """Base class for making a transactional type into a context manager."""

    # py2-style metaclass declaration; has no effect on Python 3, kept for
    # the pytype annotation above.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close only on a clean exit; release unconditionally so an aborted
        # transaction is still cleaned up.
        clean = exc_type is None
        try:
            if clean:
                self.close()
        finally:
            self.release()
1408 1408
1409 1409
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        # Note: close() is inside the try block on purpose, so an
        # InterventionRequired raised by close() itself is also handled
        # by the except clause below.
        tr.close()
    except error.InterventionRequired:
        # InterventionRequired is an expected stop, not a failure: commit
        # what we have, then re-raise for the caller to handle.
        tr.close()
        raise
    finally:
        tr.release()
1427 1427
1428 1428
@contextlib.contextmanager
def nullcontextmanager(enter_result=None):
    """A no-op context manager that simply yields ``enter_result``."""
    yield enter_result
1432 1432
1433 1433
class _lrucachenode:
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """

    __slots__ = ('next', 'prev', 'key', 'value', 'cost')

    def __init__(self):
        # A fresh node is its own neighbor: a one-element circular list.
        self.next = self
        self.prev = self
        # Start out with no payload.
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0
1456 1456
1457 1457
class lrucachedict:
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new code. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """

    def __init__(self, max, maxcost=0):
        # Backing dict mapping key -> linked list node.
        self._cache = {}

        # Sentinel/head of the circular doubly linked list; initially a
        # single empty node pointing at itself.
        self._head = _lrucachenode()
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # Raises KeyError (from the backing dict) for missing keys; a hit
        # refreshes the entry's recency.
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        # dict-style assignment inserts with cost 0.
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        # Mirrors dict.pop(): raises KeyError unless a default is given.
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default

        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        # Empty every node in list order; the nodes themselves are kept
        # for reuse (capacity is preserved).
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev

        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node: _lrucachenode):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self) -> _lrucachenode:
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
1750 1750
1751 1751
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()

    def _lookup(key, compute):
        # Shared LRU bookkeeping: refresh on hit, evict oldest past 20
        # cached entries on miss.
        if key in cache:
            order.remove(key)
        else:
            if len(cache) > 20:
                del cache[order.popleft()]
            cache[key] = compute()
        order.append(key)
        return cache[key]

    if func.__code__.co_argcount == 1:
        # Avoid packing/unpacking an args tuple for the 1-arg case.
        def f(arg):
            return _lookup(arg, lambda: func(arg))

    else:

        def f(*args):
            return _lookup(args, lambda: func(*args))

    return f
1781 1781
1782 1782
class propertycache:
    """Non-data descriptor: compute an attribute once, then cache it in the
    instance ``__dict__`` so later accesses bypass this descriptor."""

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1796 1796
1797 1797
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # propertycache stores under the str name; silently ignore if unset.
    obj.__dict__.pop(pycompat.sysstr(prop), None)
1803 1803
1804 1804
def increasingchunks(source, min=1024, max=65536):
    """return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max"""

    buf = []
    buffered = 0
    for chunk in source:
        buf.append(chunk)
        buffered += len(chunk)
        if buffered >= min:
            if min < max:
                # Double the threshold, but jump straight to the largest
                # power of two <= the bytes just buffered if that is bigger,
                # and never exceed the cap.
                min = min << 1
                bits = buffered.bit_length() - 1
                nmin = 1 << (bits if bits > 0 else 0)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield b''.join(buf)
            buf = []
            buffered = 0
    if buf:
        yield b''.join(buf)
1836 1836
1837 1837
def always(fn):
    """Matcher predicate that accepts everything."""
    return True
1840 1840
1841 1841
def never(fn):
    """Matcher predicate that rejects everything."""
    return False
1844 1844
1845 1845
def nogc(func=None) -> Any:
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    # Dual use: ``with nogc():`` as a context manager, ``@nogc`` as a
    # decorator.
    return _nogc_context() if func is None else _nogc_decorator(func)


@contextlib.contextmanager
def _nogc_context():
    # Remember the prior state so nested/disabled callers are respected.
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        yield
    finally:
        if was_enabled:
            gc.enable()


def _nogc_decorator(func):
    def wrapper(*args, **kwargs):
        with _nogc_context():
            return func(*args, **kwargs)

    return wrapper
1882 1882
1883 1883
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    # NOTE: this rebinds nogc to an identity function taking one argument,
    # so on PyPy only the decorator form (``@nogc``) remains usable; a
    # zero-argument ``nogc()`` call would raise TypeError.
    nogc = lambda x: x
1887 1887
1888 1888
def pathto(root: bytes, n1: bytes, n2: bytes) -> bytes:
    """return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    """
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # Different drives: no relative path exists; anchor n2 at root.
            return os.path.join(root, localpath(n2))
        n2 = b'/'.join((pconvert(root), n2))
    comps1 = splitpath(n1)
    comps2 = n2.split(b'/')
    # Skip the shared leading components.
    common = 0
    while (
        common < len(comps1)
        and common < len(comps2)
        and comps1[common] == comps2[common]
    ):
        common += 1
    # Climb out of what remains of n1, then descend into the rest of n2.
    ups = [b'..'] * (len(comps1) - common)
    return pycompat.ossep.join(ups + comps2[common:]) or b'.'
1914 1914
1915 1915
def checksignature(func, depth=1):
    '''wrap a function with code to check for calling errors'''

    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # A traceback exactly ``depth`` frames deep means the TypeError
            # came from the call itself (bad signature), not from inside
            # the function body.
            tb = sys.exc_info()[2]
            if len(traceback.extract_tb(tb)) == depth:
                raise error.SignatureError
            raise

    return wrapped
1928 1928
1929 1929
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    b'apfs',
    b'btrfs',
    b'ext2',
    b'ext3',
    b'ext4',
    b'hfs',
    b'jfs',
    b'NTFS',
    b'reiserfs',
    b'tmpfs',
    b'ufs',
    b'xfs',
    b'zfs',
}
1946 1946
1947 1947
def copyfile(
    src,
    dest,
    hardlink=False,
    copystat=False,
    checkambig=False,
    nb_bytes=None,
    no_hardlink_cb=None,
    check_fs_hardlink=True,
):
    """copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    nb_bytes: if set only copy the first `nb_bytes` of the source file.

    no_hardlink_cb: optional callable, invoked whenever hardlinking is
    requested but falls back to a plain copy.
    """
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # Capture the pre-overwrite stat so mtime ambiguity can be
            # detected after the copy (see isambig check below).
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink and check_fs_hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            if no_hardlink_cb is not None:
                no_hardlink_cb()
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            if nb_bytes is not None:
                m = "the `nb_bytes` argument is incompatible with `hardlink`"
                raise error.ProgrammingError(m)
            return
        except (IOError, OSError) as exc:
            # EEXIST means the link itself is fine and the callback would be
            # misleading; anything else reports the fallback.
            if exc.errno != errno.EEXIST and no_hardlink_cb is not None:
                no_hardlink_cb()
            # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
        if nb_bytes is not None:
            m = "cannot use `nb_bytes` on a symlink"
            raise error.ProgrammingError(m)
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1
                        ) & 0x7FFFFFFF
                        os.utime(dest, (advanced, advanced))
            # We could do something smarter using `copy_file_range` call or similar
            if nb_bytes is not None:
                with open(dest, mode='r+') as f:
                    f.truncate(nb_bytes)
        except shutil.Error as inst:
            raise error.Abort(stringutil.forcebytestr(inst))
2026 2026
2027 2027
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible.

    Returns a ``(hardlink, num)`` tuple: whether hardlinking was (still) in
    effect at the end, and the number of files copied.
    """
    num = 0

    def settopic():
        if progress:
            progress.topic = _(b'linking') if hardlink else _(b'copying')

    if os.path.isdir(src):
        if hardlink is None:
            # Hardlinking only works within one filesystem; decide based on
            # the device of src and the destination's parent.
            hardlink = (
                os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            # Recurse; a child may discover hardlinking doesn't work and
            # flip the flag off for the rest of the tree.
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (
                os.stat(os.path.dirname(src)).st_dev
                == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError) as exc:
                if exc.errno != errno.EEXIST:
                    hardlink = False
                # XXX maybe try to relink if the file exist ?
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num
2071 2071
2072 2072
# Path components that Windows reserves for device access regardless of
# extension; checkwinfilename() compares against these case-insensitively.
_winreservednames = {
    b'con',
    b'prn',
    b'aux',
    b'nul',
    b'com1',
    b'com2',
    b'com3',
    b'com4',
    b'com5',
    b'com6',
    b'com7',
    b'com8',
    b'com9',
    b'lpt1',
    b'lpt2',
    b'lpt3',
    b'lpt4',
    b'lpt5',
    b'lpt6',
    b'lpt7',
    b'lpt8',
    b'lpt9',
}
# Characters that may not appear anywhere in a Windows filename.
_winreservedchars = b':*?"<>|'
2098 2098
2099 2099
def checkwinfilename(path: bytes) -> Optional[bytes]:
    r"""Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    """
    if path.endswith(b'\\'):
        return _(b"filename ends with '\\', which is invalid on Windows")
    if b'\\/' in path:
        return _(b"directory name ends with '\\', which is invalid on Windows")
    # check every path component, accepting either separator style
    for component in path.replace(b'\\', b'/').split(b'/'):
        if not component:
            continue
        for ch in _filenamebytestr(component):
            if ch in _winreservedchars:
                return (
                    _(
                        b"filename contains '%s', which is reserved "
                        b"on Windows"
                    )
                    % ch
                )
            if ord(ch) <= 31:
                return _(
                    b"filename contains '%s', which is invalid on Windows"
                ) % stringutil.escapestr(ch)
        # device names are reserved whatever the extension is
        stem = component.split(b'.')[0]
        if stem and stem.lower() in _winreservednames:
            return (
                _(b"filename contains '%s', which is reserved on Windows")
                % stem
            )
        # trailing dot/space is rejected by Windows, except for . and ..
        tail = component[-1:]
        if tail in (b'.', b' ') and component not in (b'.', b'..'):
            return (
                _(
                    b"filename ends with '%s', which is not allowed "
                    b"on Windows"
                )
                % tail
            )
2159 2159
2160 2160
# Preferred timer for measuring durations; None when perf_counter is missing
# (fallbacks below pick a platform-appropriate substitute).
timer = getattr(time, "perf_counter", None)

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    if not timer:
        timer = time.clock  # pytype: disable=module-attr
else:
    # mercurial.windows doesn't have platform.checkosfilename
    checkosfilename = platform.checkosfilename  # pytype: disable=module-attr
    if not timer:
        timer = time.time
2172 2172
2173 2173
def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    # Preferred path: a symlink is created atomically and its target
    # carries the lock holder information.
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            # lock already held by someone: report it
            raise
    except AttributeError:  # no symlink in os
        pass

    # Fallback: exclusive creation of a regular file holding the info.
    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    fd = os.open(pathname, flags)
    os.write(fd, info)
    os.close(fd)
2192 2192
2193 2193
def readlock(pathname: bytes) -> bytes:
    """Return the holder info stored by makelock() at *pathname*."""
    try:
        # symlink-based lock: the info is the link target
        return readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported here
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError:  # no symlink in os
        pass
    # regular-file lock fallback: the info is the file content
    with posixfile(pathname, b'rb') as fp:
        return fp.read()
2204 2204
2205 2205
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        descriptor = fp.fileno()
    except AttributeError:
        # file-like object without a real descriptor: stat it by name
        return os.stat(fp.name)
    return os.fstat(descriptor)
2212 2212
2213 2213
2214 2214 # File system features
2215 2215
2216 2216
def fscasesensitive(path: bytes) -> bool:
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st1 = os.lstat(path)
    dirname, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        # name has no case to fold; no evidence against case sensitivity
        return True
    try:
        st2 = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-folded variant doesn't exist: the FS distinguishes case
        return True
    # same underlying file means the FS folded the case for us
    return st2 != st1
2239 2239
2240 2240
# Pattern pre-processing for the active regex engine; identity by default,
# switched to pycompat.sysstr when the available re2 binding wants str.
_re2_input = lambda x: x
# google-re2 needs to be told not to emit errors on its own
_re2_options = None
try:
    import re2  # pytype: disable=import-error

    # tri-state: None means "importable but not yet probed for usability"
    _re2 = None
except ImportError:
    _re2 = False
2250 2250
2251 2251
def has_re2():
    """Return True if re2 is available, False otherwise."""
    # _re2 is a tri-state module global; None means "not probed yet"
    if _re2 is None:
        _re._checkre2()
    return _re2
2257 2257
2258 2258
class _re:
    """Facade over the stdlib ``re`` module that transparently uses a
    ``re2`` binding for compatible patterns when one is importable."""

    @staticmethod
    def _checkre2():
        # Probe the optional re2 module once and cache the verdict (and any
        # input adapter/options) in module-level globals.
        global _re2
        global _re2_input
        global _re2_options
        if _re2 is not None:
            # we already have the answer
            return

        check_pattern = br'\[([^\[]+)\]'
        check_input = b'[ui]'
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(check_pattern, check_input))
        except ImportError:
            _re2 = False
        except TypeError:
            # the `pyre-2` project provides a re2 module that accept bytes
            # the `fb-re2` project provides a re2 module that accept sysstr
            check_pattern = pycompat.sysstr(check_pattern)
            check_input = pycompat.sysstr(check_input)
            _re2 = bool(re2.match(check_pattern, check_input))
            _re2_input = pycompat.sysstr
        try:
            quiet = re2.Options()
            quiet.log_errors = False
            _re2_options = quiet
        except AttributeError:
            # this re2 binding has no Options; run with its defaults
            pass

    def compile(self, pat, flags=0):
        """Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE."""
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes supported flags inline rather than as an argument
            if flags & remod.IGNORECASE:
                pat = b'(?i)' + pat
            if flags & remod.MULTILINE:
                pat = b'(?m)' + pat
            try:
                input_regex = _re2_input(pat)
                if _re2_options is not None:
                    compiled = re2.compile(input_regex, options=_re2_options)
                else:
                    compiled = re2.compile(input_regex)
                return compiled
            except re2.error:
                # pattern uses a feature re2 rejects; fall back to re
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        """Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        """
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape
2328 2328
2329 2329
# Module-wide singleton used as a drop-in replacement for the `re` module.
re = _re()

# Cache for fspath(): maps a directory to its {normcased name: real name} map.
_fspathcache = {}
2333 2333
2334 2334
def fspath(name: bytes, root: bytes) -> bytes:
    """Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    """

    def _makefspathcacheentry(dir):
        # map the normcased form of each directory entry to its real name
        return {normcase(n): n for n in os.listdir(dir)}

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes: a b'\\' separator must be doubled so the regex
    # character classes below see a literal backslash instead of an escape
    # for the following character.  NOTE: the previous code called
    # seps.replace(...) but discarded the result, so the escaping never
    # took effect and a backslash separator was mis-parsed in the pattern.
    seps = seps.replace(b'\\', b'\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs are passed through verbatim
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return b''.join(result)
2377 2377
2378 2378
def checknlink(testfile: bytes) -> bool:
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = f2 = fp = None
    try:
        fd, f1 = pycompat.mkstemp(
            prefix=b'.%s-' % os.path.basename(testfile),
            suffix=b'1~',
            dir=os.path.dirname(testfile),
        )
        os.close(fd)
        f2 = b'%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        # best-effort cleanup of both scratch files
        for scratch in (f1, f2):
            try:
                if scratch is not None:
                    os.unlink(scratch)
            except OSError:
                pass
2410 2410
2411 2411
def endswithsep(path: bytes) -> bool:
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    alt = pycompat.osaltsep
    return bool(alt and path.endswith(alt))
2419 2419
2420 2420
def splitpath(path: bytes) -> List[bytes]:
    """Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed."""
    return path.split(pycompat.ossep)
2428 2428
2429 2429
def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    dirpath, basename = os.path.split(name)
    fd, temp = pycompat.mkstemp(
        prefix=b'.%s-' % basename, suffix=b'~', dir=dirpath
    )
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        # caller will truncate anyway; skip the content copy
        return temp
    try:
        try:
            ifp = posixfile(name, b"rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # original vanished: an empty temp file is the right answer
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, b"wb")
        for block in filechunkiter(ifp):
            ofp.write(block)
        ifp.close()
        ofp.close()
    except:  # re-raises
        # don't leave a half-written temp file behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
2471 2471
2472 2472
class filestat:
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """

    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for *path*; 'stat' is None when it is missing."""
        try:
            st = os.stat(path)
        except FileNotFoundError:
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object."""
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        mine = self.stat
        try:
            theirs = old.stat
        except AttributeError:
            # not a filestat-like object
            return False
        if mine is None or theirs is None:
            # equal only when both files are absent
            return mine is None and theirs is None
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (
                mine.st_size == theirs.st_size
                and mine[stat.ST_CTIME] == theirs[stat.ST_CTIME]
                and mine[stat.ST_MTIME] == theirs[stat.ST_MTIME]
            )
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        mine = self.stat
        theirs = getattr(old, 'stat', None)
        if mine is None or theirs is None:
            return False
        return mine[stat.ST_CTIME] == theirs[stat.ST_CTIME]

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
        try:
            os.utime(path, (advanced, advanced))
            return True
        except PermissionError:
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges
            return False

    def __ne__(self, other):
        return not (self == other)
2573 2573
2574 2574
class atomictempfile:
    """writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    """

    def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
        self.__name = name  # permanent name
        writable = b'w' in mode
        self._tempname = mktempcopy(
            name,
            emptyok=writable,
            createmode=createmode,
            enforcewritable=writable,
        )

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.writelines = self._fp.writelines
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if self._fp.closed:
            return
        self._fp.close()
        target = localpath(self.__name)
        # capture the pre-rename stat only when ambiguity checking is on
        oldstat = self._checkambig and filestat.frompath(target)
        if oldstat and oldstat.stat:
            rename(self._tempname, target)
            newstat = filestat.frompath(target)
            if newstat.isambig(oldstat):
                # stat of changed file is ambiguous to original one
                advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
                os.utime(target, (advanced, advanced))
        else:
            rename(self._tempname, target)

    def discard(self):
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if hasattr(self, '_fp'):  # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is None:
            self.close()
        else:
            self.discard()
2644 2644
2645 2645
def tryrmdir(f):
    """Best-effort removedirs(f): ignore 'missing' and 'not empty' errors."""
    try:
        removedirs(f)
    except OSError as e:
        if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
            raise
2652 2652
2653 2653
def unlinkpath(
    f: bytes, ignoremissing: bool = False, rmdir: bool = True
) -> None:
    """unlink and remove the directory if it is empty"""
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass
2668 2668
2669 2669
def tryunlink(f: bytes) -> bool:
    """Attempt to remove a file, ignoring FileNotFoundError.

    Returns False in case the file did not exit, True otherwise
    """
    try:
        unlink(f)
    except FileNotFoundError:
        return False
    return True
2680 2680
2681 2681
def makedirs(
    name: bytes, mode: Optional[int] = None, notindexed: bool = False
) -> None:
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # missing parent: create it recursively, then retry
        parent = os.path.dirname(abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as retry_err:
            # Catch EEXIST to handle races
            if retry_err.errno != retry_err.errno.__class__() and False:
                pass
            if retry_err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
2711 2711
2712 2712
def readfile(path: bytes) -> bytes:
    """Return the entire binary content of the file at *path*."""
    # The mode must be a native str: the built-in open() raises TypeError
    # for a bytes mode on Python 3 (a str mode is also accepted by the
    # historical pycompat open() wrapper, so this is safe either way).
    with open(path, 'rb') as fp:
        return fp.read()
2716 2716
2717 2717
def writefile(path: bytes, text: bytes) -> None:
    """Replace the content of the file at *path* with *text* (binary)."""
    # str mode: the built-in open() rejects bytes modes on Python 3
    with open(path, 'wb') as fp:
        fp.write(text)
2721 2721
2722 2722
def appendfile(path: bytes, text: bytes) -> None:
    """Append *text* (binary) to the file at *path*, creating it if needed."""
    # str mode: the built-in open() rejects bytes modes on Python 3
    with open(path, 'ab') as fp:
        fp.write(text)
2726 2726
2727 2727
class chunkbuffer:
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""

        def _capped(chunks):
            # Re-slice any chunk larger than 1 MiB into 256 KiB pieces so
            # that no single huge string has to be buffered whole.
            for piece in chunks:
                if len(piece) <= 2**20:
                    yield piece
                else:
                    cut = 0
                    while cut < len(piece):
                        yield piece[cut : cut + 2**18]
                        cut += 2**18

        self.iter = _capped(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return b''.join(self.iter)

        wanted = l
        pieces = []
        queue = self._queue
        while wanted > 0:
            # refill the queue with roughly 256 KiB of lookahead
            if not queue:
                budget = 2**18
                for piece in self.iter:
                    queue.append(piece)
                    budget -= len(piece)
                    if budget <= 0:
                        break
                if not queue:
                    break

            # Track an offset into the head chunk instead of re-queueing a
            # trimmed copy: partial reads then cost no deque mutations and
            # no intermediate string.
            head = queue[0]
            headlen = len(head)
            offset = self._chunkoffset

            if offset == 0 and wanted >= headlen:
                # consume the entire head chunk as-is
                wanted -= headlen
                queue.popleft()
                pieces.append(head)
            elif wanted >= headlen - offset:
                # consume the unread remainder of a partially-read chunk
                # (offset > 0 here, so the slice below is never a full copy)
                wanted -= headlen - offset
                queue.popleft()
                pieces.append(head[offset:])
                self._chunkoffset = 0
            else:
                # take only part of the head chunk
                pieces.append(head[offset : offset + wanted])
                self._chunkoffset = offset + wanted
                wanted = 0

        return b''.join(pieces)
2809 2809
2810 2810
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero-byte budget short-circuits without touching the file
        chunk = nbytes and f.read(nbytes)
        if not chunk:
            return
        if limit:
            limit -= len(chunk)
        yield chunk
2831 2831
2832 2832
class cappedreader:
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """

    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        remaining = self._left
        if not remaining:
            # budget exhausted: behave like EOF
            return b''
        if n < 0:
            n = remaining
        data = self._fh.read(min(n, remaining))
        self._left = remaining - len(data)
        assert self._left >= 0
        return data

    def readinto(self, b):
        data = self.read(len(b))
        if data is None:
            return None
        b[: len(data)] = data
        return len(data)
2870 2870
2871 2871
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def render(count):
        # pick the first (largest) unit whose threshold the count reaches
        for multiplier, divisor, fmt in unittable:
            if abs(count) >= divisor * multiplier:
                return fmt % (count / float(divisor))
        # below every threshold: fall back to the smallest unit
        return unittable[-1][2] % count

    return render
2882 2882
2883 2883
def processlinerange(fromline: int, toline: int) -> Tuple[int, int]:
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    # validation order matters for the error message callers see
    if toline < fromline:
        raise error.ParseError(_(b"line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_(b"fromline must be strictly positive"))
    return fromline - 1, toline
2904 2904
2905 2905
# Render a byte count with a human-readable unit; the thresholds select the
# largest unit that still yields roughly three significant digits.
bytecount = unitcountfn(
    (100, 1 << 30, _(b'%.0f GB')),
    (10, 1 << 30, _(b'%.1f GB')),
    (1, 1 << 30, _(b'%.2f GB')),
    (100, 1 << 20, _(b'%.0f MB')),
    (10, 1 << 20, _(b'%.1f MB')),
    (1, 1 << 20, _(b'%.2f MB')),
    (100, 1 << 10, _(b'%.0f KB')),
    (10, 1 << 10, _(b'%.1f KB')),
    (1, 1 << 10, _(b'%.2f KB')),
    (1, 1, _(b'%.0f bytes')),
)
2918 2918
2919 2919
class transformingwriter(typelib.BinaryIO_Proxy):
    """Writable file wrapper that passes every written chunk through a
    transformation function before delegating to the underlying file."""

    def __init__(self, fp: BinaryIO, encode: Callable[[bytes], bytes]) -> None:
        self._fp = fp
        self._encode = encode

    def write(self, data: bytes) -> int:
        # transform first, then delegate; report what the inner file wrote
        return self._fp.write(self._encode(data))

    def flush(self) -> None:
        self._fp.flush()

    def close(self) -> None:
        self._fp.close()
2935 2935
2936 2936
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')


def tolf(s: bytes) -> bytes:
    """Normalize line endings in s to LF."""
    return _eolre.sub(b'\n', s)


def tocrlf(s: bytes) -> bytes:
    """Normalize line endings in s to CRLF."""
    return _eolre.sub(b'\r\n', s)


def _crlfwriter(fp: typelib.BinaryIO_Proxy) -> typelib.BinaryIO_Proxy:
    """Wrap fp so that written data has its line endings converted to CRLF."""
    return transformingwriter(fp, tocrlf)
2953 2953
2954 2954
# Bind the native-EOL conversion helpers once, based on the platform's
# line separator (CRLF on Windows, LF elsewhere).
if pycompat.oslinesep == b'\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

if typing.TYPE_CHECKING:
    # Replace the various overloads that come along with aliasing other methods
    # with the narrow definition that we care about in the type checking phase
    # only. This ensures that both Windows and POSIX see only the definition
    # that is actually available.

    def tonativeeol(s: bytes) -> bytes:
        raise NotImplementedError

    def fromnativeeol(s: bytes) -> bytes:
        raise NotImplementedError

    def nativeeolwriter(fp: typelib.BinaryIO_Proxy) -> typelib.BinaryIO_Proxy:
        raise NotImplementedError
2978 2978
2979 2979
# TODO delete since workaround variant for Python 2 no longer needed.
def iterfile(fp):
    """Return fp unchanged (retained for compatibility with old callers)."""
    return fp
2983 2983
2984 2984
def iterlines(iterator: Iterable[bytes]) -> Iterator[bytes]:
    """Yield each line of each chunk in *iterator*, without line endings."""
    for block in iterator:
        yield from block.splitlines()
2989 2989
2990 2990
def expandpath(path: bytes) -> bytes:
    """Expand environment variables, then a leading ``~``, in *path*."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2993 2993
2994 2994
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda t: t)
    patterns = b'|'.join(mapping.keys())
    if escape_prefix:
        patterns += b'|' + prefix
        prefix_char = prefix[1:] if len(prefix) > 1 else prefix
        # a doubled prefix expands to the prefix character itself
        mapping[prefix_char] = prefix_char
    matcher = remod.compile(br'%s(%s)' % (prefix, patterns))
    # m.group() is prefix+key; drop the leading prefix character
    return matcher.sub(lambda m: fn(mapping[m.group()[1:]]), s)
3019 3019
3020 3020
# Render a duration in seconds as a short human-readable byte string.
# Each entry appears to be (threshold, unit multiplier, format string)
# consumed by unitcountfn(), scaling from whole seconds down through
# milli/micro/nanoseconds — see unitcountfn() for the exact selection rule.
timecount = unitcountfn(
    (1, 1e3, _(b'%.0f s')),
    (100, 1, _(b'%.1f s')),
    (10, 1, _(b'%.2f s')),
    (1, 1, _(b'%.3f s')),
    (100, 0.001, _(b'%.1f ms')),
    (10, 0.001, _(b'%.2f ms')),
    (1, 0.001, _(b'%.3f ms')),
    (100, 0.000001, _(b'%.1f us')),
    (10, 0.000001, _(b'%.2f us')),
    (1, 0.000001, _(b'%.3f us')),
    (100, 0.000000001, _(b'%.1f ns')),
    (10, 0.000000001, _(b'%.2f ns')),
    (1, 0.000000001, _(b'%.3f ns')),
)
3036 3036
3037 3037
@attr.s
class timedcmstats:
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        # human-readable duration; b'<unknown>' until the context has
        # exited and 'elapsed' has been filled in
        return timecount(self.elapsed) if self.elapsed else b'<unknown>'

    __str__ = encoding.strmethod(__bytes__)
3055 3055
3056 3056
@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers via a counter stored on the function
    # itself (initialized to 0 below, after the definition)
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        # record the elapsed time even if the managed block raised
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1


# current nesting depth; used by timed() to indent its report
timedcm._nested = 0
3078 3078
3079 3079
def timed(func):
    """Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    """

    def wrapper(*args, **kwargs):
        # time the wrapped call, then report after the context has closed
        # so that 'elapsed' is populated
        with timedcm(pycompat.bytestr(func.__name__)) as stats:
            ret = func(*args, **kwargs)
        indent = b' ' * stats.level * 2
        procutil.stderr.write(
            b'%s%s: %s\n' % (indent, pycompat.bytestr(func.__name__), stats)
        )
        return ret

    return wrapper
3106 3106
3107 3107
# Suffix table for sizetoint(); each entry is (lowercase suffix, multiplier).
# Matching tries entries in order, so the bare b'b' suffix must come last or
# it would shadow b'kb'/b'mb'/b'gb'.
_sizeunits = (
    (b'm', 2**20),
    (b'k', 2**10),
    (b'g', 2**30),
    (b'kb', 2**10),
    (b'mb', 2**20),
    (b'gb', 2**30),
    (b'b', 1),
)


def sizetoint(s: bytes) -> int:
    """Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    """
    text = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if text.endswith(suffix):
                return int(float(text[: -len(suffix)]) * multiplier)
        # no recognized suffix: a plain integer count of bytes
        return int(text)
    except ValueError:
        raise error.ParseError(_(b"couldn't parse size: %s") % s)
3137 3137
3138 3138
class hooks:
    """A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources."""

    def __init__(self):
        # list of (source, callable) pairs; sorted on each invocation
        self._hooks = []

    def add(self, source, hook):
        """Register ``hook`` under the ordering key ``source``."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        """Invoke every hook with ``args``; return their results in
        lexicographic order of the source names."""
        self._hooks.sort(key=lambda pair: pair[0])
        return [fn(*args) for _source, fn in self._hooks]
3156 3156
3157 3157
def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
    """Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not be used in production code but very convenient while developing.
    """
    # drop this frame plus the 'skip' callers closest to us
    frames = traceback.extract_stack()[: -skip - 1]
    entries = []
    for path, lineno, funcname, _text in frames:
        location = fileline % (pycompat.sysbytes(path), lineno)
        entries.append((location, pycompat.sysbytes(funcname)))
    # depth == 0 keeps everything ([-0:] is the whole list)
    entries = entries[-depth:]
    if not entries:
        return
    width = max(len(location) for location, _funcname in entries)
    for location, funcname in entries:
        if line is None:
            yield (width, location, funcname)
        else:
            yield line % (width, location, funcname)
3181 3181
3182 3182
def debugstacktrace(
    msg=b'stacktrace',
    skip=0,
    f=procutil.stderr,
    otherf=procutil.stdout,
    depth=0,
    prefix=b'',
):
    """Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.

    NOTE: the f/otherf defaults are bound to procutil's streams at import
    time; pass streams explicitly to redirect output.
    """
    if otherf:
        # flush the other stream first so interleaved output stays ordered
        otherf.flush()
    f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
    # skip + 1 hides this helper's own frame from the reported trace
    for line in getstackframes(skip + 1, depth=depth):
        f.write(prefix + line)
    f.flush()


# convenient shortcut
dst = debugstacktrace
3207 3207
3208 3208
3209 3209 def safename(f, tag, ctx, others=None):
3210 3210 """
3211 3211 Generate a name that it is safe to rename f to in the given context.
3212 3212
3213 3213 f: filename to rename
3214 3214 tag: a string tag that will be included in the new name
3215 3215 ctx: a context, in which the new name must not exist
3216 3216 others: a set of other filenames that the new name must not be in
3217 3217
3218 3218 Returns a file name of the form oldname~tag[~number] which does not exist
3219 3219 in the provided context and is not in the set of other names.
3220 3220 """
3221 3221 if others is None:
3222 3222 others = set()
3223 3223
3224 3224 fn = b'%s~%s' % (f, tag)
3225 3225 if fn not in ctx and fn not in others:
3226 3226 return fn
3227 3227 for n in itertools.count(1):
3228 3228 fn = b'%s~%s~%s' % (f, tag, n)
3229 3229 if fn not in ctx and fn not in others:
3230 3230 return fn
3231 3231
3232 3232
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    if len(data) < n:
        # short read: the stream ended before n bytes were produced
        raise error.Abort(
            _(b"stream ended unexpectedly (got %d bytes, expected %d)")
            % (len(data), n)
        )
    return data
3242 3242
3243 3243
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    b'\\x00'
    >>> uvarintencode(1)
    b'\\x01'
    >>> uvarintencode(127)
    b'\\x7f'
    >>> uvarintencode(1337)
    b'\\xb9\\n'
    >>> uvarintencode(65536)
    b'\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    bits = value & 0x7F
    value >>= 7
    # accumulate continuation bytes (high bit set) for all but the last
    # 7-bit group; the final byte has the high bit clear
    chunks = []
    while value:
        chunks.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7F
        value >>= 7
    chunks.append(pycompat.bytechr(bits))

    return b''.join(chunks)
3279 3279
3280 3280
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    value = 0
    shift = 0
    while True:
        octet = ord(readexactly(fh, 1))
        # low 7 bits carry the payload, least significant group first
        value |= (octet & 0x7F) << shift
        if octet < 0x80:
            # high bit clear marks the final byte
            return value
        shift += 7
3310 3310
3311 3311
3312 3312 # Passing the '' locale means that the locale should be set according to the
3313 3313 # user settings (environment variables).
3314 3314 # Python sometimes avoids setting the global locale settings. When interfacing
3315 3315 # with C code (e.g. the curses module or the Subversion bindings), the global
3316 3316 # locale settings must be initialized correctly. Python 2 does not initialize
3317 3317 # the global locale settings on interpreter startup. Python 3 sometimes
3318 3318 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3319 3319 # explicitly initialize it to get consistent behavior if it's not already
3320 3320 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3321 3321 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3322 3322 # if we can remove this code.
@contextlib.contextmanager
def with_lc_ctype():
    """Temporarily initialize LC_CTYPE from the environment when it is "C".

    Restores the previous LC_CTYPE on exit; does nothing when the locale
    was already initialized to something other than "C".
    """
    previous = locale.setlocale(locale.LC_CTYPE, None)
    if previous != 'C':
        yield
        return
    try:
        try:
            locale.setlocale(locale.LC_CTYPE, '')
        except locale.Error:
            # The likely case is that the locale from the environment
            # variables is unknown.
            pass
        yield
    finally:
        locale.setlocale(locale.LC_CTYPE, previous)
3339 3339
3340 3340
def _estimatememory() -> Optional[int]:
    """Provide an estimate for the available system memory in Bytes.

    If no estimate can be provided on the platform, returns None.
    """
    if pycompat.sysplatform.startswith(b'win'):
        # On Windows, use the GlobalMemoryStatusEx kernel function directly.
        #
        # Structure, byref, sizeof and windll live in the top-level ctypes
        # module.  On Python 3, ctypes.wintypes no longer star-imports
        # ctypes, so importing them from ctypes.wintypes raises ImportError
        # (which is what the old pytype import-error suppression was hiding).
        from ctypes import (  # pytype: disable=import-error
            Structure,
            byref,
            c_long as DWORD,
            c_ulonglong as DWORDLONG,
            sizeof,
            windll,
        )

        class MEMORYSTATUSEX(Structure):
            _fields_ = [
                ('dwLength', DWORD),
                ('dwMemoryLoad', DWORD),
                ('ullTotalPhys', DWORDLONG),
                ('ullAvailPhys', DWORDLONG),
                ('ullTotalPageFile', DWORDLONG),
                ('ullAvailPageFile', DWORDLONG),
                ('ullTotalVirtual', DWORDLONG),
                ('ullAvailVirtual', DWORDLONG),
                ('ullExtendedVirtual', DWORDLONG),
            ]

        x = MEMORYSTATUSEX()
        # dwLength must be set to the structure size before the call
        x.dwLength = sizeof(x)
        windll.kernel32.GlobalMemoryStatusEx(byref(x))
        return x.ullAvailPhys

    # On newer Unix-like systems and Mac OSX, the sysconf interface
    # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
    # seems to be implemented on most systems.
    try:
        pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
        pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
        return pagesize * pages
    except OSError:  # sysconf can fail
        pass
    except KeyError:  # unknown parameter
        pass
General Comments 0
You need to be logged in to leave comments. Login now