##// END OF EJS Templates
http: handle push of bundles > 2 GB again (issue3017)...
Mads Kiilerich -
r15152:94b200a1 stable
parent child Browse files
Show More
@@ -1,282 +1,280 b''
1 1 # httpconnection.py - urllib2 handler for new http support
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 # Copyright 2011 Google, Inc.
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 import logging
11 11 import socket
12 12 import urllib
13 13 import urllib2
14 14 import os
15 15
16 16 from mercurial import httpclient
17 17 from mercurial import sslutil
18 18 from mercurial import util
19 19 from mercurial.i18n import _
20 20
21 21 # moved here from url.py to avoid a cycle
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP, reporting upload
    progress through ``ui`` while the data is being read.

    It does not, however, define a __len__ attribute, because the length
    might be more than Py_ssize_t can handle.  The size of the file in
    bytes is exposed as the ``length`` attribute instead.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file described by *args/**kwargs and record its size.

        ``ui`` must provide a ``progress`` method; the remaining
        arguments are passed to ``open`` unchanged.
        """
        # We can't just "self._data = open(*args, **kwargs)" here because there
        # is an "open" function defined in this module that shadows the global
        # one
        self.ui = ui
        self._data = open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self.length = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        self._total = self.length / 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the 'sending' progress.

        If the underlying read raises EOFError the progress topic is
        closed and the exception is propagated to the caller.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            self.ui.progress(_('sending'), None)
            # nothing was read, so there is no data to account for or
            # return; let the caller see the original error
            raise
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos / 1024,
                         unit=_('kb'), total=self._total)
        return ret
59
60 58 # moved here from url.py to avoid a cycle
def readauthforuri(ui, uri, user):
    """Return the ``(group, settings)`` pair from [auth] best matching uri.

    The entry with the longest matching prefix wins; on equal length an
    entry that carries a username is preferred over one that does not.
    Returns None when no entry matches.
    """
    # Collect the [auth] section as {group: {setting: value}}
    groups = {}
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        settings = groups.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        settings[setting] = val

    # Pick the most specific entry
    scheme, hostpath = uri.split('://', 1)
    bestauth = None
    bestuser = None
    bestlen = 0
    for group, auth in groups.items():
        if user and user != auth.get('username', user):
            # If a username was set in the URI, the entry username
            # must either match it or be unset
            continue
        prefix = auth.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) > 1:
            schemes, prefix = [pieces[0]], pieces[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if not (prefix == '*' or hostpath.startswith(prefix)):
            continue
        if scheme not in schemes:
            continue
        longer = len(prefix) > bestlen
        tiebreak = (len(prefix) == bestlen and not bestuser
                    and 'username' in auth)
        if longer or tiebreak:
            bestlen = len(prefix)
            bestauth = group, auth
            bestuser = auth.get('username')
            if user and not bestuser:
                auth['username'] = user
    return bestauth
102 100
103 101 # Mercurial (at least until we can remove the old codepath) requires
104 102 # that the http response object be sufficiently file-like, so we
105 103 # provide a close() method here.
class HTTPResponse(httpclient.HTTPResponse):
    """httpclient response with a do-nothing close() method.

    Mercurial requires the response object to be sufficiently
    file-like, which includes having close().
    """

    def close(self):
        """Do nothing; present only for file-like compatibility."""
        return None
109 107
class HTTPConnection(httpclient.HTTPConnection):
    """httpclient connection that rewinds httpsendfile bodies before sending."""

    response_class = HTTPResponse

    def request(self, method, uri, body=None, headers=None):
        """Issue a request, seeking an httpsendfile body back to its start.

        ``headers`` defaults to a fresh empty dict on every call so that
        no dictionary is shared between requests.
        """
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            # the same file may be sent more than once; always start
            # from the beginning
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
117 115
118 116
119 117 _configuredlogging = False
120 118 LOGFMT = '%(levelname)s:%(name)s:%(lineno)d:%(message)s'
121 119 # Subclass BOTH of these because otherwise urllib2 "helpfully"
122 120 # reinserts them since it notices we don't include any subclasses of
123 121 # them.
124 122 class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
125 123 def __init__(self, ui, pwmgr):
126 124 global _configuredlogging
127 125 urllib2.AbstractHTTPHandler.__init__(self)
128 126 self.ui = ui
129 127 self.pwmgr = pwmgr
130 128 self._connections = {}
131 129 loglevel = ui.config('ui', 'http2debuglevel', default=None)
132 130 if loglevel and not _configuredlogging:
133 131 _configuredlogging = True
134 132 logger = logging.getLogger('mercurial.httpclient')
135 133 logger.setLevel(getattr(logging, loglevel.upper()))
136 134 handler = logging.StreamHandler()
137 135 handler.setFormatter(logging.Formatter(LOGFMT))
138 136 logger.addHandler(handler)
139 137
140 138 def close_all(self):
141 139 """Close and remove all connection objects being kept for reuse."""
142 140 for openconns in self._connections.values():
143 141 for conn in openconns:
144 142 conn.close()
145 143 self._connections = {}
146 144
147 145 # shamelessly borrowed from urllib2.AbstractHTTPHandler
148 146 def do_open(self, http_class, req, use_ssl):
149 147 """Return an addinfourl object for the request, using http_class.
150 148
151 149 http_class must implement the HTTPConnection API from httplib.
152 150 The addinfourl return value is a file-like object. It also
153 151 has methods and attributes including:
154 152 - info(): return a mimetools.Message object for the headers
155 153 - geturl(): return the original request URL
156 154 - code: HTTP status code
157 155 """
158 156 # If using a proxy, the host returned by get_host() is
159 157 # actually the proxy. On Python 2.6.1, the real destination
160 158 # hostname is encoded in the URI in the urllib2 request
161 159 # object. On Python 2.6.5, it's stored in the _tunnel_host
162 160 # attribute which has no accessor.
163 161 tunhost = getattr(req, '_tunnel_host', None)
164 162 host = req.get_host()
165 163 if tunhost:
166 164 proxyhost = host
167 165 host = tunhost
168 166 elif req.has_proxy():
169 167 proxyhost = req.get_host()
170 168 host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
171 169 else:
172 170 proxyhost = None
173 171
174 172 if proxyhost:
175 173 if ':' in proxyhost:
176 174 # Note: this means we'll explode if we try and use an
177 175 # IPv6 http proxy. This isn't a regression, so we
178 176 # won't worry about it for now.
179 177 proxyhost, proxyport = proxyhost.rsplit(':', 1)
180 178 else:
181 179 proxyport = 3128 # squid default
182 180 proxy = (proxyhost, proxyport)
183 181 else:
184 182 proxy = None
185 183
186 184 if not host:
187 185 raise urllib2.URLError('no host given')
188 186
189 187 connkey = use_ssl, host, proxy
190 188 allconns = self._connections.get(connkey, [])
191 189 conns = [c for c in allconns if not c.busy()]
192 190 if conns:
193 191 h = conns[0]
194 192 else:
195 193 if allconns:
196 194 self.ui.debug('all connections for %s busy, making a new '
197 195 'one\n' % host)
198 196 timeout = None
199 197 if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
200 198 timeout = req.timeout
201 199 h = http_class(host, timeout=timeout, proxy_hostport=proxy)
202 200 self._connections.setdefault(connkey, []).append(h)
203 201
204 202 headers = dict(req.headers)
205 203 headers.update(req.unredirected_hdrs)
206 204 headers = dict(
207 205 (name.title(), val) for name, val in headers.items())
208 206 try:
209 207 path = req.get_selector()
210 208 if '://' in path:
211 209 path = path.split('://', 1)[1].split('/', 1)[1]
212 210 if path[0] != '/':
213 211 path = '/' + path
214 212 h.request(req.get_method(), path, req.data, headers)
215 213 r = h.getresponse()
216 214 except socket.error, err: # XXX what error?
217 215 raise urllib2.URLError(err)
218 216
219 217 # Pick apart the HTTPResponse object to get the addinfourl
220 218 # object initialized properly.
221 219 r.recv = r.read
222 220
223 221 resp = urllib.addinfourl(r, r.headers, req.get_full_url())
224 222 resp.code = r.status
225 223 resp.msg = r.reason
226 224 return resp
227 225
228 226 # httplib always uses the given host/port as the socket connect
229 227 # target, and then allows full URIs in the request path, which it
230 228 # then observes and treats as a signal to do proxying instead.
231 229 def http_open(self, req):
232 230 if req.get_full_url().startswith('https'):
233 231 return self.https_open(req)
234 232 return self.do_open(HTTPConnection, req, False)
235 233
236 234 def https_open(self, req):
237 235 # req.get_full_url() does not contain credentials and we may
238 236 # need them to match the certificates.
239 237 url = req.get_full_url()
240 238 user, password = self.pwmgr.find_stored_password(url)
241 239 res = readauthforuri(self.ui, url, user)
242 240 if res:
243 241 group, auth = res
244 242 self.auth = auth
245 243 self.ui.debug("using auth.%s.* for authentication\n" % group)
246 244 else:
247 245 self.auth = None
248 246 return self.do_open(self._makesslconnection, req, True)
249 247
250 248 def _makesslconnection(self, host, port=443, *args, **kwargs):
251 249 keyfile = None
252 250 certfile = None
253 251
254 252 if args: # key_file
255 253 keyfile = args.pop(0)
256 254 if args: # cert_file
257 255 certfile = args.pop(0)
258 256
259 257 # if the user has specified different key/cert files in
260 258 # hgrc, we prefer these
261 259 if self.auth and 'key' in self.auth and 'cert' in self.auth:
262 260 keyfile = self.auth['key']
263 261 certfile = self.auth['cert']
264 262
265 263 # let host port take precedence
266 264 if ':' in host and '[' not in host or ']:' in host:
267 265 host, port = host.rsplit(':', 1)
268 266 port = int(port)
269 267 if '[' in host:
270 268 host = host[1:-1]
271 269
272 270 if keyfile:
273 271 kwargs['keyfile'] = keyfile
274 272 if certfile:
275 273 kwargs['certfile'] = certfile
276 274
277 275 kwargs.update(sslutil.sslkwargs(self.ui, host))
278 276
279 277 con = HTTPConnection(host, port, use_ssl=True,
280 278 ssl_validator=sslutil.validator(self.ui, host),
281 279 **kwargs)
282 280 return con
@@ -1,242 +1,244 b''
1 1 # httprepo.py - HTTP repository proxy classes for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from node import nullid
10 10 from i18n import _
11 11 import changegroup, statichttprepo, error, httpconnection, url, util, wireproto
12 12 import os, urllib, urllib2, zlib, httplib
13 13 import errno, socket
14 14
def zgenerator(f):
    """Yield the zlib-decompressed contents of file object f piece by piece.

    Each decompress call is limited to 2**18 bytes of output; an
    httplib.HTTPException while reading is turned into IOError.
    """
    inflater = zlib.decompressobj()
    try:
        for pending in util.filechunkiter(f):
            while pending:
                yield inflater.decompress(pending, 2**18)
                # input left over because of the output limit is fed
                # back in on the next round
                pending = inflater.unconsumed_tail
    except httplib.HTTPException:
        raise IOError(None, _('connection ended unexpectedly'))
    yield inflater.flush()
25 25
26 26 class httprepository(wireproto.wirerepository):
27 27 def __init__(self, ui, path):
28 28 self.path = path
29 29 self.caps = None
30 30 self.handler = None
31 31 u = util.url(path)
32 32 if u.query or u.fragment:
33 33 raise util.Abort(_('unsupported URL component: "%s"') %
34 34 (u.query or u.fragment))
35 35
36 36 # urllib cannot handle URLs with embedded user or passwd
37 37 self._url, authinfo = u.authinfo()
38 38
39 39 self.ui = ui
40 40 self.ui.debug('using %s\n' % self._url)
41 41
42 42 self.urlopener = url.opener(ui, authinfo)
43 43
44 44 def __del__(self):
45 45 for h in self.urlopener.handlers:
46 46 h.close()
47 47 if hasattr(h, "close_all"):
48 48 h.close_all()
49 49
50 50 def url(self):
51 51 return self.path
52 52
53 53 # look up capabilities only when needed
54 54
55 55 def _fetchcaps(self):
56 56 self.caps = set(self._call('capabilities').split())
57 57
58 58 def get_caps(self):
59 59 if self.caps is None:
60 60 try:
61 61 self._fetchcaps()
62 62 except error.RepoError:
63 63 self.caps = set()
64 64 self.ui.debug('capabilities: %s\n' %
65 65 (' '.join(self.caps or ['none'])))
66 66 return self.caps
67 67
68 68 capabilities = property(get_caps)
69 69
70 70 def lock(self):
71 71 raise util.Abort(_('operation not supported over http'))
72 72
73 73 def _callstream(self, cmd, **args):
74 74 if cmd == 'pushkey':
75 75 args['data'] = ''
76 76 data = args.pop('data', None)
77 size = 0
78 if util.safehasattr(data, 'length'):
79 size = data.length
80 elif data is not None:
81 size = len(data)
77 82 headers = args.pop('headers', {})
78 83
79 if data and self.ui.configbool('ui', 'usehttp2', False):
84 if size and self.ui.configbool('ui', 'usehttp2', False):
80 85 headers['Expect'] = '100-Continue'
81 86 headers['X-HgHttp2'] = '1'
82 87
83 88 self.ui.debug("sending %s command\n" % cmd)
84 89 q = [('cmd', cmd)]
85 90 headersize = 0
86 91 if len(args) > 0:
87 92 httpheader = self.capable('httpheader')
88 93 if httpheader:
89 94 headersize = int(httpheader.split(',')[0])
90 95 if headersize > 0:
91 96 # The headers can typically carry more data than the URL.
92 97 encargs = urllib.urlencode(sorted(args.items()))
93 98 headerfmt = 'X-HgArg-%s'
94 99 contentlen = headersize - len(headerfmt % '000' + ': \r\n')
95 100 headernum = 0
96 101 for i in xrange(0, len(encargs), contentlen):
97 102 headernum += 1
98 103 header = headerfmt % str(headernum)
99 104 headers[header] = encargs[i:i + contentlen]
100 105 varyheaders = [headerfmt % str(h) for h in range(1, headernum + 1)]
101 106 headers['Vary'] = ','.join(varyheaders)
102 107 else:
103 108 q += sorted(args.items())
104 109 qs = '?%s' % urllib.urlencode(q)
105 110 cu = "%s%s" % (self._url, qs)
106 111 req = urllib2.Request(cu, data, headers)
107 112 if data is not None:
108 # len(data) is broken if data doesn't fit into Py_ssize_t
109 # add the header ourself to avoid OverflowError
110 size = data.__len__()
111 113 self.ui.debug("sending %s bytes\n" % size)
112 114 req.add_unredirected_header('Content-Length', '%d' % size)
113 115 try:
114 116 resp = self.urlopener.open(req)
115 117 except urllib2.HTTPError, inst:
116 118 if inst.code == 401:
117 119 raise util.Abort(_('authorization failed'))
118 120 raise
119 121 except httplib.HTTPException, inst:
120 122 self.ui.debug('http error while sending %s command\n' % cmd)
121 123 self.ui.traceback()
122 124 raise IOError(None, inst)
123 125 except IndexError:
124 126 # this only happens with Python 2.3, later versions raise URLError
125 127 raise util.Abort(_('http error, possibly caused by proxy setting'))
126 128 # record the url we got redirected to
127 129 resp_url = resp.geturl()
128 130 if resp_url.endswith(qs):
129 131 resp_url = resp_url[:-len(qs)]
130 132 if self._url.rstrip('/') != resp_url.rstrip('/'):
131 133 if not self.ui.quiet:
132 134 self.ui.warn(_('real URL is %s\n') % resp_url)
133 135 self._url = resp_url
134 136 try:
135 137 proto = resp.getheader('content-type')
136 138 except AttributeError:
137 139 proto = resp.headers.get('content-type', '')
138 140
139 141 safeurl = util.hidepassword(self._url)
140 142 # accept old "text/plain" and "application/hg-changegroup" for now
141 143 if not (proto.startswith('application/mercurial-') or
142 144 proto.startswith('text/plain') or
143 145 proto.startswith('application/hg-changegroup')):
144 146 self.ui.debug("requested URL: '%s'\n" % util.hidepassword(cu))
145 147 raise error.RepoError(
146 148 _("'%s' does not appear to be an hg repository:\n"
147 149 "---%%<--- (%s)\n%s\n---%%<---\n")
148 150 % (safeurl, proto or 'no content-type', resp.read()))
149 151
150 152 if proto.startswith('application/mercurial-'):
151 153 try:
152 154 version = proto.split('-', 1)[1]
153 155 version_info = tuple([int(n) for n in version.split('.')])
154 156 except ValueError:
155 157 raise error.RepoError(_("'%s' sent a broken Content-Type "
156 158 "header (%s)") % (safeurl, proto))
157 159 if version_info > (0, 1):
158 160 raise error.RepoError(_("'%s' uses newer protocol %s") %
159 161 (safeurl, version))
160 162
161 163 return resp
162 164
163 165 def _call(self, cmd, **args):
164 166 fp = self._callstream(cmd, **args)
165 167 try:
166 168 return fp.read()
167 169 finally:
168 170 # if using keepalive, allow connection to be reused
169 171 fp.close()
170 172
171 173 def _callpush(self, cmd, cg, **args):
172 174 # have to stream bundle to a temp file because we do not have
173 175 # http 1.1 chunked transfer.
174 176
175 177 types = self.capable('unbundle')
176 178 try:
177 179 types = types.split(',')
178 180 except AttributeError:
179 181 # servers older than d1b16a746db6 will send 'unbundle' as a
180 182 # boolean capability. They only support headerless/uncompressed
181 183 # bundles.
182 184 types = [""]
183 185 for x in types:
184 186 if x in changegroup.bundletypes:
185 187 type = x
186 188 break
187 189
188 190 tempname = changegroup.writebundle(cg, None, type)
189 191 fp = httpconnection.httpsendfile(self.ui, tempname, "rb")
190 192 headers = {'Content-Type': 'application/mercurial-0.1'}
191 193
192 194 try:
193 195 try:
194 196 r = self._call(cmd, data=fp, headers=headers, **args)
195 197 vals = r.split('\n', 1)
196 198 if len(vals) < 2:
197 199 raise error.ResponseError(_("unexpected response:"), r)
198 200 return vals
199 201 except socket.error, err:
200 202 if err.args[0] in (errno.ECONNRESET, errno.EPIPE):
201 203 raise util.Abort(_('push failed: %s') % err.args[1])
202 204 raise util.Abort(err.args[1])
203 205 finally:
204 206 fp.close()
205 207 os.unlink(tempname)
206 208
207 209 def _abort(self, exception):
208 210 raise exception
209 211
210 212 def _decompress(self, stream):
211 213 return util.chunkbuffer(zgenerator(stream))
212 214
class httpsrepository(httprepository):
    """httprepository variant for https URLs; needs SSL support in Python."""

    def __init__(self, ui, path):
        # refuse early when the url module reports no https support
        if url.has_https:
            httprepository.__init__(self, ui, path)
            return
        raise util.Abort(_('Python support for SSL and HTTPS '
                           'is not installed'))
219 221
220 222 def instance(ui, path, create):
221 223 if create:
222 224 raise util.Abort(_('cannot create new http repository'))
223 225 try:
224 226 if path.startswith('https:'):
225 227 inst = httpsrepository(ui, path)
226 228 else:
227 229 inst = httprepository(ui, path)
228 230 try:
229 231 # Try to do useful work when checking compatibility.
230 232 # Usually saves a roundtrip since we want the caps anyway.
231 233 inst._fetchcaps()
232 234 except error.RepoError:
233 235 # No luck, try older compatibility check.
234 236 inst.between([(nullid, nullid)])
235 237 return inst
236 238 except error.RepoError, httpexception:
237 239 try:
238 240 r = statichttprepo.instance(ui, "static-" + path, create)
239 241 ui.note('(falling back to static-http)\n')
240 242 return r
241 243 except error.RepoError:
242 244 raise httpexception # use the original http RepoError instead
@@ -1,1696 +1,1700 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from i18n import _
17 17 import error, osutil, encoding
18 18 import errno, re, shutil, sys, tempfile, traceback
19 19 import os, time, calendar, textwrap, signal
20 20 import imp, socket, urllib
21 21
22 22 # Python compatibility
23 23
def sha1(s):
    """Return a SHA-1 hash object for s.

    Goes through _fastsha1, which rebinds this name to the real
    implementation on the first call.
    """
    digest = _fastsha1(s)
    return digest
26 26
# unique marker: no attribute value can be identical to it
_notset = object()

def safehasattr(thing, attr):
    """Return True if thing has an attribute named attr.

    Only a missing attribute counts as absent; exceptions other than
    AttributeError raised during the lookup are not swallowed.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
30
def _fastsha1(s):
    """Return a SHA-1 hash object for s, installing the real hash function.

    The first call imports sha1 from hashlib or sha (whichever is
    available) and rebinds both _fastsha1 and sha1 to it, so later
    calls go directly to the imported function.
    """
    global _fastsha1, sha1
    if sys.version_info < (2, 5):
        from sha import sha as _sha1
    else:
        from hashlib import sha1 as _sha1
    _fastsha1 = sha1 = _sha1
    return _sha1(s)
38 42
39 43 import __builtin__
40 44
# Provide a stand-in for the "buffer" builtin where it does not exist.
if sys.version_info[0] >= 3:
    def fakebuffer(sliceable, offset=0):
        """Return a memoryview of sliceable starting at offset."""
        return memoryview(sliceable)[offset:]
else:
    def fakebuffer(sliceable, offset=0):
        """Return the slice of sliceable starting at offset."""
        return sliceable[offset:]
try:
    buffer
except NameError:
    __builtin__.buffer = fakebuffer
51 55
52 56 import subprocess
53 57 closefds = os.name == 'posix'
54 58
def popen2(cmd, env=None, newlines=False):
    """Run cmd in a shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
65 69
def popen3(cmd, env=None, newlines=False):
    """Run cmd in a shell and return its (stdin, stdout, stderr) pipes."""
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=-1,
                            close_fds=closefds,
                            stdin=pipe,
                            stdout=pipe,
                            stderr=pipe,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr
74 78
def version():
    """Return version information if available."""
    try:
        import __version__
    except ImportError:
        # no generated __version__ module
        return 'unknown'
    return __version__.version
82 86
83 87 # used by parsedate
84 88 defaultdateformats = (
85 89 '%Y-%m-%d %H:%M:%S',
86 90 '%Y-%m-%d %I:%M:%S%p',
87 91 '%Y-%m-%d %H:%M',
88 92 '%Y-%m-%d %I:%M%p',
89 93 '%Y-%m-%d',
90 94 '%m-%d',
91 95 '%m/%d',
92 96 '%m/%d/%y',
93 97 '%m/%d/%Y',
94 98 '%a %b %d %H:%M:%S %Y',
95 99 '%a %b %d %I:%M:%S%p %Y',
96 100 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
97 101 '%b %d %H:%M:%S %Y',
98 102 '%b %d %I:%M:%S%p %Y',
99 103 '%b %d %H:%M:%S',
100 104 '%b %d %I:%M:%S%p',
101 105 '%b %d %H:%M',
102 106 '%b %d %I:%M%p',
103 107 '%b %d %Y',
104 108 '%b %d',
105 109 '%H:%M:%S',
106 110 '%I:%M:%S%p',
107 111 '%H:%M',
108 112 '%I:%M%p',
109 113 )
110 114
111 115 extendeddateformats = defaultdateformats + (
112 116 "%Y",
113 117 "%Y-%m",
114 118 "%b",
115 119 "%b %Y",
116 120 )
117 121
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    results = {}
    if func.func_code.co_argcount != 1:
        def f(*args):
            try:
                return results[args]
            except KeyError:
                pass
            results[args] = func(*args)
            return results[args]
        return f

    # we gain a small amount of time because
    # we don't need to pack/unpack the list
    def f(arg):
        try:
            return results[arg]
        except KeyError:
            pass
        results[arg] = func(arg)
        return results[arg]
    return f
136 140
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = []

    def lookup(key, compute):
        # "order" lists the keys from least to most recently used
        if key in cache:
            order.remove(key)
        else:
            if len(cache) > 20:
                del cache[order.pop(0)]
            cache[key] = compute()
        order.append(key)
        return cache[key]

    if func.func_code.co_argcount == 1:
        def f(arg):
            return lookup(arg, lambda: func(arg))
    else:
        def f(*args):
            return lookup(args, lambda: func(*args))

    return f
163 167
class propertycache(object):
    """Descriptor that computes a value once and stores it on the instance.

    The value is stored under the name of the wrapped function, so
    later lookups find the instance attribute directly.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        setattr(obj, self.name, value)
        return value
172 176
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so only the first item carries data
    return proc.communicate(s)[0]
179 183
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = None
    outname = None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, 'wb')
        infile.write(s)
        infile.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        code = os.system(cmd)
        if sys.platform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        outfile = open(outname, 'rb')
        result = outfile.read()
        outfile.close()
        return result
    finally:
        # remove whichever temporary files got created, ignoring
        # failures to delete them
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
216 220
217 221 filtertable = {
218 222 'tempfile:': tempfilter,
219 223 'pipe:': pipefilter,
220 224 }
221 225
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, runner in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the "name:" prefix and the whitespace following it
        return runner(s, cmd[len(prefix):].lstrip())
    # no known prefix: pipe the string through the command
    return pipefilter(s, cmd)
228 232
def binary(s):
    """return true if a string is binary data"""
    if not s:
        return False
    return '\0' in s
232 236
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def floorlog2(value):
        # position of the highest set bit, 0 for a value of 0
        if not value:
            return 0
        bits = 0
        while value:
            value >>= 1
            bits += 1
        return bits - 1

    pending = []
    size = 0
    for piece in source:
        pending.append(piece)
        size += len(piece)
        if size < min:
            continue
        if min < max:
            min <<= 1
            # jump ahead when much more than min was collected
            floor = 1 << floorlog2(size)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        size = 0
    if pending:
        yield ''.join(pending)
263 267
264 268 Abort = error.Abort
265 269
def always(fn):
    """Return True regardless of fn."""
    return True
268 272
def never(fn):
    """Return False regardless of fn."""
    return False
271 275
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    here = splitpath(n1)
    there = n2.split('/')
    # count the leading components both paths share
    shared = 0
    while (shared < len(here) and shared < len(there)
           and here[shared] == there[shared]):
        shared += 1
    ups = ['..'] * (len(here) - shared)
    return os.sep.join(ups + there[shared:]) or '.'
297 301
298 302 _hgexecutable = None
299 303
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if hasattr(sys, "frozen"): # new py2exe
        return True
    if hasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen("__main__") # tools/freeze
309 313
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        # first call: work out the location and remember it
        path = os.environ.get('HG')
        if not path:
            if mainfrozen():
                path = sys.executable
            else:
                path = findexe('hg') or os.path.basename(sys.argv[0])
        _sethgexecutable(path)
    return _hgexecutable
325 329
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # stored in a module global, read back by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
330 334
def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status.  if ui object,
    print error message and return status, else raise onerr object as
    exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        sys.stdout.flush()
    except Exception:
        pass

    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    origcmd = cmd
    cmd = quotecommand(cmd)
    # start from the current environment and lay the overrides on top
    env = dict(os.environ)
    for name, value in environ.iteritems():
        env[name] = py2shell(value)
    env['HG'] = hgexecutable()

    if out is None or out == sys.__stdout__:
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # copy the combined stdout/stderr of the child to out
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in proc.stdout:
            out.write(line)
        proc.wait()
        rc = proc.returncode
    if sys.platform == 'OpenVMS' and rc & 1:
        rc = 0
    if rc and onerr:
        progname = os.path.basename(origcmd.split(None, 1)[0])
        errmsg = '%s %s' % (progname, explainexit(rc)[0])
        if errprefix:
            errmsg = '%s: %s' % (errprefix, errmsg)
        try:
            onerr.warn(errmsg + '\n')
        except AttributeError:
            # no warn() method: onerr is raised as an exception
            raise onerr(errmsg)
    return rc
380 384
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a traceback of exactly one entry means the TypeError was
            # raised by the call itself, not from inside func
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
392 396
def makedir(path, notindexed):
    """Create the directory path; notindexed is not used here."""
    os.mkdir(path)
395 399
def unlinkpath(f):
    """Remove the file f, then prune parent directories left empty.

    Errors from removing f propagate; an OSError while pruning the
    parent directories is ignored.
    """
    os.unlink(f)
    parent = os.path.dirname(f)
    try:
        # removedirs walks upwards and stops at the first directory
        # it cannot remove
        os.removedirs(parent)
    except OSError:
        pass
404 408
def copyfile(src, dest):
    """copy a file, preserving its permission bits

    If src is a symlink, dest is replaced by a symlink with the same
    target.  Otherwise the contents and the mode are copied; a
    shutil.Error is converted into Abort.
    """
    if not os.path.islink(src):
        try:
            shutil.copyfile(src, dest)
            shutil.copymode(src, dest)
        except shutil.Error:
            raise Abort(str(sys.exc_info()[1]))
        return

    # get rid of any existing dest before recreating the link
    try:
        os.unlink(dest)
    except OSError:
        pass
    target = os.readlink(src)
    os.symlink(target, dest)
419 423
def copyfiles(src, dst, hardlink=None):
    """Copy a directory tree using hardlinks if possible

    When hardlink is None, linking is attempted only if src and the
    parent directory of dst are on the same device.  Once a link attempt
    fails, the remaining files are copied.

    Returns a (hardlink, num) pair: whether linking is still in use and
    the number of files processed.
    """
    if hardlink is None:
        srcdev = os.stat(src).st_dev
        dstdev = os.stat(os.path.dirname(dst)).st_dev
        hardlink = (srcdev == dstdev)

    if not os.path.isdir(src):
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # fall back to copying, for this and all later files
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        return hardlink, 1

    os.mkdir(dst)
    total = 0
    for name, kind in osutil.listdir(src):
        hardlink, copied = copyfiles(os.path.join(src, name),
                                     os.path.join(dst, name), hardlink)
        total += copied
    return hardlink, total
447 451
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    '''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    Both '/' and backslash are treated as separators and empty
    components are skipped.  The components '.' and '..' are exempt from
    the trailing-dot check.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo/./bar")
    '''
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in n:
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % c
        # only the part before the first dot is compared with the
        # reserved device names
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1]
        # '.' and '..' end with a dot but are ordinary directory
        # references, not file names
        if t in '. ' and n not in ('.', '..'):
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
489 493
def lookupreg(key, name=None, scope=None):
    """Return None; this default implementation looks nothing up."""
    return None
492 496
def hidewindow():
    """Hide the current shell window.

    Hides the window that is opened when an asynchronous child process
    is started under Windows.  Other systems do not need this, so this
    implementation does nothing.
    """
    pass
500 504
# Pull in the platform-specific implementations.  Any name exported by
# the imported module rebinds a same-named definition made earlier in
# this module.
if os.name == 'nt':
    checkosfilename = checkwinfilename
    from windows import *
else:
    from posix import *
506 510
def makelock(info, pathname):
    """Create a lock at pathname that carries the string info.

    A symlink whose target is info is tried first.  If symlinks are
    unavailable, or os.symlink fails with anything other than EEXIST,
    the lock is instead created as a regular file opened with O_EXCL
    and info is written into it.

    Raises OSError when pathname already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError:
        why = sys.exc_info()[1]
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # do not leak the descriptor when the write fails
        os.close(ld)
519 523
def readlock(pathname):
    """Return the info string stored in the lock at pathname.

    The symlink target is returned when pathname can be read as a
    symlink.  If symlinks are unavailable, or os.readlink fails with
    EINVAL or ENOSYS, the contents of the file are returned instead.
    Any other OSError propagates.
    """
    try:
        return os.readlink(pathname)
    except OSError:
        why = sys.exc_info()[1]
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the file even when read() fails
        fp.close()
532 536
def fstat(fp):
    '''stat a file object, falling back to fp.name if it has no fileno'''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
539 543
540 544 # File system features
541 545
def checkcase(path):
    """
    Check whether the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.  If the final component has no foldable
    characters there is nothing to compare, and the filesystem is
    reported as case-sensitive.

    Returns True for case-sensitive, False otherwise.  Raises OSError
    if path itself cannot be stat'ed.
    """
    s1 = os.stat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            # no evidence against case sensitivity
            return True
    p2 = os.path.join(d, b2)
    try:
        s2 = os.stat(p2)
        if s2 == s1:
            # the case-swapped name reached the same file
            return False
        return True
    except OSError:
        return True
561 565
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name is either relative to root, or it is an absolute path starting
    with root. Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    Returns None if the path does not exist.  Directory listings are
    kept in _fspathcache and reused by later calls.
    '''
    # If name is absolute, make it relative
    if name.lower().startswith(root.lower()):
        l = len(root)
        # name may be exactly root, so check the length before indexing
        if l < len(name) and name[l] in (os.sep, os.altsep):
            l = l + 1
        name = name[l:]

    if not os.path.lexists(os.path.join(root, name)):
        return None

    seps = os.sep
    if os.altsep:
        seps = seps + os.altsep
    # escape the separators so that a backslash stays a literal
    # backslash inside the character classes below
    seps = re.escape(seps)
    pattern = re.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    curdir = os.path.normcase(os.path.normpath(root))
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if curdir not in _fspathcache:
            _fspathcache[curdir] = os.listdir(curdir)
        contents = _fspathcache[curdir]

        lpart = part.lower()
        lenp = len(part)
        for n in contents:
            if lenp == len(n) and n.lower() == lpart:
                result.append(n)
                break
        else:
            # Cannot happen, as the file exists!
            result.append(part)
        curdir = os.path.join(curdir, lpart)

    return ''.join(result)
609 613
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two scratch files named after testfile are created, hardlinked
    together and removed again.  Returns False if the scratch files
    cannot be set up, otherwise whether nlinks() reports more than one
    link.
    '''
    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    probe = testfile + ".hgtmp1"
    if os.path.lexists(probe):
        return False
    try:
        posixfile(probe, 'w').close()
    except IOError:
        return False

    linked = testfile + ".hgtmp2"
    fp = None
    try:
        try:
            oslink(probe, linked)
        except OSError:
            return False

        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(linked)
        return nlinks(linked) > 1
    finally:
        if fp is not None:
            fp.close()
        for scratch in (probe, linked):
            try:
                os.unlink(scratch)
            except OSError:
                pass
645 649
def endswithsep(path):
    '''Check whether path ends with os.sep or os.altsep.

    The result is only meaningful as a truth value: it is falsy, but
    not necessarily False, when os.altsep is unset.
    '''
    if path.endswith(os.sep):
        return True
    return os.altsep and path.endswith(os.altsep)
649 653
def splitpath(path):
    '''Split path by os.sep.

    os.altsep is deliberately ignored: this is a plain replacement for
    "xxx.split(os.sep)".  Run the path through os.path.normpath() first
    if it may contain other separators.'''
    parts = path.split(os.sep)
    return parts
657 661
def gui():
    '''Are we running in a GUI?

    The result is meant to be used as a truth value; outside darwin and
    nt it is whatever os.environ holds for DISPLAY.
    '''
    if sys.platform != 'darwin':
        return os.name == "nt" or os.environ.get("DISPLAY")

    if 'SSH_CONNECTION' in os.environ:
        # handle SSH access to a box where the user is logged in
        return False

    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()

    # pure build; use a safe default
    return True
672 676
673 677 def mktempcopy(name, emptyok=False, createmode=None):
674 678 """Create a temporary file with the same contents from name
675 679
676 680 The permission bits are copied from the original file.
677 681
678 682 If the temporary file is going to be truncated immediately, you
679 683 can use emptyok=True as an optimization.
680 684
681 685 Returns the name of the temporary file.
682 686 """
683 687 d, fn = os.path.split(name)
684 688 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
685 689 os.close(fd)
686 690 # Temporary files are created with mode 0600, which is usually not
687 691 # what we want. If the original file already exists, just copy
688 692 # its mode. Otherwise, manually obey umask.
689 693 try:
690 694 st_mode = os.lstat(name).st_mode & 0777
691 695 except OSError, inst:
692 696 if inst.errno != errno.ENOENT:
693 697 raise
694 698 st_mode = createmode
695 699 if st_mode is None:
696 700 st_mode = ~umask
697 701 st_mode &= 0666
698 702 os.chmod(temp, st_mode)
699 703 if emptyok:
700 704 return temp
701 705 try:
702 706 try:
703 707 ifp = posixfile(name, "rb")
704 708 except IOError, inst:
705 709 if inst.errno == errno.ENOENT:
706 710 return temp
707 711 if not getattr(inst, 'filename', None):
708 712 inst.filename = name
709 713 raise
710 714 ofp = posixfile(temp, "wb")
711 715 for chunk in filechunkiter(ifp):
712 716 ofp.write(chunk)
713 717 ifp.close()
714 718 ofp.close()
715 719 except:
716 720 try: os.unlink(temp)
717 721 except: pass
718 722 raise
719 723 return temp
720 724
class atomictempfile(object):
    '''writeable file object that atomically updates a file

    Every write goes to a temporary copy of the original file.  Call
    rename() once writing is finished: the temporary copy is then
    renamed to the original name, which makes the changes visible.

    Unlike other file-like objects, close() throws the writes away by
    deleting the temporary file.
    '''
    def __init__(self, name, mode='w+b', createmode=None):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        fp = posixfile(self._tempname, mode)
        self._fp = fp

        # delegated methods
        self.write = fp.write
        self.fileno = fp.fileno

    def rename(self):
        '''close the temporary file and move it over the permanent name'''
        if not self._fp.closed:
            self._fp.close()
        rename(self._tempname, localpath(self.__name))

    def close(self):
        '''discard the temporary file, unless it is already closed'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if hasattr(self, '_fp'): # constructor actually did something
            self.close()
757 761
def makedirs(name, mode=None):
    """recursive directory creation with parent mode inheritance

    Missing parent directories are created with the same mode.  An
    already existing name is not an error.
    """
    parent = os.path.abspath(os.path.dirname(name))
    try:
        os.mkdir(name)
        if mode is not None:
            os.chmod(name, mode)
        return
    except OSError:
        err = sys.exc_info()[1]
        if err.errno == errno.EEXIST:
            return
        # only a missing parent can be repaired by recursing
        if err.errno != errno.ENOENT or not name or parent == name:
            raise
    makedirs(parent, mode)
    makedirs(name, mode)
773 777
def readfile(path):
    """Return the whole contents of path, read in binary mode."""
    source = open(path, 'rb')
    try:
        return source.read()
    finally:
        source.close()
780 784
def writefile(path, text):
    """Write text to path in binary mode, replacing previous contents."""
    target = open(path, 'wb')
    try:
        target.write(text)
    finally:
        target.close()
787 791
def appendfile(path, text):
    """Append text to path in binary mode."""
    target = open(path, 'ab')
    try:
        target.write(text)
    finally:
        target.close()
794 798
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Chunks longer than 2**20 are handed on in slices of 2**18."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = []

    def read(self, l):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry."""
        left = l
        # collect the pieces and join them once at the end rather than
        # growing a string chunk by chunk
        pieces = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            chunk = queue.pop(0)
            left -= len(chunk)
            if left < 0:
                # chunk is larger than needed: left is negative here, so
                # chunk[left:] is the surplus, which goes back on the queue
                queue.insert(0, chunk[left:])
                pieces.append(chunk[:left])
            else:
                pieces.append(chunk)

        return ''.join(pieces)
842 846
def filechunkiter(f, size=65536, limit=None):
    """Create a generator that yields the data of file f in pieces of at
    most size bytes (default 65536), stopping after limit bytes when a
    limit is given (default is to read all data). A piece may be shorter
    than size if it is the last one in the file, or if the file is a
    socket or some other type of file that sometimes reads less data
    than is requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size
        if limit is not None:
            nbytes = min(limit, size)
        # a zero-sized request is never passed to f.read()
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
863 867
def makedate():
    """Return the current time as a (unixtime, offset) pair.

    offset is time.altzone while daylight saving time is in effect and
    time.timezone otherwise.  Aborts if the clock yields a negative
    timestamp.
    """
    lt = time.localtime()
    # lt[8] is the DST flag of the local time tuple
    tz = time.timezone
    if lt[8] == 1 and time.daylight:
        tz = time.altzone
    t = time.mktime(lt)
    if t < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % t, hint=hint)
    return t, tz
875 879
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. In format, "%1" is replaced by the
    sign and hours of the time zone and "%2" by its minutes; leave both
    out to omit the time zone. Without date, the current time is used."""
    t, tz = date or makedate()
    if t < 0:
        # time.gmtime(lt) fails on Windows for lt < -43200
        t, tz = 0, 0
    if "%1" in format or "%2" in format:
        if tz > 0:
            sign = "-"
        else:
            sign = "+"
        minutes = abs(tz) // 60
        hourpart = "%c%02d" % (sign, minutes // 60)
        minutepart = "%02d" % (minutes % 60)
        format = format.replace("%1", hourpart)
        format = format.replace("%2", minutepart)
    return time.strftime(format, time.gmtime(float(t) - tz))
892 896
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
896 900
def strdate(string, format, defaults=[]):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    defaults is indexed by the element groups "S", "M", "HI", "d", "mb"
    and "yY"; each entry is a pair of replacement strings used when
    format contains none of the group's codes."""
    def timezone(string):
        # offset in seconds for a trailing "+HHMM"/"-HHMM"/"GMT"/"UTC"
        # word, None if the last word is something else
        tz = string.split()[-1]
        if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
            seconds = (int(tz[1:3]) * 60 + int(tz[3:5])) * 60
            if tz[0] == "+":
                return -seconds
            return seconds
        if tz in ("GMT", "UTC"):
            return 0
        return None

    # NOTE: unixtime = localunixtime + offset
    offset = timezone(string)
    date = string
    if offset is not None:
        # strip the time zone word
        date = " ".join(string.split()[:-1])

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        found = False
        for p in part:
            if ("%" + p) in format:
                found = True
                break
        if found:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset

    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
937 941
def parsedate(date, formats=None, bias={}):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is
    returned. An empty date yields (0, 0).

    bias maps element groups ("d", "mb", "yY", "HI", "M", "S") to the
    value to assume for elements the date leaves out.
    """
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()
    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # not a "unixtime offset" pair: fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                if part[0] in "HMS":
                    biased = "00"
                else:
                    biased = "0"

            # this piece is for matching the generic end to today's date
            today = datestr(now, "%" + part[0])

            defaults[part] = (biased, today)

        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if abs(when) > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if when < 0:
        raise Abort(_('negative date value: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
991 995
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days before now

    '{date} to {date}' an inclusive range

    The returned function takes a unixtime and returns a boolean.
    Invalid specifiers raise Abort.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant covered by date: missing parts round down
        d = dict(mb="1", d="1")
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant covered by date: missing parts round up; the
        # last day of the month is found by trying the candidates in turn
        d = dict(mb="12", HI="23", M="59", S="59")
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                # that day does not exist in this month, try a shorter one
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
1067 1071
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop the mail domain
    user = user.split('@', 1)[0]
    # drop a real name preceding "<"
    user = user.split('<', 1)[-1]
    # keep only the first word, up to its first dot
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
1083 1087
def email(author):
    '''get email of author.

    This is the text between the first "<" and the first ">"; a missing
    bracket extends the result to the respective end of the string.'''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        end = None
    return author[start:end]
1090 1094
1091 1095 def _ellipsis(text, maxlength):
1092 1096 if len(text) <= maxlength:
1093 1097 return text, False
1094 1098 else:
1095 1099 return "%s..." % (text[:maxlength - 3]), True
1096 1100
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) characters."""
    try:
        # use unicode not to split at intermediate multi-byte sequence
        decoded = text.decode(encoding.encoding)
        utext, truncated = _ellipsis(decoded, maxlength)
        if truncated:
            return utext.encode(encoding.encoding)
        return text
    except (UnicodeDecodeError, UnicodeEncodeError):
        # fall back to trimming the undecoded string
        return _ellipsis(text, maxlength)[0]
1108 1112
def bytecount(nbytes):
    '''return byte count formatted as readable string, with units'''

    # (multiplier, divisor, template): the first row for which nbytes is
    # at least multiplier * divisor decides the unit and the precision
    units = (
        (100, 1 << 30, _('%.0f GB')),
        (10, 1 << 30, _('%.1f GB')),
        (1, 1 << 30, _('%.2f GB')),
        (100, 1 << 20, _('%.0f MB')),
        (10, 1 << 20, _('%.1f MB')),
        (1, 1 << 20, _('%.2f MB')),
        (100, 1 << 10, _('%.0f KB')),
        (10, 1 << 10, _('%.1f KB')),
        (1, 1 << 10, _('%.2f KB')),
        (1, 1, _('%.0f bytes')),
        )

    for multiplier, divisor, template in units:
        threshold = divisor * multiplier
        if nbytes >= threshold:
            return template % (nbytes / float(divisor))
    # nbytes is below one byte
    return units[-1][2] % nbytes
1129 1133
def uirepr(s):
    """Return repr(s) with doubled backslashes collapsed to single ones."""
    # Avoid double backslash in Windows path repr()
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
1133 1137
1134 1138 # delay import of textwrap
def MBTextWrapper(**kwargs):
    """Return a column-width aware text wrapper configured with kwargs.

    The wrapper class is only defined when this function runs; the
    module-level name MBTextWrapper is then rebound to that class, so
    later calls instantiate it directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in east asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def __init__(self, **kwargs):
            textwrap.TextWrapper.__init__(self, **kwargs)

            # for compatibility between 2.4 and 2.6
            if getattr(self, 'drop_whitespace', None) is None:
                self.drop_whitespace = kwargs.get('drop_whitespace', True)

        def _cutdown(self, ucstr, space_left):
            # split ucstr at the first character that no longer fits
            # into space_left columns
            used = 0
            colwidth = encoding.ucolwidth
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                head, rest = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = rest
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current
                # line.  cur_len is the column width of all the chunks in
                # cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (ie. no lines started
                # yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Nope, this line is full.
                    if cur_len + chunkwidth > width:
                        break

                    # Can at least squeeze this chunk onto the current line.
                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
1244 1248
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return it in the local encoding.

    initindent prefixes the first output line and hangindent the
    following ones.  A width that leaves no room after the indents is
    replaced by max(78, longest indent + 1).
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    # wrapping works on unicode so that column widths can be computed
    enc, mode = encoding.encoding, encoding.encodingmode
    line = line.decode(enc, mode)
    initindent = initindent.decode(enc, mode)
    hangindent = hangindent.decode(enc, mode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(encoding.encoding)
1257 1261
def iterlines(iterator):
    """Yield the lines of every chunk produced by iterator.

    Each chunk is split on its own, so a line that spans two chunks
    comes out as two lines.
    """
    for block in iterator:
        lines = block.splitlines()
        for line in lines:
            yield line
1262 1266
def expandpath(path):
    """Expand environment variables, then a leading "~", in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
1265 1269
def hgcmd():
    """Return the command used to execute current hg

    This differs from hgexecutable(): on Windows we want to avoid
    things that open new shell windows, such as batch files, so the
    result is either the python call or the current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    return [sys.executable]
1276 1280
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; keep only the pid so
        # that the "pid in terminated" test below can match
        terminated.add(os.wait()[0])
    prevhandler = None
    if hasattr(signal, 'SIGCHLD'):
        prevhandler = signal.signal(signal.SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked once more because the child may have
            # satisfied the condition just before it went away
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # put back whatever SIGCHLD handler was installed before
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
1310 1314
# Use the builtin any() and all() where they exist, and provide
# equivalents for interpreters that lack them.
try:
    any, all = any, all
except NameError:
    def any(iterable):
        """Return True if at least one element of iterable is true."""
        for item in iterable:
            if item:
                return True
        return False

    def all(iterable):
        """Return True unless some element of iterable is false."""
        for item in iterable:
            if not item:
                return False
        return True
1325 1329
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    The keys of mapping are used as regular expression alternatives.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    mapping itself is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # add the prefix entry to a copy so the caller's mapping is
        # left untouched
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = re.compile(r'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
1350 1354
def getport(port):
    """Resolve port to a port number.

    A value that int() accepts is converted and returned. Anything
    else is treated as a network service name and looked up with
    socket.getservbyname(); util.Abort is raised when no such service
    is known.
    """
    try:
        number = int(port)
    except ValueError:
        number = None
    if number is not None:
        return number

    # not numeric: fall back to the service database
    try:
        number = socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
    return number
1367 1371
# recognized boolean spellings, keyed by their lowercase form
_booleans = dict((word, True)
                 for word in ('1', 'yes', 'true', 'on', 'always'))
_booleans.update((word, False)
                 for word in ('0', 'no', 'false', 'off', 'never'))

def parsebool(s):
    """Map a boolean-like string to True or False, ignoring case.

    Returns None when s is not one of the recognized spellings.
    """
    return _booleans.get(s.lower())
1378 1382
_hexdig = '0123456789ABCDEFabcdef'
# every two-digit hex spelling (in any letter case) -> the character
# it encodes
_hextochr = dict((hi + lo, chr(int(hi + lo, 16)))
                 for hi in _hexdig for lo in _hexdig)

def _urlunquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    pieces = s.split('%')
    # fastpath: no '%' at all, nothing to decode
    if len(pieces) == 1:
        return s
    decoded = [pieces[0]]
    for chunk in pieces[1:]:
        try:
            decoded.append(_hextochr[chunk[:2]] + chunk[2:])
        except KeyError:
            # not followed by two hex digits: keep the '%' literally
            decoded.append('%' + chunk)
        except UnicodeDecodeError:
            decoded.append(unichr(int(chunk[:2], 16)) + chunk[2:])
    return ''.join(decoded)
1398 1402
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    >>> url('#frag')
    <url path: '', fragment: 'frag'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse the string path into the URL component attributes.

        parsequery and parsefragment control whether a '?' query and a
        '#' fragment are split off or left as part of the path.

        Abort is raised for a file:// URL naming a host other than
        localhost, 127.0.0.1 or [::1].
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            # An empty remainder (input like '#frag') must stay a string
            # here: the checks below call string methods on it. It is
            # normalized by the 'if not path' test further down.
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(r'\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, _urlunquote(v))

    def __repr__(self):
        """Return '<url ...>' listing every component that is not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        """
        if self._localpath:
            # local paths (and bundle paths) are emitted without quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed IPv6 literals are emitted as they are
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url, authinfo) with credentials removed from the url.

        The first item is str(self) computed with user and passwd
        temporarily cleared. The second item is None when there is no
        user, otherwise the tuple
        (None, (url, host), user, passwd or '').
        """
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            # always restore the credentials on this object
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its URIs
        # must not contain credentials.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True for a remote URL or an absolute local path."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return the local filesystem path for file: and bundle: URLs.

        For any other URL the original, unparsed string is returned.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            return path
        return self._origpath
1669 1673
def hasscheme(path):
    """Return True if parsing path as a url yields a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
1672 1676
def hasdriveletter(path):
    """Return True if path starts with a drive letter such as 'c:'.

    A falsy path (None or '') has no drive letter, so False is
    returned for it instead of failing on None.
    """
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
1675 1679
def urllocalpath(path):
    """Return url.localpath() for path, parsed with query and fragment
    splitting disabled."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
1678 1682
def hidepassword(u):
    '''return the url string u with any password replaced by ***'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
1685 1689
def removeauth(u):
    '''return the url string u without its user and password'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
1691 1695
def isatty(fd):
    """Return fd.isatty(), or False if that raises AttributeError
    (for instance because fd has no isatty method)."""
    try:
        result = fd.isatty()
    except AttributeError:
        result = False
    return result
General Comments 0
You need to be logged in to leave comments. Login now