##// END OF EJS Templates
matcher: use re2 bindings if available...
Bryan O'Sullivan -
r16943:8d08a28a default
parent child Browse files
Show More
@@ -1,344 +1,352 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import scmutil, util, fileset
10 10 from i18n import _
11 11
def _rematcher(pat):
    '''return a match callable for the compiled form of pat

    The pattern is compiled through util.compilere(). If the compiled
    object has a test_match attribute it is returned, otherwise the
    regular match method is.
    '''
    compiled = util.compilere(pat)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
19
12 20 def _expandsets(pats, ctx):
13 21 '''convert set: patterns into a list of files in the given context'''
14 22 fset = set()
15 23 other = []
16 24
17 25 for kind, expr in pats:
18 26 if kind == 'set':
19 27 if not ctx:
20 28 raise util.Abort("fileset expression with no context")
21 29 s = fileset.getfileset(ctx, expr)
22 30 fset.update(s)
23 31 continue
24 32 other.append((kind, expr))
25 33 return fset, other
26 34
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - handed to _normalize() together with root and cwd
        ctx - handed to _buildmatch(), which needs it for 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx

        # im / em / pm are the include, exclude and pattern predicates;
        # each one only exists when the matching argument was given
        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        # combine only the predicates that exist, reusing a predicate
        # directly when it is the only one
        haspm = bool(patterns or exact)
        if include and exclude:
            if haspm:
                m = lambda f: im(f) and not em(f) and pm(f)
            else:
                m = lambda f: im(f) and not em(f)
        elif include:
            if haspm:
                m = lambda f: im(f) and pm(f)
            else:
                m = im
        elif exclude:
            if haspm:
                m = lambda f: not em(f) and pm(f)
            else:
                m = lambda f: not em(f)
        elif haspm:
            m = pm
        else:
            m = lambda f: True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''apply the combined match function to fn'''
        return self.matchfn(fn)
    def __iter__(self):
        '''iterate over the explicit files / pattern roots'''
        for f in self._files:
            yield f
    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass
    def dir(self, f):
        pass
    def missing(self, f):
        pass
    def exact(self, f):
        '''tell whether f is one of the entries of files()'''
        return f in self._fmap
    def rel(self, f):
        '''return util.pathto() of f for this matcher's root and cwd'''
        return util.pathto(self._root, self._cwd, f)
    def files(self):
        return self._files
    def anypats(self):
        return self._anypats
    def always(self):
        return False
126 134
class exact(match):
    '''matcher that treats the given files as literal names'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)
130 138
class always(match):
    '''matcher built from an empty pattern list; always() reports True'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
    def always(self):
        return True
136 144
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher

        # keep only the files below path and drop the "path/" prefix
        prefix = path + "/"
        skip = len(prefix)
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(prefix)]
        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''forward to the wrapped matcher with the path prefix restored'''
        self._matcher.bad(self._path + "/" + f, msg)
179 187
def patkind(pat):
    '''return the kind prefix of pat as split by _patsplit, using None
    as the default kind'''
    kind, _rest = _patsplit(pat, None)
    return kind
182 190
183 191 def _patsplit(pat, default):
184 192 """Split a string into an optional pattern kind prefix and the
185 193 actual pattern."""
186 194 if ':' in pat:
187 195 kind, val = pat.split(':', 1)
188 196 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
189 197 'listfile', 'listfile0', 'set'):
190 198 return kind, val
191 199 return default, pat
192 200
193 201 def _globre(pat):
194 202 "convert a glob pattern into a regexp"
195 203 i, n = 0, len(pat)
196 204 res = ''
197 205 group = 0
198 206 escape = re.escape
199 207 def peek():
200 208 return i < n and pat[i]
201 209 while i < n:
202 210 c = pat[i]
203 211 i += 1
204 212 if c not in '*?[{},\\':
205 213 res += escape(c)
206 214 elif c == '*':
207 215 if peek() == '*':
208 216 i += 1
209 217 res += '.*'
210 218 else:
211 219 res += '[^/]*'
212 220 elif c == '?':
213 221 res += '.'
214 222 elif c == '[':
215 223 j = i
216 224 if j < n and pat[j] in '!]':
217 225 j += 1
218 226 while j < n and pat[j] != ']':
219 227 j += 1
220 228 if j >= n:
221 229 res += '\\['
222 230 else:
223 231 stuff = pat[i:j].replace('\\','\\\\')
224 232 i = j + 1
225 233 if stuff[0] == '!':
226 234 stuff = '^' + stuff[1:]
227 235 elif stuff[0] == '^':
228 236 stuff = '\\' + stuff
229 237 res = '%s[%s]' % (res, stuff)
230 238 elif c == '{':
231 239 group += 1
232 240 res += '(?:'
233 241 elif c == '}' and group:
234 242 res += ')'
235 243 group -= 1
236 244 elif c == ',' and group:
237 245 res += '|'
238 246 elif c == '\\':
239 247 p = peek()
240 248 if p:
241 249 i += 1
242 250 res += escape(p)
243 251 else:
244 252 res += escape(c)
245 253 else:
246 254 res += escape(c)
247 255 return res
248 256
249 257 def _regex(kind, name, tail):
250 258 '''convert a pattern into a regular expression'''
251 259 if not name:
252 260 return ''
253 261 if kind == 're':
254 262 return name
255 263 elif kind == 'path':
256 264 return '^' + re.escape(name) + '(?:/|$)'
257 265 elif kind == 'relglob':
258 266 return '(?:|.*/)' + _globre(name) + tail
259 267 elif kind == 'relpath':
260 268 return re.escape(name) + '(?:/|$)'
261 269 elif kind == 'relre':
262 270 if name.startswith('^'):
263 271 return name
264 272 return '.*' + name
265 273 return _globre(name) + tail
266 274
def _buildmatch(ctx, pats, tail):
    '''return a (regexp text, match function) pair for pats

    'set' patterns are expanded to a set of file names first; the regexp
    text is "" when nothing but 'set' patterns were given.'''
    fset, pats = _expandsets(pats, ctx)
    if not pats:
        return "", fset.__contains__

    pat, mf = _buildregexmatch(pats, tail)
    if not fset:
        return pat, mf
    return pat, lambda f: f in fset or mf(f)
276 284
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    Returns the combined regexp text and a match callable. Invalid
    patterns are reported through util.Abort."""
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(pats)
        if l < 2:
            raise
        half = l // 2
        a = _buildregexmatch(pats[:half], tail)[1]
        b = _buildregexmatch(pats[half:], tail)[1]
        return pat, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one at a time to name the offending one
        for k, p in pats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
301 309
def _normalize(names, default, root, cwd, auditor):
    '''split names into (kind, name) pairs with normalized names

    'listfile' and 'listfile0' entries are replaced by the normalized
    patterns read from the file they name.'''
    pats = []
    for kind, name in [_patsplit(p, default) for p in names]:
        if kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(name)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            pats += _normalize(files, default, root, cwd, auditor)
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)

        pats.append((kind, name))
    return pats
324 332
325 333 def _roots(patterns):
326 334 r = []
327 335 for kind, name in patterns:
328 336 if kind == 'glob': # find the non-glob prefix
329 337 root = []
330 338 for p in name.split('/'):
331 339 if '[' in p or '{' in p or '*' in p or '?' in p:
332 340 break
333 341 root.append(p)
334 342 r.append('/'.join(root) or '.')
335 343 elif kind in ('relpath', 'path'):
336 344 r.append(name or '.')
337 345 elif kind == 'relglob':
338 346 r.append('.')
339 347 return r
340 348
341 349 def _anypats(patterns):
342 350 for kind, name in patterns:
343 351 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
344 352 return True
@@ -1,1778 +1,1802 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from i18n import _
17 17 import error, osutil, encoding, collections
18 18 import errno, re, shutil, sys, tempfile, traceback
19 19 import os, time, datetime, calendar, textwrap, signal
20 20 import imp, socket, urllib
21 21
22 22 if os.name == 'nt':
23 23 import windows as platform
24 24 else:
25 25 import posix as platform
26 26
27 27 platform.encodinglower = encoding.lower
28 28 platform.encodingupper = encoding.upper
29 29
30 30 cachestat = platform.cachestat
31 31 checkexec = platform.checkexec
32 32 checklink = platform.checklink
33 33 copymode = platform.copymode
34 34 executablepath = platform.executablepath
35 35 expandglobs = platform.expandglobs
36 36 explainexit = platform.explainexit
37 37 findexe = platform.findexe
38 38 gethgcmd = platform.gethgcmd
39 39 getuser = platform.getuser
40 40 groupmembers = platform.groupmembers
41 41 groupname = platform.groupname
42 42 hidewindow = platform.hidewindow
43 43 isexec = platform.isexec
44 44 isowner = platform.isowner
45 45 localpath = platform.localpath
46 46 lookupreg = platform.lookupreg
47 47 makedir = platform.makedir
48 48 nlinks = platform.nlinks
49 49 normpath = platform.normpath
50 50 normcase = platform.normcase
51 51 nulldev = platform.nulldev
52 52 openhardlinks = platform.openhardlinks
53 53 oslink = platform.oslink
54 54 parsepatchoutput = platform.parsepatchoutput
55 55 pconvert = platform.pconvert
56 56 popen = platform.popen
57 57 posixfile = platform.posixfile
58 58 quotecommand = platform.quotecommand
59 59 realpath = platform.realpath
60 60 rename = platform.rename
61 61 samedevice = platform.samedevice
62 62 samefile = platform.samefile
63 63 samestat = platform.samestat
64 64 setbinary = platform.setbinary
65 65 setflags = platform.setflags
66 66 setsignalhandler = platform.setsignalhandler
67 67 shellquote = platform.shellquote
68 68 spawndetached = platform.spawndetached
69 69 sshargs = platform.sshargs
70 70 statfiles = platform.statfiles
71 71 termwidth = platform.termwidth
72 72 testpid = platform.testpid
73 73 umask = platform.umask
74 74 unlink = platform.unlink
75 75 unlinkpath = platform.unlinkpath
76 76 username = platform.username
77 77
78 78 # Python compatibility
79 79
_notset = object()

def safehasattr(thing, attr):
    '''tell whether thing has attribute attr, using getattr() with a
    private sentinel as the default value'''
    found = getattr(thing, attr, _notset)
    return found is not _notset
84 84
def sha1(s=''):
    '''
    Return a SHA-1 object for s; thin wrapper that hands over to _fastsha1

    >>> f = _fastsha1
    >>> a = sha1()
    >>> a = f()
    >>> a.hexdigest()
    'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    '''

    return _fastsha1(s)
97 97
98 98 def _fastsha1(s=''):
99 99 # This function will import sha1 from hashlib or sha (whichever is
100 100 # available) and overwrite itself with it on the first call.
101 101 # Subsequent calls will go directly to the imported function.
102 102 if sys.version_info >= (2, 5):
103 103 from hashlib import sha1 as _sha1
104 104 else:
105 105 from sha import sha as _sha1
106 106 global _fastsha1, sha1
107 107 _fastsha1 = sha1 = _sha1
108 108 return _sha1(s)
109 109
try:
    buffer = buffer
except NameError:
    # no builtin buffer: provide a slicing based stand-in
    if sys.version_info[0] >= 3:
        def buffer(sliceable, offset=0):
            return memoryview(sliceable)[offset:]
    else:
        def buffer(sliceable, offset=0):
            return sliceable[offset:]
119 119
120 120 import subprocess
121 121 closefds = os.name == 'posix'
122 122
def popen2(cmd, env=None, newlines=False):
    '''run cmd through the shell and return its (stdin, stdout) pipes'''
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=pipe, stdout=pipe,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
133 133
def popen3(cmd, env=None, newlines=False):
    '''run cmd through the shell and return its (stdin, stdout, stderr)
    pipes'''
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=pipe, stdout=pipe,
                            stderr=pipe,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr
142 142
def version():
    """Return version information if available."""
    try:
        # the attribute access stays inside the try block on purpose
        # NOTE(review): presumably because a deferred import could
        # raise ImportError only at that point - confirm
        import __version__
        found = __version__.version
    except ImportError:
        found = 'unknown'
    return found
150 150
151 151 # used by parsedate
152 152 defaultdateformats = (
153 153 '%Y-%m-%d %H:%M:%S',
154 154 '%Y-%m-%d %I:%M:%S%p',
155 155 '%Y-%m-%d %H:%M',
156 156 '%Y-%m-%d %I:%M%p',
157 157 '%Y-%m-%d',
158 158 '%m-%d',
159 159 '%m/%d',
160 160 '%m/%d/%y',
161 161 '%m/%d/%Y',
162 162 '%a %b %d %H:%M:%S %Y',
163 163 '%a %b %d %I:%M:%S%p %Y',
164 164 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
165 165 '%b %d %H:%M:%S %Y',
166 166 '%b %d %I:%M:%S%p %Y',
167 167 '%b %d %H:%M:%S',
168 168 '%b %d %I:%M:%S%p',
169 169 '%b %d %H:%M',
170 170 '%b %d %I:%M%p',
171 171 '%b %d %Y',
172 172 '%b %d',
173 173 '%H:%M:%S',
174 174 '%I:%M:%S%p',
175 175 '%H:%M',
176 176 '%I:%M%p',
177 177 )
178 178
179 179 extendeddateformats = defaultdateformats + (
180 180 "%Y",
181 181 "%Y-%m",
182 182 "%b",
183 183 "%b %Y",
184 184 )
185 185
def cachefunc(func):
    '''cache the result of function calls

    Returns a wrapper around func that stores every result in a dict
    keyed by the positional argument(s); the arguments therefore have to
    be hashable. The cache is never pruned.'''
    # XXX doesn't handle keywords args
    cache = {}
    # func_code is missing on builtins and callable objects (and on all
    # functions under Python 3, which use __code__); such callables take
    # the generic *args path instead of raising AttributeError
    code = getattr(func, 'func_code', None) or getattr(func, '__code__', None)
    if code is not None and code.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
204 204
try:
    collections.deque.remove
except AttributeError:
    # python 2.4 lacks deque.remove
    class deque(collections.deque):
        def remove(self, val):
            '''delete the first item equal to val, if there is one'''
            for pos, item in enumerate(self):
                if item == val:
                    del self[pos]
                    break
else:
    deque = collections.deque
216 216
def lrucachefunc(func):
    '''cache most recent results of function calls

    Returns a wrapper around func keyed by the positional argument(s),
    which have to be hashable. When a new key arrives while more than 20
    results are stored, the least recently used one is dropped.'''
    cache = {}
    order = deque()   # keys, least recently used first
    # func_code is missing on builtins and callable objects (and on all
    # functions under Python 3, which use __code__); such callables take
    # the generic *args path instead of raising AttributeError
    code = getattr(func, 'func_code', None) or getattr(func, '__code__', None)
    if code is not None and code.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f
243 243
class propertycache(object):
    '''descriptor that computes a value with func on first access and
    then stores it on the object under the function's name'''
    def __init__(self, func):
        self.func, self.name = func, func.__name__
    def __get__(self, obj, type=None):
        value = self.func(obj)
        setattr(obj, self.name, value)
        return value
252 252
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not redirected, so only the stdout half is of interest
    return proc.communicate(s)[0]
259 259
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Raises Abort when the command reports failure; both temporary files
    are removed afterwards in any case.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, 'wb')
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        code = os.system(cmd)
        if sys.platform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        fp = open(outname, 'rb')
        r = fp.read()
        fp.close()
        return r
    finally:
        # best effort cleanup, a file that cannot be removed is ignored
        for fname in (inname, outname):
            try:
                if fname:
                    os.unlink(fname)
            except OSError:
                pass
296 296
297 297 filtertable = {
298 298 'tempfile:': tempfilter,
299 299 'pipe:': pipefilter,
300 300 }
301 301
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # a filtertable prefix selects the filter function, anything else is
    # run through pipefilter
    for prefix, fn in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        return fn(s, cmd[len(prefix):].lstrip())
    return pipefilter(s, cmd)
308 308
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it contains a NUL character."""
    if not s:
        return False
    return '\0' in s
312 312
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # position of the highest set bit, 0 for x == 0
        bits = -1
        while x:
            x >>= 1
            bits += 1
        if bits < 0:
            return 0
        return bits

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            continue
        if min < max:
            min = min << 1
            # jump straight to the size class of what was just collected
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pendinglen = 0
        pending = []
    if pending:
        yield ''.join(pending)
343 343
344 344 Abort = error.Abort
345 345
def always(fn):
    '''predicate that accepts every argument'''
    return True
348 348
def never(fn):
    '''predicate that rejects every argument'''
    return False
351 351
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    # skip the leading components both paths have in common
    common = 0
    limit = min(len(a), len(b))
    while common < limit and a[common] == b[common]:
        common += 1
    # climb out of what is left of n1, then descend into the rest of n2
    return os.sep.join((['..'] * (len(a) - common)) + b[common:]) or '.'
377 377
378 378 _hgexecutable = None
379 379
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"): # new py2exe
        return True
    if safehasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen("__main__") # tools/freeze
389 389
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The location is worked out on the first call only and remembered
    through _sethgexecutable().
    """
    if _hgexecutable is None:
        hg = os.environ.get('HG')
        mainmod = sys.modules['__main__']
        if hg:
            exe = hg
        elif mainfrozen():
            exe = sys.executable
        elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
            exe = mainmod.__file__
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
        _sethgexecutable(exe)
    return _hgexecutable
408 408
409 409 def _sethgexecutable(path):
410 410 """set location of the 'hg' executable"""
411 411 global _hgexecutable
412 412 _hgexecutable = path
413 413
def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status. if ui object,
    print error message and return status, else raise onerr object as
    exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        sys.stdout.flush()
    except Exception:
        pass
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)
    origcmd = cmd
    cmd = quotecommand(cmd)
    if sys.platform == 'plan9':
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = dict(os.environ)
        for k, v in environ.iteritems():
            env[k] = py2shell(v)
        env['HG'] = hgexecutable()
        if out is None or out == sys.__stdout__:
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            # copy the merged stdout/stderr of the child to out
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in proc.stdout:
                out.write(line)
            proc.wait()
            rc = proc.returncode
        if sys.platform == 'OpenVMS' and rc & 1:
            rc = 0
    if rc and onerr:
        progname = os.path.basename(origcmd.split(None, 1)[0])
        errmsg = '%s %s' % (progname, explainexit(rc)[0])
        if errprefix:
            errmsg = '%s: %s' % (errprefix, errmsg)
        try:
            onerr.warn(errmsg + '\n')
        except AttributeError:
            # no warn() method: onerr is raised as an exception instead
            raise onerr(errmsg)
    return rc
470 470
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is a single entry long is turned into
    error.SignatureError; any other TypeError is re-raised as is.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
482 482
483 483 def copyfile(src, dest):
484 484 "copy a file, preserving mode and atime/mtime"
485 485 if os.path.islink(src):
486 486 try:
487 487 os.unlink(dest)
488 488 except OSError:
489 489 pass
490 490 os.symlink(os.readlink(src), dest)
491 491 else:
492 492 try:
493 493 shutil.copyfile(src, dest)
494 494 shutil.copymode(src, dest)
495 495 except shutil.Error, inst:
496 496 raise Abort(str(inst))
497 497
def copyfiles(src, dst, hardlink=None):
    """Copy a directory tree using hardlinks if possible

    Returns a (hardlink, num) pair: whether hardlinking is still in use
    and the number of files handled. With hardlink left at None, linking
    is tried when src and the parent directory of dst are on the same
    device."""

    if hardlink is None:
        hardlink = (os.stat(src).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)

    num = 0
    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink)
            num += n
        return hardlink, num

    linked = False
    if hardlink:
        try:
            oslink(src, dst)
            linked = True
        except (IOError, OSError):
            # linking failed: copy from here on
            hardlink = False
    if not linked:
        shutil.copy(src, dst)
    num += 1

    return hardlink, num
525 525
_winreservednames = ('con prn aux nul '
                     'com1 com2 com3 com4 com5 com6 com7 com8 com9 '
                     'lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9').split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    '''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    '''
    for part in path.replace('\\', '/').split('/'):
        if not part:
            continue
        for char in part:
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        # only the text before the first dot is compared with the
        # reserved names
        stem = part.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = part[-1]
        # '.' and '..' themselves are let through
        if last in '. ' and part not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
568 568
569 569 if os.name == 'nt':
570 570 checkosfilename = checkwinfilename
571 571 else:
572 572 checkosfilename = platform.checkosfilename
573 573
574 574 def makelock(info, pathname):
575 575 try:
576 576 return os.symlink(info, pathname)
577 577 except OSError, why:
578 578 if why.errno == errno.EEXIST:
579 579 raise
580 580 except AttributeError: # no symlink in os
581 581 pass
582 582
583 583 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
584 584 os.write(ld, info)
585 585 os.close(ld)
586 586
587 587 def readlock(pathname):
588 588 try:
589 589 return os.readlink(pathname)
590 590 except OSError, why:
591 591 if why.errno not in (errno.EINVAL, errno.ENOSYS):
592 592 raise
593 593 except AttributeError: # no symlink in os
594 594 pass
595 595 fp = posixfile(pathname)
596 596 r = fp.read()
597 597 fp.close()
598 598 return r
599 599
def fstat(fp):
    '''stat file object that may not have fileno method.

    Without fileno, the file named by fp.name is stat'ed instead.'''
    try:
        descriptor = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(descriptor)
606 606
607 607 # File system features
608 608
def checkcase(path):
    """
    Check whether the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.stat(path)
    d, b = os.path.split(path)
    # build a variant of the last component that differs only in case
    folded = b.upper()
    if folded == b:
        folded = b.lower()
    if folded == b:
        return True # no evidence against case sensitivity
    try:
        s2 = os.stat(os.path.join(d, folded))
    except OSError:
        return True
    # an identical stat result means both spellings name the same entry
    return not (s2 == s1)
631 631
# _re2 is None while re2 looks importable but is untested, and becomes
# True or False once its availability is settled
try:
    import re2
except ImportError:
    _re2 = False
else:
    _re2 = None

def compilere(pat):
    '''Compile a regular expression, using re2 if possible

    For best performance, use only re2-compatible regexp features.

    Patterns that re2 rejects are compiled with the re module instead.'''
    global _re2
    if _re2 is None:
        # NOTE(review): touching re2.compile presumably forces a
        # deferred import, hence the ImportError handler - confirm
        try:
            re2.compile
        except ImportError:
            _re2 = False
        else:
            _re2 = True
    if not _re2:
        return re.compile(pat)
    try:
        return re2.compile(pat)
    except re2.error:
        return re.compile(pat)
655
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Directory listings are kept in _fspathcache; a component that cannot
    be found is returned unchanged.
    '''
    def find(p, contents):
        for n in contents:
            if normcase(n) == p:
                return n
        return None

    seps = os.sep
    if os.altsep:
        seps = seps + os.altsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string, so the result has to be rebound;
    # otherwise a backslash in seps escapes the next character of the
    # character class instead of being a separator itself
    seps = seps.replace('\\', '\\\\')
    pattern = re.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    curdir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if curdir not in _fspathcache:
            _fspathcache[curdir] = os.listdir(curdir)
        contents = _fspathcache[curdir]

        found = find(part, contents)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            contents = os.listdir(curdir)
            _fspathcache[curdir] = contents
            found = find(part, contents)

        result.append(found or part)
        curdir = os.path.join(curdir, part)

    return ''.join(result)
678 702
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Creates "<testfile>.hgtmp1", hard-links it to "<testfile>.hgtmp2",
    and returns whether nlinks() reports more than one link for the
    second name while it is held open.  Returns False when the first
    temporary name already exists, cannot be created, or cannot be
    linked.  Both temporary files are removed before returning.
    '''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        try:
            oslink(f1, f2)
        except OSError:
            return False

        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    finally:
        # best-effort cleanup; unlink failures are deliberately ignored
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass

    # not reached: every path through the try block above returns or raises
    return False
714 738
def endswithsep(path):
    '''Tell whether path ends with os.sep or os.altsep.

    The result is truthy/falsy rather than strictly boolean: when
    os.altsep is unset and path does not end with os.sep, the (falsy)
    value of os.altsep itself is returned.
    '''
    if path.endswith(os.sep):
        return True
    return os.altsep and path.endswith(os.altsep)
718 742
def splitpath(path):
    '''Return the components of path, split on os.sep only.

    os.altsep is intentionally ignored: this is a drop-in replacement
    for a plain "xxx.split(os.sep)".  Run os.path.normpath() on the
    path first if alternate separators may be present.'''
    components = path.split(os.sep)
    return components
726 750
def gui():
    '''Are we running in a GUI?

    Outside of darwin the answer is truthy on Windows (os.name "nt")
    or when the DISPLAY environment variable is set.  On darwin an SSH
    session is never a GUI; otherwise osutil.isgui() decides when it
    exists, and True is assumed when it does not.
    '''
    if sys.platform != 'darwin':
        return os.name == "nt" or os.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in os.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
741 765
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created in the same directory as name, with
    a name starting with '.<basename>-'.

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If name does not exist (ENOENT), the empty temporary file is
    returned as is.  Any other failure removes the temporary file and
    re-raises the error.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError, inst:
            if inst.errno == errno.ENOENT:
                return temp
            # make sure the error names the file that could not be opened
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # do not leave a partial temporary file behind
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
780 804
class atomictempfile(object):
    '''File-like writer that replaces its target in a single rename.

    Data is written to a temporary copy created by mktempcopy().
    close() renames that copy over the permanent name, which is when
    the changes become visible.  discard() - also invoked when the
    object is destroyed without having been closed - removes the
    temporary copy so that all writes are dropped.
    '''
    def __init__(self, name, mode='w+b', createmode=None):
        self.__name = name # permanent name
        # a mode containing 'w' truncates, so the copy may start empty
        truncating = 'w' in mode
        self._tempname = mktempcopy(name, emptyok=truncating,
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)

        # delegated methods
        self.write = self._fp.write
        self.fileno = self._fp.fileno

    def close(self):
        '''flush the temporary copy and move it over the permanent name'''
        if self._fp.closed:
            return
        self._fp.close()
        rename(self._tempname, localpath(self.__name))

    def discard(self):
        '''throw away the temporary copy; unlink errors are ignored'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()
816 840
def makedirs(name, mode=None):
    """recursive directory creation with parent mode inheritance

    Creates name, first creating any missing parent directories.  When
    mode is not None it is applied with os.chmod() to every directory
    this call creates.  A directory that already exists (EEXIST) is
    left untouched, including its mode.  Other OSErrors are re-raised.
    """
    try:
        os.mkdir(name)
    except OSError, err:
        if err.errno == errno.EEXIST:
            return
        # only a missing parent (ENOENT) can be fixed by recursing
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        # reached the top of the tree: nothing further to create
        if parent == name:
            raise
        makedirs(parent, mode)
        os.mkdir(name)
    if mode is not None:
        os.chmod(name, mode)
833 857
def readfile(path):
    '''Return the whole content of the file at path, read in binary mode.'''
    handle = open(path, 'rb')
    try:
        data = handle.read()
    finally:
        handle.close()
    return data
840 864
def writefile(path, text):
    '''Replace the content of the file at path with text (binary mode).'''
    out = open(path, 'wb')
    try:
        out.write(text)
    finally:
        out.close()
847 871
def appendfile(path, text):
    '''Append text to the file at path (binary mode).'''
    out = open(path, 'ab')
    try:
        out.write(text)
    finally:
        out.close()
854 878
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Chunks larger than 2**20 are re-split into pieces of 2**18 so
        that no single queued item is huge."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = deque()

    def read(self, l):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry."""
        left = l
        # collect pieces and join once at the end instead of growing a
        # string with repeated concatenation
        pieces = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            chunk = queue.popleft()
            left -= len(chunk)
            if left < 0:
                # chunk overshoots the request: keep the tail for later
                queue.appendleft(chunk[left:])
                pieces.append(chunk[:left])
            else:
                pieces.append(chunk)

        return ''.join(pieces)
902 926
def filechunkiter(f, size=65536, limit=None):
    """Generate the content of file f in pieces of at most size bytes.

    size defaults to 65536.  When limit is given, no more than limit
    bytes are produced in total; by default everything is read.  A
    piece may be shorter than size when it is the last one, or when f
    is a socket or another kind of file whose read() may return less
    data than requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size
        if limit is not None:
            nbytes = min(limit, size)
        if not nbytes:
            # nothing left to ask for; do not touch the file at all
            break
        s = f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
923 947
def makedate():
    '''Return the current time as a (unixtime, offset) tuple.

    offset is the difference, in seconds, between UTC and local time
    for the current moment (UTC minus local).  Aborts when the system
    clock reports a negative timestamp.
    '''
    now = time.time()
    if now < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % now, hint=hint)
    utc = datetime.datetime.utcfromtimestamp(now)
    local = datetime.datetime.fromtimestamp(now)
    skew = utc - local
    return now, skew.days * 86400 + skew.seconds
933 957
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """Format a (unixtime, offset) tuple as a time string.

    unixtime is seconds since the epoch and offset is the time zone's
    number of seconds away from UTC.  The current time is used when no
    date is given.  In format, "%1" is replaced by the signed hour
    part of the time zone and "%2" by its minute part; everything else
    is handed to time.strftime().  Negative timestamps are displayed
    as the epoch in UTC."""
    when, tz = date or makedate()
    if when < 0:
        when = 0 # time.gmtime(lt) fails on Windows for lt < -43200
        tz = 0
    if "%1" in format or "%2" in format:
        if tz > 0:
            sign = "-"
        else:
            sign = "+"
        minutes = abs(tz) // 60
        hourpart = "%c%02d" % (sign, minutes // 60)
        minutepart = "%02d" % (minutes % 60)
        format = format.replace("%1", hourpart)
        format = format.replace("%2", minutepart)
    try:
        fields = time.gmtime(float(when) - tz)
    except ValueError:
        # time was out of range
        fields = time.gmtime(sys.maxint)
    return time.strftime(format, fields)
955 979
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
959 983
def strdate(string, format, defaults=[]):
    """Parse a time string with format; return a (unixtime, offset) tuple.

    A trailing "+HHMM"/"-HHMM", "GMT" or "UTC" token is taken as the
    time zone; without one the local time zone is used.  Date and time
    elements missing from format are filled in from defaults, which
    maps each element group to a (biased, current) pair of strings.
    ValueError is raised if the string cannot be parsed."""
    def timezone(string):
        token = string.split()[-1]
        if token == "GMT" or token == "UTC":
            return 0
        if token[0] in "+-" and len(token) == 5 and token[1:].isdigit():
            seconds = (int(token[1:3]) * 60 + int(token[3:5])) * 60
            # offsets are stored with the opposite sign of the notation
            if token[0] == "+":
                return -seconds
            return seconds
        return None

    # NOTE: unixtime = localunixtime + offset
    offset = timezone(string)
    date = string
    if offset is not None:
        # strip the time zone token before handing over to strptime
        date = " ".join(string.split()[:-1])

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        present = False
        for code in part:
            if ("%" + code) in format:
                present = True
                break
        if present:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
            continue
        date += "@" + defaults[part][usenow]
        format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1000 1024
def parsedate(date, formats=None, bias={}):
    """Parse a date/time and return a (unixtime, offset) tuple.

    date may be a "unixtime offset" string or a string in one of the
    given formats (defaultdateformats when none are given).  A
    two-element tuple is returned unchanged and a false value yields
    (0, 0).  bias supplies the values used for elements missing from
    the input.  Aborts on unparsable input or out-of-range values.
    """
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()
    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                biased = "0"
                if part[0] in "HMS":
                    biased = "00"

            # this piece is for matching the generic end to today's date
            current = datestr(now, "%" + part[0])

            defaults[part] = (biased, current)

        parsed = None
        for format in formats:
            try:
                parsed = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        if parsed is None:
            raise Abort(_('invalid date: %r') % date)
        when, offset = parsed
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if abs(when) > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if when < 0:
        raise Abort(_('negative date value: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1054 1078
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant compatible with the given (partial) date
        d = dict(mb="1", d="1")
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant compatible with the given (partial) date; try
        # the longest month length first and fall back to shorter ones
        d = dict(mb="12", HI="23", M="59", S="59")
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))

    marker, rest = date[0], date[1:]
    if marker == "<":
        if not rest:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(rest)
        return lambda x: x <= when
    if marker == ">":
        if not rest:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(rest)
        return lambda x: x >= when
    if marker == "-":
        # "-N": within the last N days
        try:
            days = int(rest)
        except ValueError:
            raise Abort(_("invalid day spec: %s") % rest)
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % rest)
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when

    if " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
    else:
        start, stop = lower(date), upper(date)
    return lambda x: x >= start and x <= stop
1130 1154
def shortuser(user):
    """Return a short representation of a user name or email address.

    Successively drops everything from the first '@', everything up to
    and including the first '<', everything from the first space and
    everything from the first '.'."""
    user = user.split('@', 1)[0]
    user = user.split('<', 1)[-1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
1146 1170
def emailuser(user):
    """Return the user portion of an email address.

    Drops everything from the first '@', then everything up to and
    including the first '<'."""
    local = user.split('@', 1)[0]
    return local.split('<', 1)[-1]
1156 1180
def email(author):
    '''Return the email part of author.

    That is the text after the first '<' (or from the start when there
    is none) up to the first '>' (or to the end when there is none).'''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
1163 1187
1164 1188 def _ellipsis(text, maxlength):
1165 1189 if len(text) <= maxlength:
1166 1190 return text, False
1167 1191 else:
1168 1192 return "%s..." % (text[:maxlength - 3]), True
1169 1193
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) characters.

    The text is decoded with encoding.encoding before trimming and the
    result is encoded back; if either conversion fails, the raw text is
    trimmed instead."""
    try:
        # use unicode not to split at intermediate multi-byte sequence
        decoded = text.decode(encoding.encoding)
        utext, truncated = _ellipsis(decoded, maxlength)
        if truncated:
            return utext.encode(encoding.encoding)
        return text
    except (UnicodeDecodeError, UnicodeEncodeError):
        return _ellipsis(text, maxlength)[0]
1181 1205
# Table used by bytecount(): rows of (multiplier, divisor, format).
# A row applies when the byte count is at least multiplier * divisor;
# the count divided by divisor is then rendered with format.  Rows are
# ordered from the largest threshold to the smallest, so larger values
# are shown with fewer decimals.
_byteunits = (
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
1194 1218
def bytecount(nbytes):
    '''return byte count formatted as readable string, with units

    The first row of _byteunits whose threshold is reached decides unit
    and precision; counts below every threshold use the last row.'''

    for multiplier, divisor, template in _byteunits:
        threshold = divisor * multiplier
        if nbytes >= threshold:
            scaled = nbytes / float(divisor)
            return template % scaled
    fallback = _byteunits[-1][2]
    return fallback % nbytes
1202 1226
def uirepr(s):
    '''Return repr(s) with doubled backslashes collapsed to single ones.'''
    # Avoid double backslash in Windows path repr()
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
1206 1230
# delay import of textwrap
def MBTextWrapper(**kwargs):
    # Factory that defines the wrapper class on first use.  After the
    # first call the module-level name MBTextWrapper is rebound to the
    # class itself (see the bottom of this function), so later calls
    # construct instances directly.
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in east asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def __init__(self, **kwargs):
            textwrap.TextWrapper.__init__(self, **kwargs)

            # for compatibility between 2.4 and 2.6
            if getattr(self, 'drop_whitespace', None) is None:
                self.drop_whitespace = kwargs.get('drop_whitespace', True)

        def _cutdown(self, ucstr, space_left):
            # Split ucstr into (head, tail) where head is the longest
            # prefix whose column width does not exceed space_left.
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (ie. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # replace this factory by the class so it is only built once
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
1317 1341
def wrap(line, width, initindent='', hangindent=''):
    '''Wrap line to width columns and return the encoded result.

    initindent prefixes the first output line and hangindent the
    following ones.  A width that leaves no room next to the longer
    indent is replaced by max(78, indent length + 1).  The inputs are
    decoded with encoding.encoding before wrapping and the wrapped
    text is encoded back.'''
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)
    codec, mode = encoding.encoding, encoding.encodingmode
    line = line.decode(codec, mode)
    initindent = initindent.decode(codec, mode)
    hangindent = hangindent.decode(codec, mode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(encoding.encoding)
1330 1354
def iterlines(iterator):
    '''Yield every line of every chunk produced by iterator.

    Each chunk is split with splitlines() on its own, so a line that
    spans two chunks is yielded as two pieces.'''
    for chunk in iterator:
        pieces = chunk.splitlines()
        for piece in pieces:
            yield piece
1335 1359
def expandpath(path):
    '''Expand environment variables, then a leading "~", in path.'''
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
1338 1362
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.

    A frozen build (mainfrozen()) is run through sys.executable alone;
    otherwise the result of gethgcmd() is returned.
    """
    if not mainfrozen():
        return gethgcmd()
    return [sys.executable]
1349 1373
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; record only the pid so
        # that the "pid in terminated" test below can actually match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-check condfn() to close the race between the child
            # signalling readiness and exiting
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # put back whatever handler was installed before us
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
1384 1408
# Use the builtin any()/all() when the interpreter has them; otherwise
# (NameError) provide equivalent pure-Python versions.
try:
    any, all = any, all
except NameError:
    def any(iterable):
        '''return True if at least one item of iterable is true'''
        for item in iterable:
            if item:
                return True
        return False

    def all(iterable):
        '''return True if no item of iterable is false'''
        for item in iterable:
            if not item:
                return False
        return True
1399 1423
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The mapping passed in is never modified.  When there is nothing to
    substitute (empty mapping and no escape_prefix), s is returned
    unchanged.
    """
    fn = fn or (lambda s: s)
    alternatives = list(mapping.keys())
    prefix_char = None
    if escape_prefix:
        alternatives.append(prefix)
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
    if not alternatives:
        # an empty alternation would match the bare prefix and look up ''
        return s
    r = re.compile(r'%s(%s)' % (prefix, '|'.join(alternatives)))

    def replace(match):
        key = match.group()[1:]
        # a doubled prefix stands for the prefix character itself; handle
        # it here instead of inserting an entry into the caller's mapping
        if escape_prefix and key == prefix_char:
            return fn(prefix_char)
        return fn(mapping[key])

    return r.sub(replace, s)
1424 1448
def getport(port):
    """Return the port for a given network service.

    Anything int() accepts is returned as an integer.  Other strings
    are looked up using socket.getservbyname(). If there's no matching
    service, util.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        number = None
    if number is not None:
        return number

    try:
        number = socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
    return number
1441 1465
# Lower-case spellings accepted by parsebool() and the value of each.
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case.  If s is not a valid boolean,
    returns None.
    """
    key = s.lower()
    return _booleans.get(key, None)
1452 1476
# Lookup table for _urlunquote(): maps every two-character hex string
# (digits in either case) to the character with that code.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
1456 1480
def _urlunquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is kept literally.
    """
    res = s.split('%')
    # fastpath
    if len(res) == 1:
        return s
    s = res[0]
    # every later item starts right after a '%' of the original string
    for item in res[1:]:
        try:
            s += _hextochr[item[:2]] + item[2:]
        except KeyError:
            # not a valid escape: put the '%' back unchanged
            s += '%' + item
        except UnicodeDecodeError:
            # NOTE(review): presumably hit when a decoded non-ASCII byte
            # is concatenated with unicode text - confirm
            s += unichr(int(item[:2], 16)) + item[2:]
    return s
1472 1496
1473 1497 class url(object):
1474 1498 r"""Reliable URL parser.
1475 1499
1476 1500 This parses URLs and provides attributes for the following
1477 1501 components:
1478 1502
1479 1503 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
1480 1504
1481 1505 Missing components are set to None. The only exception is
1482 1506 fragment, which is set to '' if present but empty.
1483 1507
1484 1508 If parsefragment is False, fragment is included in query. If
1485 1509 parsequery is False, query is included in path. If both are
1486 1510 False, both fragment and query are included in path.
1487 1511
1488 1512 See http://www.ietf.org/rfc/rfc2396.txt for more information.
1489 1513
1490 1514 Note that for backward compatibility reasons, bundle URLs do not
1491 1515 take host names. That means 'bundle://../' has a path of '../'.
1492 1516
1493 1517 Examples:
1494 1518
1495 1519 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
1496 1520 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
1497 1521 >>> url('ssh://[::1]:2200//home/joe/repo')
1498 1522 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
1499 1523 >>> url('file:///home/joe/repo')
1500 1524 <url scheme: 'file', path: '/home/joe/repo'>
1501 1525 >>> url('file:///c:/temp/foo/')
1502 1526 <url scheme: 'file', path: 'c:/temp/foo/'>
1503 1527 >>> url('bundle:foo')
1504 1528 <url scheme: 'bundle', path: 'foo'>
1505 1529 >>> url('bundle://../foo')
1506 1530 <url scheme: 'bundle', path: '../foo'>
1507 1531 >>> url(r'c:\foo\bar')
1508 1532 <url path: 'c:\\foo\\bar'>
1509 1533 >>> url(r'\\blah\blah\blah')
1510 1534 <url path: '\\\\blah\\blah\\blah'>
1511 1535 >>> url(r'\\blah\blah\blah#baz')
1512 1536 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
1513 1537
1514 1538 Authentication credentials:
1515 1539
1516 1540 >>> url('ssh://joe:xyz@x/repo')
1517 1541 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
1518 1542 >>> url('ssh://joe@x/repo')
1519 1543 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
1520 1544
1521 1545 Query strings and fragments:
1522 1546
1523 1547 >>> url('http://host/a?b#c')
1524 1548 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
1525 1549 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
1526 1550 <url scheme: 'http', host: 'host', path: 'a?b#c'>
1527 1551 """
1528 1552
1529 1553 _safechars = "!~*'()+"
1530 1554 _safepchars = "/!~*'()+:"
1531 1555 _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match
1532 1556
def __init__(self, path, parsequery=True, parsefragment=True):
    """Split ``path`` into URL components stored on the instance.

    arguments:
    path - the URL or local path string to parse
    parsequery - when true, text after the first '?' becomes the query
    parsefragment - when true, text after the first '#' becomes the
                    fragment

    Windows drive-letter paths, UNC paths and anything without a scheme
    are kept as local paths. 'bundle:' paths are stored with the
    'bundle' scheme and are not parsed any further.

    Raises Abort when a file:// URL names a host other than localhost.
    """
    # We slowly chomp away at path until we have only the path left
    self.scheme = self.user = self.passwd = self.host = None
    self.port = self.path = self.query = self.fragment = None
    self._localpath = True
    self._hostport = ''
    self._origpath = path

    if parsefragment and '#' in path:
        # An empty remainder stays '' here: turning it into None made
        # inputs such as '#frag' fail on path.startswith() just below.
        # The generic empty-path handling further down covers it.
        path, self.fragment = path.split('#', 1)

    # special case for Windows drive letters and UNC paths
    if hasdriveletter(path) or path.startswith(r'\\'):
        self.path = path
        return

    # For compatibility reasons, we can't handle bundle paths as
    # normal URLS
    if path.startswith('bundle:'):
        self.scheme = 'bundle'
        path = path[7:]
        if path.startswith('//'):
            path = path[2:]
        self.path = path
        return

    if self._matchscheme(path):
        parts = path.split(':', 1)
        if parts[0]:
            self.scheme, path = parts
            self._localpath = False

    if not path:
        path = None
        if self._localpath:
            self.path = ''
            return
    else:
        if self._localpath:
            self.path = path
            return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            # keep the unsplit "host:port" text; localpath() reads it
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

    self.path = path

    # leave the query string escaped
    for a in ('user', 'passwd', 'host', 'port',
              'path', 'fragment'):
        v = getattr(self, a)
        if v is not None:
            setattr(self, a, _urlunquote(v))
1627 1651
1628 1652 def __repr__(self):
1629 1653 attrs = []
1630 1654 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
1631 1655 'query', 'fragment'):
1632 1656 v = getattr(self, a)
1633 1657 if v is not None:
1634 1658 attrs.append('%s: %r' % (a, v))
1635 1659 return '<url %s>' % ', '.join(attrs)
1636 1660
1637 1661 def __str__(self):
1638 1662 r"""Join the URL's components back into a URL string.
1639 1663
1640 1664 Examples:
1641 1665
1642 1666 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
1643 1667 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
1644 1668 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
1645 1669 'http://user:pw@host:80/?foo=bar&baz=42'
1646 1670 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
1647 1671 'http://user:pw@host:80/?foo=bar%3dbaz'
1648 1672 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
1649 1673 'ssh://user:pw@[::1]:2200//home/joe#'
1650 1674 >>> str(url('http://localhost:80//'))
1651 1675 'http://localhost:80//'
1652 1676 >>> str(url('http://localhost:80/'))
1653 1677 'http://localhost:80/'
1654 1678 >>> str(url('http://localhost:80'))
1655 1679 'http://localhost:80/'
1656 1680 >>> str(url('bundle:foo'))
1657 1681 'bundle:foo'
1658 1682 >>> str(url('bundle://../foo'))
1659 1683 'bundle:../foo'
1660 1684 >>> str(url('path'))
1661 1685 'path'
1662 1686 >>> str(url('file:///tmp/foo/bar'))
1663 1687 'file:///tmp/foo/bar'
1664 1688 >>> str(url('file:///c:/tmp/foo/bar'))
1665 1689 'file:///c:/tmp/foo/bar'
1666 1690 >>> print url(r'bundle:foo\bar')
1667 1691 bundle:foo\bar
1668 1692 """
1669 1693 if self._localpath:
1670 1694 s = self.path
1671 1695 if self.scheme == 'bundle':
1672 1696 s = 'bundle:' + s
1673 1697 if self.fragment:
1674 1698 s += '#' + self.fragment
1675 1699 return s
1676 1700
1677 1701 s = self.scheme + ':'
1678 1702 if self.user or self.passwd or self.host:
1679 1703 s += '//'
1680 1704 elif self.scheme and (not self.path or self.path.startswith('/')
1681 1705 or hasdriveletter(self.path)):
1682 1706 s += '//'
1683 1707 if hasdriveletter(self.path):
1684 1708 s += '/'
1685 1709 if self.user:
1686 1710 s += urllib.quote(self.user, safe=self._safechars)
1687 1711 if self.passwd:
1688 1712 s += ':' + urllib.quote(self.passwd, safe=self._safechars)
1689 1713 if self.user or self.passwd:
1690 1714 s += '@'
1691 1715 if self.host:
1692 1716 if not (self.host.startswith('[') and self.host.endswith(']')):
1693 1717 s += urllib.quote(self.host)
1694 1718 else:
1695 1719 s += self.host
1696 1720 if self.port:
1697 1721 s += ':' + urllib.quote(self.port)
1698 1722 if self.host:
1699 1723 s += '/'
1700 1724 if self.path:
1701 1725 # TODO: similar to the query string, we should not unescape the
1702 1726 # path when we store it, the path might contain '%2f' = '/',
1703 1727 # which we should *not* escape.
1704 1728 s += urllib.quote(self.path, safe=self._safepchars)
1705 1729 if self.query:
1706 1730 # we store the query in escaped form.
1707 1731 s += '?' + self.query
1708 1732 if self.fragment is not None:
1709 1733 s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
1710 1734 return s
1711 1735
1712 1736 def authinfo(self):
1713 1737 user, passwd = self.user, self.passwd
1714 1738 try:
1715 1739 self.user, self.passwd = None, None
1716 1740 s = str(self)
1717 1741 finally:
1718 1742 self.user, self.passwd = user, passwd
1719 1743 if not self.user:
1720 1744 return (s, None)
1721 1745 # authinfo[1] is passed to urllib2 password manager, and its
1722 1746 # URIs must not contain credentials. The host is passed in the
1723 1747 # URIs list because Python < 2.4.3 uses only that to search for
1724 1748 # a password.
1725 1749 return (s, (None, (s, self.host),
1726 1750 self.user, self.passwd or ''))
1727 1751
1728 1752 def isabs(self):
1729 1753 if self.scheme and self.scheme != 'file':
1730 1754 return True # remote URL
1731 1755 if hasdriveletter(self.path):
1732 1756 return True # absolute for our purposes - can't be joined()
1733 1757 if self.path.startswith(r'\\'):
1734 1758 return True # Windows UNC path
1735 1759 if self.path.startswith('/'):
1736 1760 return True # POSIX-style
1737 1761 return False
1738 1762
1739 1763 def localpath(self):
1740 1764 if self.scheme == 'file' or self.scheme == 'bundle':
1741 1765 path = self.path or '/'
1742 1766 # For Windows, we need to promote hosts containing drive
1743 1767 # letters to paths with drive letters.
1744 1768 if hasdriveletter(self._hostport):
1745 1769 path = self._hostport + '/' + self.path
1746 1770 elif (self.host is not None and self.path
1747 1771 and not hasdriveletter(path)):
1748 1772 path = '/' + path
1749 1773 return path
1750 1774 return self._origpath
1751 1775
def hasscheme(path):
    '''tell whether parsing path as a url yields a scheme'''
    scheme = url(path).scheme
    return bool(scheme)
1754 1778
def hasdriveletter(path):
    '''tell whether path starts with a letter followed by a colon

    A falsy path (None or an empty string) is returned unchanged.'''
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
1757 1781
def urllocalpath(path):
    '''return the local path of path, parsed as a url without splitting
    off a query string or fragment'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
1760 1784
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # mask the password but keep the rest of the url intact
        parsed.passwd = '***'
    return str(parsed)
1767 1791
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
1773 1797
def isatty(fd):
    '''return fd.isatty(), or False when that raises AttributeError
    (for instance because fd has no isatty method)'''
    try:
        answer = fd.isatty()
    except AttributeError:
        answer = False
    return answer
General Comments 0
You need to be logged in to leave comments. Login now