util: wrap s.decode('string_escape') calls for future py3 compatibility
Yuya Nishihara
r31484:afb33535 default
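This commit funnels every direct s.decode('string_escape') call through a single helper, util.unescapestr(), so the Python 3 port only has to replace the escape decoding in one place (the string_escape codec does not exist on Python 3). A minimal sketch of what the helper plausibly looks like at this revision; the name matches the call sites below, but the body is an assumption:

    # hypothetical shape of the new helper in mercurial/util.py (Python 2)
    def unescapestr(s):
        # reverses _string_escape()-style escaping; a Python 3 port would
        # need something like codecs.escape_decode(s)[0] on bytes instead
        return s.decode('string_escape')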
@@ -1,537 +1,537 @@ (mercurial/changelog.py)
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
26 26 _defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 >>> s == res.decode('string_escape')
35 >>> s == util.unescapestr(res)
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
42 42 def decodeextra(text):
43 43 """
44 44 >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
45 45 ... ).iteritems())
46 46 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
47 47 >>> sorted(decodeextra(encodeextra({'foo': 'bar',
48 48 ... 'baz': chr(92) + chr(0) + '2'})
49 49 ... ).iteritems())
50 50 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
51 51 """
52 52 extra = _defaultextra.copy()
53 53 for l in text.split('\0'):
54 54 if l:
55 55 if '\\0' in l:
56 56 # fix up \0 without getting into trouble with \\0
57 57 l = l.replace('\\\\', '\\\\\n')
58 58 l = l.replace('\\0', '\0')
59 59 l = l.replace('\n', '')
60 k, v = l.decode('string_escape').split(':', 1)
60 k, v = util.unescapestr(l).split(':', 1)
61 61 extra[k] = v
62 62 return extra
63 63
64 64 def encodeextra(d):
65 65 # keys must be sorted to produce a deterministic changelog entry
66 66 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
67 67 return "\0".join(items)
68 68
69 69 def stripdesc(desc):
70 70 """strip trailing whitespace and leading and trailing empty lines"""
71 71 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
72 72
73 73 class appender(object):
74 74 '''the changelog index must be updated last on disk, so we use this class
75 75 to delay writes to it'''
76 76 def __init__(self, vfs, name, mode, buf):
77 77 self.data = buf
78 78 fp = vfs(name, mode)
79 79 self.fp = fp
80 80 self.offset = fp.tell()
81 81 self.size = vfs.fstat(fp).st_size
82 82 self._end = self.size
83 83
84 84 def end(self):
85 85 return self._end
86 86 def tell(self):
87 87 return self.offset
88 88 def flush(self):
89 89 pass
90 90 def close(self):
91 91 self.fp.close()
92 92
93 93 def seek(self, offset, whence=0):
94 94 '''virtual file offset spans real file and data'''
95 95 if whence == 0:
96 96 self.offset = offset
97 97 elif whence == 1:
98 98 self.offset += offset
99 99 elif whence == 2:
100 100 self.offset = self.end() + offset
101 101 if self.offset < self.size:
102 102 self.fp.seek(self.offset)
103 103
104 104 def read(self, count=-1):
105 105 '''only trick here is reads that span real file and data'''
106 106 ret = ""
107 107 if self.offset < self.size:
108 108 s = self.fp.read(count)
109 109 ret = s
110 110 self.offset += len(s)
111 111 if count > 0:
112 112 count -= len(s)
113 113 if count != 0:
114 114 doff = self.offset - self.size
115 115 self.data.insert(0, "".join(self.data))
116 116 del self.data[1:]
117 117 s = self.data[0][doff:doff + count]
118 118 self.offset += len(s)
119 119 ret += s
120 120 return ret
121 121
122 122 def write(self, s):
123 123 self.data.append(str(s))
124 124 self.offset += len(s)
125 125 self._end += len(s)
126 126
127 127 def _divertopener(opener, target):
128 128 """build an opener that writes in 'target.a' instead of 'target'"""
129 129 def _divert(name, mode='r', checkambig=False):
130 130 if name != target:
131 131 return opener(name, mode)
132 132 return opener(name + ".a", mode)
133 133 return _divert
134 134
135 135 def _delayopener(opener, target, buf):
136 136 """build an opener that stores chunks in 'buf' instead of 'target'"""
137 137 def _delay(name, mode='r', checkambig=False):
138 138 if name != target:
139 139 return opener(name, mode)
140 140 return appender(opener, name, mode, buf)
141 141 return _delay
142 142
143 143 _changelogrevision = collections.namedtuple(u'changelogrevision',
144 144 (u'manifest', u'user', u'date',
145 145 u'files', u'description',
146 146 u'extra'))
147 147
148 148 class changelogrevision(object):
149 149 """Holds results of a parsed changelog revision.
150 150
151 151 Changelog revisions consist of multiple pieces of data, including
152 152 the manifest node, user, and date. This object exposes a view into
153 153 the parsed object.
154 154 """
155 155
156 156 __slots__ = (
157 157 u'_offsets',
158 158 u'_text',
159 159 )
160 160
161 161 def __new__(cls, text):
162 162 if not text:
163 163 return _changelogrevision(
164 164 manifest=nullid,
165 165 user='',
166 166 date=(0, 0),
167 167 files=[],
168 168 description='',
169 169 extra=_defaultextra,
170 170 )
171 171
172 172 self = super(changelogrevision, cls).__new__(cls)
173 173 # We could return here and implement the following as an __init__.
174 174 # But doing it here is equivalent and saves an extra function call.
175 175
176 176 # format used:
177 177 # nodeid\n : manifest node in ascii
178 178 # user\n : user, no \n or \r allowed
179 179 # time tz extra\n : date (time is int or float, timezone is int)
180 180 # : extra is metadata, encoded and separated by '\0'
181 181 # : older versions ignore it
182 182 # files\n\n : files modified by the cset, no \n or \r allowed
183 183 # (.*) : comment (free text, ideally utf-8)
184 184 #
185 185 # changelog v0 doesn't use extra
186 186
187 187 nl1 = text.index('\n')
188 188 nl2 = text.index('\n', nl1 + 1)
189 189 nl3 = text.index('\n', nl2 + 1)
190 190
191 191 # The list of files may be empty. Which means nl3 is the first of the
192 192 # double newline that precedes the description.
193 193 if text[nl3 + 1] == '\n':
194 194 doublenl = nl3
195 195 else:
196 196 doublenl = text.index('\n\n', nl3 + 1)
197 197
198 198 self._offsets = (nl1, nl2, nl3, doublenl)
199 199 self._text = text
200 200
201 201 return self
202 202
203 203 @property
204 204 def manifest(self):
205 205 return bin(self._text[0:self._offsets[0]])
206 206
207 207 @property
208 208 def user(self):
209 209 off = self._offsets
210 210 return encoding.tolocal(self._text[off[0] + 1:off[1]])
211 211
212 212 @property
213 213 def _rawdate(self):
214 214 off = self._offsets
215 215 dateextra = self._text[off[1] + 1:off[2]]
216 216 return dateextra.split(' ', 2)[0:2]
217 217
218 218 @property
219 219 def _rawextra(self):
220 220 off = self._offsets
221 221 dateextra = self._text[off[1] + 1:off[2]]
222 222 fields = dateextra.split(' ', 2)
223 223 if len(fields) != 3:
224 224 return None
225 225
226 226 return fields[2]
227 227
228 228 @property
229 229 def date(self):
230 230 raw = self._rawdate
231 231 time = float(raw[0])
232 232 # Various tools did silly things with the timezone.
233 233 try:
234 234 timezone = int(raw[1])
235 235 except ValueError:
236 236 timezone = 0
237 237
238 238 return time, timezone
239 239
240 240 @property
241 241 def extra(self):
242 242 raw = self._rawextra
243 243 if raw is None:
244 244 return _defaultextra
245 245
246 246 return decodeextra(raw)
247 247
248 248 @property
249 249 def files(self):
250 250 off = self._offsets
251 251 if off[2] == off[3]:
252 252 return []
253 253
254 254 return self._text[off[2] + 1:off[3]].split('\n')
255 255
256 256 @property
257 257 def description(self):
258 258 return encoding.tolocal(self._text[self._offsets[3] + 2:])
259 259
260 260 class changelog(revlog.revlog):
261 261 def __init__(self, opener):
262 262 revlog.revlog.__init__(self, opener, "00changelog.i",
263 263 checkambig=True)
264 264 if self._initempty:
265 265 # changelogs don't benefit from generaldelta
266 266 self.version &= ~revlog.REVLOGGENERALDELTA
267 267 self._generaldelta = False
268 268
269 269 # Delta chains for changelogs tend to be very small because entries
270 270 # tend to be small and don't delta well with each other. So disable delta
271 271 # chains.
272 272 self.storedeltachains = False
273 273
274 274 self._realopener = opener
275 275 self._delayed = False
276 276 self._delaybuf = None
277 277 self._divert = False
278 278 self.filteredrevs = frozenset()
279 279
280 280 def tip(self):
281 281 """filtered version of revlog.tip"""
282 282 for i in xrange(len(self) -1, -2, -1):
283 283 if i not in self.filteredrevs:
284 284 return self.node(i)
285 285
286 286 def __contains__(self, rev):
287 287 """filtered version of revlog.__contains__"""
288 288 return (0 <= rev < len(self)
289 289 and rev not in self.filteredrevs)
290 290
291 291 def __iter__(self):
292 292 """filtered version of revlog.__iter__"""
293 293 if len(self.filteredrevs) == 0:
294 294 return revlog.revlog.__iter__(self)
295 295
296 296 def filterediter():
297 297 for i in xrange(len(self)):
298 298 if i not in self.filteredrevs:
299 299 yield i
300 300
301 301 return filterediter()
302 302
303 303 def revs(self, start=0, stop=None):
304 304 """filtered version of revlog.revs"""
305 305 for i in super(changelog, self).revs(start, stop):
306 306 if i not in self.filteredrevs:
307 307 yield i
308 308
309 309 @util.propertycache
310 310 def nodemap(self):
311 311 # XXX need filtering too
312 312 self.rev(self.node(0))
313 313 return self._nodecache
314 314
315 315 def reachableroots(self, minroot, heads, roots, includepath=False):
316 316 return self.index.reachableroots2(minroot, heads, roots, includepath)
317 317
318 318 def headrevs(self):
319 319 if self.filteredrevs:
320 320 try:
321 321 return self.index.headrevsfiltered(self.filteredrevs)
322 322 # AttributeError covers non-c-extension environments and
323 323 # old c extensions without filter handling.
324 324 except AttributeError:
325 325 return self._headrevs()
326 326
327 327 return super(changelog, self).headrevs()
328 328
329 329 def strip(self, *args, **kwargs):
330 330 # XXX make something better than assert
331 331 # We can't expect proper strip behavior if we are filtered.
332 332 assert not self.filteredrevs
333 333 super(changelog, self).strip(*args, **kwargs)
334 334
335 335 def rev(self, node):
336 336 """filtered version of revlog.rev"""
337 337 r = super(changelog, self).rev(node)
338 338 if r in self.filteredrevs:
339 339 raise error.FilteredLookupError(hex(node), self.indexfile,
340 340 _('filtered node'))
341 341 return r
342 342
343 343 def node(self, rev):
344 344 """filtered version of revlog.node"""
345 345 if rev in self.filteredrevs:
346 346 raise error.FilteredIndexError(rev)
347 347 return super(changelog, self).node(rev)
348 348
349 349 def linkrev(self, rev):
350 350 """filtered version of revlog.linkrev"""
351 351 if rev in self.filteredrevs:
352 352 raise error.FilteredIndexError(rev)
353 353 return super(changelog, self).linkrev(rev)
354 354
355 355 def parentrevs(self, rev):
356 356 """filtered version of revlog.parentrevs"""
357 357 if rev in self.filteredrevs:
358 358 raise error.FilteredIndexError(rev)
359 359 return super(changelog, self).parentrevs(rev)
360 360
361 361 def flags(self, rev):
362 362 """filtered version of revlog.flags"""
363 363 if rev in self.filteredrevs:
364 364 raise error.FilteredIndexError(rev)
365 365 return super(changelog, self).flags(rev)
366 366
367 367 def delayupdate(self, tr):
368 368 "delay visibility of index updates to other readers"
369 369
370 370 if not self._delayed:
371 371 if len(self) == 0:
372 372 self._divert = True
373 373 if self._realopener.exists(self.indexfile + '.a'):
374 374 self._realopener.unlink(self.indexfile + '.a')
375 375 self.opener = _divertopener(self._realopener, self.indexfile)
376 376 else:
377 377 self._delaybuf = []
378 378 self.opener = _delayopener(self._realopener, self.indexfile,
379 379 self._delaybuf)
380 380 self._delayed = True
381 381 tr.addpending('cl-%i' % id(self), self._writepending)
382 382 tr.addfinalize('cl-%i' % id(self), self._finalize)
383 383
384 384 def _finalize(self, tr):
385 385 "finalize index updates"
386 386 self._delayed = False
387 387 self.opener = self._realopener
388 388 # move redirected index data back into place
389 389 if self._divert:
390 390 assert not self._delaybuf
391 391 tmpname = self.indexfile + ".a"
392 392 nfile = self.opener.open(tmpname)
393 393 nfile.close()
394 394 self.opener.rename(tmpname, self.indexfile, checkambig=True)
395 395 elif self._delaybuf:
396 396 fp = self.opener(self.indexfile, 'a', checkambig=True)
397 397 fp.write("".join(self._delaybuf))
398 398 fp.close()
399 399 self._delaybuf = None
400 400 self._divert = False
401 401 # split when we're done
402 402 self.checkinlinesize(tr)
403 403
404 404 def readpending(self, file):
405 405 """read index data from a "pending" file
406 406
407 407 During a transaction, the actual changeset data is already stored in the
408 408 main file, but not yet finalized in the on-disk index. Instead, a
409 409 "pending" index is written by the transaction logic. If this function
410 410 is running, we are likely in a subprocess invoked in a hook. The
411 411 subprocess is informed that it is within a transaction and needs to
412 412 access its content.
413 413
414 414 This function will read all the index data out of the pending file and
415 415 overwrite the main index."""
416 416
417 417 if not self.opener.exists(file):
418 418 return # no pending data for changelog
419 419 r = revlog.revlog(self.opener, file)
420 420 self.index = r.index
421 421 self.nodemap = r.nodemap
422 422 self._nodecache = r._nodecache
423 423 self._chunkcache = r._chunkcache
424 424
425 425 def _writepending(self, tr):
426 426 "create a file containing the unfinalized state for pretxnchangegroup"
427 427 if self._delaybuf:
428 428 # make a temporary copy of the index
429 429 fp1 = self._realopener(self.indexfile)
430 430 pendingfilename = self.indexfile + ".a"
431 431 # register as a temp file to ensure cleanup on failure
432 432 tr.registertmp(pendingfilename)
433 433 # write existing data
434 434 fp2 = self._realopener(pendingfilename, "w")
435 435 fp2.write(fp1.read())
436 436 # add pending data
437 437 fp2.write("".join(self._delaybuf))
438 438 fp2.close()
439 439 # switch modes so finalize can simply rename
440 440 self._delaybuf = None
441 441 self._divert = True
442 442 self.opener = _divertopener(self._realopener, self.indexfile)
443 443
444 444 if self._divert:
445 445 return True
446 446
447 447 return False
448 448
449 449 def checkinlinesize(self, tr, fp=None):
450 450 if not self._delayed:
451 451 revlog.revlog.checkinlinesize(self, tr, fp)
452 452
453 453 def read(self, node):
454 454 """Obtain data from a parsed changelog revision.
455 455
456 456 Returns a 6-tuple of:
457 457
458 458 - manifest node in binary
459 459 - author/user as a localstr
460 460 - date as a 2-tuple of (time, timezone)
461 461 - list of files
462 462 - commit message as a localstr
463 463 - dict of extra metadata
464 464
465 465 Unless you need to access all fields, consider calling
466 466 ``changelogrevision`` instead, as it is faster for partial object
467 467 access.
468 468 """
469 469 c = changelogrevision(self.revision(node))
470 470 return (
471 471 c.manifest,
472 472 c.user,
473 473 c.date,
474 474 c.files,
475 475 c.description,
476 476 c.extra
477 477 )
478 478
479 479 def changelogrevision(self, nodeorrev):
480 480 """Obtain a ``changelogrevision`` for a node or revision."""
481 481 return changelogrevision(self.revision(nodeorrev))
482 482
483 483 def readfiles(self, node):
484 484 """
485 485 short version of read that only returns the files modified by the cset
486 486 """
487 487 text = self.revision(node)
488 488 if not text:
489 489 return []
490 490 last = text.index("\n\n")
491 491 l = text[:last].split('\n')
492 492 return l[3:]
493 493
494 494 def add(self, manifest, files, desc, transaction, p1, p2,
495 495 user, date=None, extra=None):
496 496 # Convert to UTF-8 encoded bytestrings as the very first
497 497 # thing: calling any method on a localstr object will turn it
498 498 # into a str object and the cached UTF-8 string is thus lost.
499 499 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
500 500
501 501 user = user.strip()
502 502 # An empty username or a username with a "\n" will make the
503 503 # revision text contain two "\n\n" sequences -> corrupt
504 504 # repository since read cannot unpack the revision.
505 505 if not user:
506 506 raise error.RevlogError(_("empty username"))
507 507 if "\n" in user:
508 508 raise error.RevlogError(_("username %s contains a newline")
509 509 % repr(user))
510 510
511 511 desc = stripdesc(desc)
512 512
513 513 if date:
514 514 parseddate = "%d %d" % util.parsedate(date)
515 515 else:
516 516 parseddate = "%d %d" % util.makedate()
517 517 if extra:
518 518 branch = extra.get("branch")
519 519 if branch in ("default", ""):
520 520 del extra["branch"]
521 521 elif branch in (".", "null", "tip"):
522 522 raise error.RevlogError(_('the name \'%s\' is reserved')
523 523 % branch)
524 524 if extra:
525 525 extra = encodeextra(extra)
526 526 parseddate = "%s %s" % (parseddate, extra)
527 527 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
528 528 text = "\n".join(l)
529 529 return self.addrevision(text, transaction, len(self), p1, p2)
530 530
531 531 def branchinfo(self, rev):
532 532 """return the branch name and open/close state of a revision
533 533
534 534 This function exists because creating a changectx object
535 535 just to access this is costly."""
536 536 extra = self.read(rev)[5]
537 537 return encoding.tolocal(extra.get("branch")), 'close' in extra
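The encodeextra()/decodeextra() pair above is one of the call sites this commit touches: it round-trips the changeset's extra dict through a NUL-separated, backslash-escaped string, and decoding now goes through util.unescapestr(). A doctest-style sketch of the round trip (Python 2, assuming mercurial.changelog is importable):

    >>> from mercurial.changelog import encodeextra, decodeextra
    >>> encodeextra({'foo': 'a\nb'})
    'foo:a\\nb'
    >>> sorted(decodeextra(encodeextra({'foo': 'a\nb'})).iteritems())
    [('branch', 'default'), ('foo', 'a\nb')]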
@@ -1,580 +1,583 @@ (mercurial/parser.py)
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 15 # and suffix actions
16 16 # an action is a tree node name, a tree label, and an optional match
17 17 # __call__(program) parses program into a labeled tree
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 from . import error
22 from . import (
23 error,
24 util,
25 )
23 26
24 27 class parser(object):
25 28 def __init__(self, elements, methods=None):
26 29 self._elements = elements
27 30 self._methods = methods
28 31 self.current = None
29 32 def _advance(self):
30 33 'advance the tokenizer'
31 34 t = self.current
32 35 self.current = next(self._iter, None)
33 36 return t
34 37 def _hasnewterm(self):
35 38 'True if next token may start new term'
36 39 return any(self._elements[self.current[0]][1:3])
37 40 def _match(self, m):
38 41 'make sure the tokenizer matches an end condition'
39 42 if self.current[0] != m:
40 43 raise error.ParseError(_("unexpected token: %s") % self.current[0],
41 44 self.current[2])
42 45 self._advance()
43 46 def _parseoperand(self, bind, m=None):
44 47 'gather right-hand-side operand until an end condition or binding met'
45 48 if m and self.current[0] == m:
46 49 expr = None
47 50 else:
48 51 expr = self._parse(bind)
49 52 if m:
50 53 self._match(m)
51 54 return expr
52 55 def _parse(self, bind=0):
53 56 token, value, pos = self._advance()
54 57 # handle prefix rules on current token, take as primary if unambiguous
55 58 primary, prefix = self._elements[token][1:3]
56 59 if primary and not (prefix and self._hasnewterm()):
57 60 expr = (primary, value)
58 61 elif prefix:
59 62 expr = (prefix[0], self._parseoperand(*prefix[1:]))
60 63 else:
61 64 raise error.ParseError(_("not a prefix: %s") % token, pos)
62 65 # gather tokens until we meet a lower binding strength
63 66 while bind < self._elements[self.current[0]][0]:
64 67 token, value, pos = self._advance()
65 68 # handle infix rules, take as suffix if unambiguous
66 69 infix, suffix = self._elements[token][3:]
67 70 if suffix and not (infix and self._hasnewterm()):
68 71 expr = (suffix, expr)
69 72 elif infix:
70 73 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
71 74 else:
72 75 raise error.ParseError(_("not an infix: %s") % token, pos)
73 76 return expr
74 77 def parse(self, tokeniter):
75 78 'generate a parse tree from tokens'
76 79 self._iter = tokeniter
77 80 self._advance()
78 81 res = self._parse()
79 82 token, value, pos = self.current
80 83 return res, pos
81 84 def eval(self, tree):
82 85 'recursively evaluate a parse tree using node methods'
83 86 if not isinstance(tree, tuple):
84 87 return tree
85 88 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 89 def __call__(self, tokeniter):
87 90 'parse tokens into a parse tree and evaluate if methods given'
88 91 t = self.parse(tokeniter)
89 92 if self._methods:
90 93 return self.eval(t)
91 94 return t
92 95
93 96 def splitargspec(spec):
94 97 """Parse spec of function arguments into (poskeys, varkey, keys)
95 98
96 99 >>> splitargspec('')
97 100 ([], None, [])
98 101 >>> splitargspec('foo bar')
99 102 ([], None, ['foo', 'bar'])
100 103 >>> splitargspec('foo *bar baz')
101 104 (['foo'], 'bar', ['baz'])
102 105 >>> splitargspec('*foo')
103 106 ([], 'foo', [])
104 107 """
105 108 pre, sep, post = spec.partition('*')
106 109 pres = pre.split()
107 110 posts = post.split()
108 111 if sep:
109 112 if not posts:
110 113 raise error.ProgrammingError('no *varkey name provided')
111 114 return pres, posts[0], posts[1:]
112 115 return [], None, pres
113 116
114 117 def buildargsdict(trees, funcname, argspec, keyvaluenode, keynode):
115 118 """Build dict from list containing positional and keyword arguments
116 119
117 120 Arguments are specified by a tuple of ``(poskeys, varkey, keys)`` where
118 121
119 122 - ``poskeys``: list of names of positional arguments
120 123 - ``varkey``: optional argument name that takes up remainder
121 124 - ``keys``: list of names that can be either positional or keyword arguments
122 125
123 126 If ``varkey`` specified, all ``keys`` must be given as keyword arguments.
124 127
125 128 Invalid keywords, too few positional arguments, or too many positional
126 129 arguments are rejected, but missing keyword arguments are just omitted.
127 130 """
128 131 poskeys, varkey, keys = argspec
129 132 kwstart = next((i for i, x in enumerate(trees) if x[0] == keyvaluenode),
130 133 len(trees))
131 134 if kwstart < len(poskeys):
132 135 raise error.ParseError(_("%(func)s takes at least %(nargs)d positional "
133 136 "arguments")
134 137 % {'func': funcname, 'nargs': len(poskeys)})
135 138 if not varkey and len(trees) > len(poskeys) + len(keys):
136 139 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
137 140 % {'func': funcname,
138 141 'nargs': len(poskeys) + len(keys)})
139 142 args = {}
140 143 # consume positional arguments
141 144 for k, x in zip(poskeys, trees[:kwstart]):
142 145 args[k] = x
143 146 if varkey:
144 147 args[varkey] = trees[len(args):kwstart]
145 148 else:
146 149 for k, x in zip(keys, trees[len(args):kwstart]):
147 150 args[k] = x
148 151 # remainder should be keyword arguments
149 152 for x in trees[kwstart:]:
150 153 if x[0] != keyvaluenode or x[1][0] != keynode:
151 154 raise error.ParseError(_("%(func)s got an invalid argument")
152 155 % {'func': funcname})
153 156 k = x[1][1]
154 157 if k not in keys:
155 158 raise error.ParseError(_("%(func)s got an unexpected keyword "
156 159 "argument '%(key)s'")
157 160 % {'func': funcname, 'key': k})
158 161 if k in args:
159 162 raise error.ParseError(_("%(func)s got multiple values for keyword "
160 163 "argument '%(key)s'")
161 164 % {'func': funcname, 'key': k})
162 165 args[k] = x[2]
163 166 return args
164 167
165 168 def unescapestr(s):
166 169 try:
167 return s.decode("string_escape")
170 return util.unescapestr(s)
168 171 except ValueError as e:
169 172 # mangle Python's exception into our format
170 173 raise error.ParseError(str(e).lower())
171 174
172 175 def _prettyformat(tree, leafnodes, level, lines):
173 176 if not isinstance(tree, tuple) or tree[0] in leafnodes:
174 177 lines.append((level, str(tree)))
175 178 else:
176 179 lines.append((level, '(%s' % tree[0]))
177 180 for s in tree[1:]:
178 181 _prettyformat(s, leafnodes, level + 1, lines)
179 182 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
180 183
181 184 def prettyformat(tree, leafnodes):
182 185 lines = []
183 186 _prettyformat(tree, leafnodes, 0, lines)
184 187 output = '\n'.join((' ' * l + s) for l, s in lines)
185 188 return output
186 189
187 190 def simplifyinfixops(tree, targetnodes):
188 191 """Flatten chained infix operations to reduce usage of Python stack
189 192
190 193 >>> def f(tree):
191 194 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
192 195 >>> f(('or',
193 196 ... ('or',
194 197 ... ('symbol', '1'),
195 198 ... ('symbol', '2')),
196 199 ... ('symbol', '3')))
197 200 (or
198 201 ('symbol', '1')
199 202 ('symbol', '2')
200 203 ('symbol', '3'))
201 204 >>> f(('func',
202 205 ... ('symbol', 'p1'),
203 206 ... ('or',
204 207 ... ('or',
205 208 ... ('func',
206 209 ... ('symbol', 'sort'),
207 210 ... ('list',
208 211 ... ('or',
209 212 ... ('or',
210 213 ... ('symbol', '1'),
211 214 ... ('symbol', '2')),
212 215 ... ('symbol', '3')),
213 216 ... ('negate',
214 217 ... ('symbol', 'rev')))),
215 218 ... ('and',
216 219 ... ('symbol', '4'),
217 220 ... ('group',
218 221 ... ('or',
219 222 ... ('or',
220 223 ... ('symbol', '5'),
221 224 ... ('symbol', '6')),
222 225 ... ('symbol', '7'))))),
223 226 ... ('symbol', '8'))))
224 227 (func
225 228 ('symbol', 'p1')
226 229 (or
227 230 (func
228 231 ('symbol', 'sort')
229 232 (list
230 233 (or
231 234 ('symbol', '1')
232 235 ('symbol', '2')
233 236 ('symbol', '3'))
234 237 (negate
235 238 ('symbol', 'rev'))))
236 239 (and
237 240 ('symbol', '4')
238 241 (group
239 242 (or
240 243 ('symbol', '5')
241 244 ('symbol', '6')
242 245 ('symbol', '7'))))
243 246 ('symbol', '8')))
244 247 """
245 248 if not isinstance(tree, tuple):
246 249 return tree
247 250 op = tree[0]
248 251 if op not in targetnodes:
249 252 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
250 253
251 254 # walk down left nodes taking each right node. no recursion to left nodes
252 255 # because infix operators are left-associative, i.e. left tree is deep.
253 256 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
254 257 simplified = []
255 258 x = tree
256 259 while x[0] == op:
257 260 l, r = x[1:]
258 261 simplified.append(simplifyinfixops(r, targetnodes))
259 262 x = l
260 263 simplified.append(simplifyinfixops(x, targetnodes))
261 264 simplified.append(op)
262 265 return tuple(reversed(simplified))
263 266
264 267 def parseerrordetail(inst):
265 268 """Compose error message from specified ParseError object
266 269 """
267 270 if len(inst.args) > 1:
268 271 return _('at %d: %s') % (inst.args[1], inst.args[0])
269 272 else:
270 273 return inst.args[0]
271 274
272 275 class alias(object):
273 276 """Parsed result of alias"""
274 277
275 278 def __init__(self, name, args, err, replacement):
276 279 self.name = name
277 280 self.args = args
278 281 self.error = err
279 282 self.replacement = replacement
280 283 # whether own `error` information is already shown or not.
281 284 # this avoids showing same warning multiple times at each
282 285 # `expandaliases`.
283 286 self.warned = False
284 287
285 288 class basealiasrules(object):
286 289 """Parsing and expansion rule set of aliases
287 290
288 291 This is a helper for fileset/revset/template aliases. A concrete rule set
289 292 should be made by sub-classing this and implementing class/static methods.
290 293
291 294 It supports alias expansion of symbol and function-call styles::
292 295
293 296 # decl = defn
294 297 h = heads(default)
295 298 b($1) = ancestors($1) - ancestors(default)
296 299 """
297 300 # typically a config section, which will be included in error messages
298 301 _section = None
299 302 # tag of symbol node
300 303 _symbolnode = 'symbol'
301 304
302 305 def __new__(cls):
303 306 raise TypeError("'%s' is not instantiatable" % cls.__name__)
304 307
305 308 @staticmethod
306 309 def _parse(spec):
307 310 """Parse an alias name, arguments and definition"""
308 311 raise NotImplementedError
309 312
310 313 @staticmethod
311 314 def _trygetfunc(tree):
312 315 """Return (name, args) if tree is a function; otherwise None"""
313 316 raise NotImplementedError
314 317
315 318 @classmethod
316 319 def _builddecl(cls, decl):
317 320 """Parse an alias declaration into ``(name, args, errorstr)``
318 321
319 322 This function analyzes the parsed tree. The parsing rule is provided
320 323 by ``_parse()``.
321 324
322 325 - ``name``: of declared alias (may be ``decl`` itself at error)
323 326 - ``args``: list of argument names (or None for symbol declaration)
324 327 - ``errorstr``: detail about detected error (or None)
325 328
326 329 >>> sym = lambda x: ('symbol', x)
327 330 >>> symlist = lambda *xs: ('list',) + tuple(sym(x) for x in xs)
328 331 >>> func = lambda n, a: ('func', sym(n), a)
329 332 >>> parsemap = {
330 333 ... 'foo': sym('foo'),
331 334 ... '$foo': sym('$foo'),
332 335 ... 'foo::bar': ('dagrange', sym('foo'), sym('bar')),
333 336 ... 'foo()': func('foo', None),
334 337 ... '$foo()': func('$foo', None),
335 338 ... 'foo($1, $2)': func('foo', symlist('$1', '$2')),
336 339 ... 'foo(bar_bar, baz.baz)':
337 340 ... func('foo', symlist('bar_bar', 'baz.baz')),
338 341 ... 'foo(bar($1, $2))':
339 342 ... func('foo', func('bar', symlist('$1', '$2'))),
340 343 ... 'foo($1, $2, nested($1, $2))':
341 344 ... func('foo', (symlist('$1', '$2') +
342 345 ... (func('nested', symlist('$1', '$2')),))),
343 346 ... 'foo("bar")': func('foo', ('string', 'bar')),
344 347 ... 'foo($1, $2': error.ParseError('unexpected token: end', 10),
345 348 ... 'foo("bar': error.ParseError('unterminated string', 5),
346 349 ... 'foo($1, $2, $1)': func('foo', symlist('$1', '$2', '$1')),
347 350 ... }
348 351 >>> def parse(expr):
349 352 ... x = parsemap[expr]
350 353 ... if isinstance(x, Exception):
351 354 ... raise x
352 355 ... return x
353 356 >>> def trygetfunc(tree):
354 357 ... if not tree or tree[0] != 'func' or tree[1][0] != 'symbol':
355 358 ... return None
356 359 ... if not tree[2]:
357 360 ... return tree[1][1], []
358 361 ... if tree[2][0] == 'list':
359 362 ... return tree[1][1], list(tree[2][1:])
360 363 ... return tree[1][1], [tree[2]]
361 364 >>> class aliasrules(basealiasrules):
362 365 ... _parse = staticmethod(parse)
363 366 ... _trygetfunc = staticmethod(trygetfunc)
364 367 >>> builddecl = aliasrules._builddecl
365 368 >>> builddecl('foo')
366 369 ('foo', None, None)
367 370 >>> builddecl('$foo')
368 371 ('$foo', None, "invalid symbol '$foo'")
369 372 >>> builddecl('foo::bar')
370 373 ('foo::bar', None, 'invalid format')
371 374 >>> builddecl('foo()')
372 375 ('foo', [], None)
373 376 >>> builddecl('$foo()')
374 377 ('$foo()', None, "invalid function '$foo'")
375 378 >>> builddecl('foo($1, $2)')
376 379 ('foo', ['$1', '$2'], None)
377 380 >>> builddecl('foo(bar_bar, baz.baz)')
378 381 ('foo', ['bar_bar', 'baz.baz'], None)
379 382 >>> builddecl('foo($1, $2, nested($1, $2))')
380 383 ('foo($1, $2, nested($1, $2))', None, 'invalid argument list')
381 384 >>> builddecl('foo(bar($1, $2))')
382 385 ('foo(bar($1, $2))', None, 'invalid argument list')
383 386 >>> builddecl('foo("bar")')
384 387 ('foo("bar")', None, 'invalid argument list')
385 388 >>> builddecl('foo($1, $2')
386 389 ('foo($1, $2', None, 'at 10: unexpected token: end')
387 390 >>> builddecl('foo("bar')
388 391 ('foo("bar', None, 'at 5: unterminated string')
389 392 >>> builddecl('foo($1, $2, $1)')
390 393 ('foo', None, 'argument names collide with each other')
391 394 """
392 395 try:
393 396 tree = cls._parse(decl)
394 397 except error.ParseError as inst:
395 398 return (decl, None, parseerrordetail(inst))
396 399
397 400 if tree[0] == cls._symbolnode:
398 401 # "name = ...." style
399 402 name = tree[1]
400 403 if name.startswith('$'):
401 404 return (decl, None, _("invalid symbol '%s'") % name)
402 405 return (name, None, None)
403 406
404 407 func = cls._trygetfunc(tree)
405 408 if func:
406 409 # "name(arg, ....) = ...." style
407 410 name, args = func
408 411 if name.startswith('$'):
409 412 return (decl, None, _("invalid function '%s'") % name)
410 413 if any(t[0] != cls._symbolnode for t in args):
411 414 return (decl, None, _("invalid argument list"))
412 415 if len(args) != len(set(args)):
413 416 return (name, None, _("argument names collide with each other"))
414 417 return (name, [t[1] for t in args], None)
415 418
416 419 return (decl, None, _("invalid format"))
417 420
418 421 @classmethod
419 422 def _relabelargs(cls, tree, args):
420 423 """Mark alias arguments as ``_aliasarg``"""
421 424 if not isinstance(tree, tuple):
422 425 return tree
423 426 op = tree[0]
424 427 if op != cls._symbolnode:
425 428 return (op,) + tuple(cls._relabelargs(x, args) for x in tree[1:])
426 429
427 430 assert len(tree) == 2
428 431 sym = tree[1]
429 432 if sym in args:
430 433 op = '_aliasarg'
431 434 elif sym.startswith('$'):
432 435 raise error.ParseError(_("invalid symbol '%s'") % sym)
433 436 return (op, sym)
434 437
435 438 @classmethod
436 439 def _builddefn(cls, defn, args):
437 440 """Parse an alias definition into a tree and marks substitutions
438 441
439 442 This function marks alias argument references as ``_aliasarg``. The
440 443 parsing rule is provided by ``_parse()``.
441 444
442 445 ``args`` is a list of alias argument names, or None if the alias
443 446 is declared as a symbol.
444 447
445 448 >>> parsemap = {
446 449 ... '$1 or foo': ('or', ('symbol', '$1'), ('symbol', 'foo')),
447 450 ... '$1 or $bar': ('or', ('symbol', '$1'), ('symbol', '$bar')),
448 451 ... '$10 or baz': ('or', ('symbol', '$10'), ('symbol', 'baz')),
449 452 ... '"$1" or "foo"': ('or', ('string', '$1'), ('string', 'foo')),
450 453 ... }
451 454 >>> class aliasrules(basealiasrules):
452 455 ... _parse = staticmethod(parsemap.__getitem__)
453 456 ... _trygetfunc = staticmethod(lambda x: None)
454 457 >>> builddefn = aliasrules._builddefn
455 458 >>> def pprint(tree):
456 459 ... print prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
457 460 >>> args = ['$1', '$2', 'foo']
458 461 >>> pprint(builddefn('$1 or foo', args))
459 462 (or
460 463 ('_aliasarg', '$1')
461 464 ('_aliasarg', 'foo'))
462 465 >>> try:
463 466 ... builddefn('$1 or $bar', args)
464 467 ... except error.ParseError as inst:
465 468 ... print parseerrordetail(inst)
466 469 invalid symbol '$bar'
467 470 >>> args = ['$1', '$10', 'foo']
468 471 >>> pprint(builddefn('$10 or baz', args))
469 472 (or
470 473 ('_aliasarg', '$10')
471 474 ('symbol', 'baz'))
472 475 >>> pprint(builddefn('"$1" or "foo"', args))
473 476 (or
474 477 ('string', '$1')
475 478 ('string', 'foo'))
476 479 """
477 480 tree = cls._parse(defn)
478 481 if args:
479 482 args = set(args)
480 483 else:
481 484 args = set()
482 485 return cls._relabelargs(tree, args)
483 486
484 487 @classmethod
485 488 def build(cls, decl, defn):
486 489 """Parse an alias declaration and definition into an alias object"""
487 490 repl = efmt = None
488 491 name, args, err = cls._builddecl(decl)
489 492 if err:
490 493 efmt = _('bad declaration of %(section)s "%(name)s": %(error)s')
491 494 else:
492 495 try:
493 496 repl = cls._builddefn(defn, args)
494 497 except error.ParseError as inst:
495 498 err = parseerrordetail(inst)
496 499 efmt = _('bad definition of %(section)s "%(name)s": %(error)s')
497 500 if err:
498 501 err = efmt % {'section': cls._section, 'name': name, 'error': err}
499 502 return alias(name, args, err, repl)
500 503
501 504 @classmethod
502 505 def buildmap(cls, items):
503 506 """Parse a list of alias (name, replacement) pairs into a dict of
504 507 alias objects"""
505 508 aliases = {}
506 509 for decl, defn in items:
507 510 a = cls.build(decl, defn)
508 511 aliases[a.name] = a
509 512 return aliases
510 513
511 514 @classmethod
512 515 def _getalias(cls, aliases, tree):
513 516 """If tree looks like an unexpanded alias, return (alias, pattern-args)
514 517 pair. Return None otherwise.
515 518 """
516 519 if not isinstance(tree, tuple):
517 520 return None
518 521 if tree[0] == cls._symbolnode:
519 522 name = tree[1]
520 523 a = aliases.get(name)
521 524 if a and a.args is None:
522 525 return a, None
523 526 func = cls._trygetfunc(tree)
524 527 if func:
525 528 name, args = func
526 529 a = aliases.get(name)
527 530 if a and a.args is not None:
528 531 return a, args
529 532 return None
530 533
531 534 @classmethod
532 535 def _expandargs(cls, tree, args):
533 536 """Replace _aliasarg instances with the substitution value of the
534 537 same name in args, recursively.
535 538 """
536 539 if not isinstance(tree, tuple):
537 540 return tree
538 541 if tree[0] == '_aliasarg':
539 542 sym = tree[1]
540 543 return args[sym]
541 544 return tuple(cls._expandargs(t, args) for t in tree)
542 545
543 546 @classmethod
544 547 def _expand(cls, aliases, tree, expanding, cache):
545 548 if not isinstance(tree, tuple):
546 549 return tree
547 550 r = cls._getalias(aliases, tree)
548 551 if r is None:
549 552 return tuple(cls._expand(aliases, t, expanding, cache)
550 553 for t in tree)
551 554 a, l = r
552 555 if a.error:
553 556 raise error.Abort(a.error)
554 557 if a in expanding:
555 558 raise error.ParseError(_('infinite expansion of %(section)s '
556 559 '"%(name)s" detected')
557 560 % {'section': cls._section, 'name': a.name})
558 561 # get cacheable replacement tree by expanding aliases recursively
559 562 expanding.append(a)
560 563 if a.name not in cache:
561 564 cache[a.name] = cls._expand(aliases, a.replacement, expanding,
562 565 cache)
563 566 result = cache[a.name]
564 567 expanding.pop()
565 568 if a.args is None:
566 569 return result
567 570 # substitute function arguments in replacement tree
568 571 if len(l) != len(a.args):
569 572 raise error.ParseError(_('invalid number of arguments: %d')
570 573 % len(l))
571 574 l = [cls._expand(aliases, t, [], cache) for t in l]
572 575 return cls._expandargs(result, dict(zip(a.args, l)))
573 576
574 577 @classmethod
575 578 def expand(cls, aliases, tree):
576 579 """Expand aliases in tree, recursively.
577 580
578 581 'aliases' is a dictionary mapping user defined aliases to alias objects.
579 582 """
580 583 return cls._expand(aliases, tree, [], {})
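parser.unescapestr() keeps its ValueError-to-ParseError conversion while delegating the actual decoding to the new util.unescapestr(), so parser callers still see failures in the parser's own error format. A sketch of the intended behavior (Python 2; the exact error text comes from the underlying codec and is an assumption):

    >>> from mercurial import parser
    >>> parser.unescapestr('a\\nb')   # escaped newline in, real newline out
    'a\nb'
    >>> parser.unescapestr('\\x')     # malformed escape -> ParseError
    Traceback (most recent call last):
      ...
    ParseError: invalid \x escape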
@@ -1,3565 +1,3568 @@ (mercurial/util.py)
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
52 52 empty = pycompat.empty
53 53 httplib = pycompat.httplib
54 54 httpserver = pycompat.httpserver
55 55 pickle = pycompat.pickle
56 56 queue = pycompat.queue
57 57 socketserver = pycompat.socketserver
58 58 stderr = pycompat.stderr
59 59 stdin = pycompat.stdin
60 60 stdout = pycompat.stdout
61 61 stringio = pycompat.stringio
62 62 urlerr = pycompat.urlerr
63 63 urlparse = pycompat.urlparse
64 64 urlreq = pycompat.urlreq
65 65 xmlrpclib = pycompat.xmlrpclib
66 66
67 67 def isatty(fp):
68 68 try:
69 69 return fp.isatty()
70 70 except AttributeError:
71 71 return False
72 72
73 73 # glibc determines buffering on first write to stdout - if we replace a TTY
74 74 # destined stdout with a pipe destined stdout (e.g. pager), we want line
75 75 # buffering
76 76 if isatty(stdout):
77 77 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
78 78
79 79 if pycompat.osname == 'nt':
80 80 from . import windows as platform
81 81 stdout = platform.winstdout(stdout)
82 82 else:
83 83 from . import posix as platform
84 84
85 85 _ = i18n._
86 86
87 87 bindunixsocket = platform.bindunixsocket
88 88 cachestat = platform.cachestat
89 89 checkexec = platform.checkexec
90 90 checklink = platform.checklink
91 91 copymode = platform.copymode
92 92 executablepath = platform.executablepath
93 93 expandglobs = platform.expandglobs
94 94 explainexit = platform.explainexit
95 95 findexe = platform.findexe
96 96 gethgcmd = platform.gethgcmd
97 97 getuser = platform.getuser
98 98 getpid = os.getpid
99 99 groupmembers = platform.groupmembers
100 100 groupname = platform.groupname
101 101 hidewindow = platform.hidewindow
102 102 isexec = platform.isexec
103 103 isowner = platform.isowner
104 104 localpath = platform.localpath
105 105 lookupreg = platform.lookupreg
106 106 makedir = platform.makedir
107 107 nlinks = platform.nlinks
108 108 normpath = platform.normpath
109 109 normcase = platform.normcase
110 110 normcasespec = platform.normcasespec
111 111 normcasefallback = platform.normcasefallback
112 112 openhardlinks = platform.openhardlinks
113 113 oslink = platform.oslink
114 114 parsepatchoutput = platform.parsepatchoutput
115 115 pconvert = platform.pconvert
116 116 poll = platform.poll
117 117 popen = platform.popen
118 118 posixfile = platform.posixfile
119 119 quotecommand = platform.quotecommand
120 120 readpipe = platform.readpipe
121 121 rename = platform.rename
122 122 removedirs = platform.removedirs
123 123 samedevice = platform.samedevice
124 124 samefile = platform.samefile
125 125 samestat = platform.samestat
126 126 setbinary = platform.setbinary
127 127 setflags = platform.setflags
128 128 setsignalhandler = platform.setsignalhandler
129 129 shellquote = platform.shellquote
130 130 spawndetached = platform.spawndetached
131 131 split = platform.split
132 132 sshargs = platform.sshargs
133 133 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
134 134 statisexec = platform.statisexec
135 135 statislink = platform.statislink
136 136 testpid = platform.testpid
137 137 umask = platform.umask
138 138 unlink = platform.unlink
139 139 unlinkpath = platform.unlinkpath
140 140 username = platform.username
141 141
142 142 # Python compatibility
143 143
144 144 _notset = object()
145 145
146 146 # disable Python's problematic floating point timestamps (issue4836)
147 147 # (Python hypocritically says you shouldn't change this behavior in
148 148 # libraries, and sure enough Mercurial is not a library.)
149 149 os.stat_float_times(False)
150 150
151 151 def safehasattr(thing, attr):
152 152 return getattr(thing, attr, _notset) is not _notset
153 153
154 154 def bitsfrom(container):
155 155 bits = 0
156 156 for bit in container:
157 157 bits |= bit
158 158 return bits
159 159
160 160 DIGESTS = {
161 161 'md5': hashlib.md5,
162 162 'sha1': hashlib.sha1,
163 163 'sha512': hashlib.sha512,
164 164 }
165 165 # List of digest types from strongest to weakest
166 166 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
167 167
168 168 for k in DIGESTS_BY_STRENGTH:
169 169 assert k in DIGESTS
170 170
171 171 class digester(object):
172 172 """helper to compute digests.
173 173
174 174 This helper can be used to compute one or more digests given their name.
175 175
176 176 >>> d = digester(['md5', 'sha1'])
177 177 >>> d.update('foo')
178 178 >>> [k for k in sorted(d)]
179 179 ['md5', 'sha1']
180 180 >>> d['md5']
181 181 'acbd18db4cc2f85cedef654fccc4a4d8'
182 182 >>> d['sha1']
183 183 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
184 184 >>> digester.preferred(['md5', 'sha1'])
185 185 'sha1'
186 186 """
187 187
188 188 def __init__(self, digests, s=''):
189 189 self._hashes = {}
190 190 for k in digests:
191 191 if k not in DIGESTS:
192 192 raise Abort(_('unknown digest type: %s') % k)
193 193 self._hashes[k] = DIGESTS[k]()
194 194 if s:
195 195 self.update(s)
196 196
197 197 def update(self, data):
198 198 for h in self._hashes.values():
199 199 h.update(data)
200 200
201 201 def __getitem__(self, key):
202 202 if key not in DIGESTS:
203 203 raise Abort(_('unknown digest type: %s') % key)
204 204 return self._hashes[key].hexdigest()
205 205
206 206 def __iter__(self):
207 207 return iter(self._hashes)
208 208
209 209 @staticmethod
210 210 def preferred(supported):
211 211 """returns the strongest digest type in both supported and DIGESTS."""
212 212
213 213 for k in DIGESTS_BY_STRENGTH:
214 214 if k in supported:
215 215 return k
216 216 return None
217 217
218 218 class digestchecker(object):
219 219 """file handle wrapper that additionally checks content against a given
220 220 size and digests.
221 221
222 222 d = digestchecker(fh, size, {'md5': '...'})
223 223
224 224 When multiple digests are given, all of them are validated.
225 225 """
226 226
227 227 def __init__(self, fh, size, digests):
228 228 self._fh = fh
229 229 self._size = size
230 230 self._got = 0
231 231 self._digests = dict(digests)
232 232 self._digester = digester(self._digests.keys())
233 233
234 234 def read(self, length=-1):
235 235 content = self._fh.read(length)
236 236 self._digester.update(content)
237 237 self._got += len(content)
238 238 return content
239 239
240 240 def validate(self):
241 241 if self._size != self._got:
242 242 raise Abort(_('size mismatch: expected %d, got %d') %
243 243 (self._size, self._got))
244 244 for k, v in self._digests.items():
245 245 if v != self._digester[k]:
246 246 # i18n: first parameter is a digest name
247 247 raise Abort(_('%s mismatch: expected %s, got %s') %
248 248 (k, v, self._digester[k]))
249 249
250 250 try:
251 251 buffer = buffer
252 252 except NameError:
253 253 if not pycompat.ispy3:
254 254 def buffer(sliceable, offset=0, length=None):
255 255 if length is not None:
256 256 return sliceable[offset:offset + length]
257 257 return sliceable[offset:]
258 258 else:
259 259 def buffer(sliceable, offset=0, length=None):
260 260 if length is not None:
261 261 return memoryview(sliceable)[offset:offset + length]
262 262 return memoryview(sliceable)[offset:]
263 263
264 264 closefds = pycompat.osname == 'posix'
265 265
266 266 _chunksize = 4096
267 267
268 268 class bufferedinputpipe(object):
269 269 """a manually buffered input pipe
270 270
271 271 Python will not let us use buffered IO and lazy reading with 'polling' at
272 272 the same time. We cannot probe the buffer state and select will not detect
273 273 that data are ready to read if they are already buffered.
274 274
275 275 This class lets us work around that by implementing its own buffering
276 276 (allowing efficient readline) while offering a way to know if the buffer is
277 277 empty from the output (allowing collaboration of the buffer with polling).
278 278
279 279 This class lives in the 'util' module because it makes use of the 'os'
280 280 module from the python stdlib.
281 281 """
282 282
283 283 def __init__(self, input):
284 284 self._input = input
285 285 self._buffer = []
286 286 self._eof = False
287 287 self._lenbuf = 0
288 288
289 289 @property
290 290 def hasbuffer(self):
291 291 """True is any data is currently buffered
292 292
293 293 This will be used externally a pre-step for polling IO. If there is
294 294 already data then no polling should be set in place."""
295 295 return bool(self._buffer)
296 296
297 297 @property
298 298 def closed(self):
299 299 return self._input.closed
300 300
301 301 def fileno(self):
302 302 return self._input.fileno()
303 303
304 304 def close(self):
305 305 return self._input.close()
306 306
307 307 def read(self, size):
308 308 while (not self._eof) and (self._lenbuf < size):
309 309 self._fillbuffer()
310 310 return self._frombuffer(size)
311 311
312 312 def readline(self, *args, **kwargs):
313 313 if 1 < len(self._buffer):
314 314 # this should not happen because both read and readline end with a
315 315 # _frombuffer call that collapses it.
316 316 self._buffer = [''.join(self._buffer)]
317 317 self._lenbuf = len(self._buffer[0])
318 318 lfi = -1
319 319 if self._buffer:
320 320 lfi = self._buffer[-1].find('\n')
321 321 while (not self._eof) and lfi < 0:
322 322 self._fillbuffer()
323 323 if self._buffer:
324 324 lfi = self._buffer[-1].find('\n')
325 325 size = lfi + 1
326 326 if lfi < 0: # end of file
327 327 size = self._lenbuf
328 328 elif 1 < len(self._buffer):
329 329 # we need to take previous chunks into account
330 330 size += self._lenbuf - len(self._buffer[-1])
331 331 return self._frombuffer(size)
332 332
333 333 def _frombuffer(self, size):
334 334 """return at most 'size' data from the buffer
335 335
336 336 The data are removed from the buffer."""
337 337 if size == 0 or not self._buffer:
338 338 return ''
339 339 buf = self._buffer[0]
340 340 if 1 < len(self._buffer):
341 341 buf = ''.join(self._buffer)
342 342
343 343 data = buf[:size]
344 344 buf = buf[len(data):]
345 345 if buf:
346 346 self._buffer = [buf]
347 347 self._lenbuf = len(buf)
348 348 else:
349 349 self._buffer = []
350 350 self._lenbuf = 0
351 351 return data
352 352
353 353 def _fillbuffer(self):
354 354 """read data to the buffer"""
355 355 data = os.read(self._input.fileno(), _chunksize)
356 356 if not data:
357 357 self._eof = True
358 358 else:
359 359 self._lenbuf += len(data)
360 360 self._buffer.append(data)
361 361
362 362 def popen2(cmd, env=None, newlines=False):
363 363 # Setting bufsize to -1 lets the system decide the buffer size.
364 364 # The default for bufsize is 0, meaning unbuffered. This leads to
365 365 # poor performance on Mac OS X: http://bugs.python.org/issue4194
366 366 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
367 367 close_fds=closefds,
368 368 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
369 369 universal_newlines=newlines,
370 370 env=env)
371 371 return p.stdin, p.stdout
372 372
373 373 def popen3(cmd, env=None, newlines=False):
374 374 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
375 375 return stdin, stdout, stderr
376 376
377 377 def popen4(cmd, env=None, newlines=False, bufsize=-1):
378 378 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
379 379 close_fds=closefds,
380 380 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
381 381 stderr=subprocess.PIPE,
382 382 universal_newlines=newlines,
383 383 env=env)
384 384 return p.stdin, p.stdout, p.stderr, p
385 385
386 386 def version():
387 387 """Return version information if available."""
388 388 try:
389 389 from . import __version__
390 390 return __version__.version
391 391 except ImportError:
392 392 return 'unknown'
393 393
394 394 def versiontuple(v=None, n=4):
395 395 """Parses a Mercurial version string into an N-tuple.
396 396
397 397 The version string to be parsed is specified with the ``v`` argument.
398 398 If it isn't defined, the current Mercurial version string will be parsed.
399 399
400 400 ``n`` can be 2, 3, or 4. Here is how some version strings map to
401 401 returned values:
402 402
403 403 >>> v = '3.6.1+190-df9b73d2d444'
404 404 >>> versiontuple(v, 2)
405 405 (3, 6)
406 406 >>> versiontuple(v, 3)
407 407 (3, 6, 1)
408 408 >>> versiontuple(v, 4)
409 409 (3, 6, 1, '190-df9b73d2d444')
410 410
411 411 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
412 412 (3, 6, 1, '190-df9b73d2d444+20151118')
413 413
414 414 >>> v = '3.6'
415 415 >>> versiontuple(v, 2)
416 416 (3, 6)
417 417 >>> versiontuple(v, 3)
418 418 (3, 6, None)
419 419 >>> versiontuple(v, 4)
420 420 (3, 6, None, None)
421 421
422 422 >>> v = '3.9-rc'
423 423 >>> versiontuple(v, 2)
424 424 (3, 9)
425 425 >>> versiontuple(v, 3)
426 426 (3, 9, None)
427 427 >>> versiontuple(v, 4)
428 428 (3, 9, None, 'rc')
429 429
430 430 >>> v = '3.9-rc+2-02a8fea4289b'
431 431 >>> versiontuple(v, 2)
432 432 (3, 9)
433 433 >>> versiontuple(v, 3)
434 434 (3, 9, None)
435 435 >>> versiontuple(v, 4)
436 436 (3, 9, None, 'rc+2-02a8fea4289b')
437 437 """
438 438 if not v:
439 439 v = version()
440 440 parts = remod.split('[\+-]', v, 1)
441 441 if len(parts) == 1:
442 442 vparts, extra = parts[0], None
443 443 else:
444 444 vparts, extra = parts
445 445
446 446 vints = []
447 447 for i in vparts.split('.'):
448 448 try:
449 449 vints.append(int(i))
450 450 except ValueError:
451 451 break
452 452 # (3, 6) -> (3, 6, None)
453 453 while len(vints) < 3:
454 454 vints.append(None)
455 455
456 456 if n == 2:
457 457 return (vints[0], vints[1])
458 458 if n == 3:
459 459 return (vints[0], vints[1], vints[2])
460 460 if n == 4:
461 461 return (vints[0], vints[1], vints[2], extra)
462 462
463 463 # used by parsedate
464 464 defaultdateformats = (
465 465 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
466 466 '%Y-%m-%dT%H:%M', # without seconds
467 467 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
468 468 '%Y-%m-%dT%H%M', # without seconds
469 469 '%Y-%m-%d %H:%M:%S', # our common legal variant
470 470 '%Y-%m-%d %H:%M', # without seconds
471 471 '%Y-%m-%d %H%M%S', # without :
472 472 '%Y-%m-%d %H%M', # without seconds
473 473 '%Y-%m-%d %I:%M:%S%p',
474 474 '%Y-%m-%d %H:%M',
475 475 '%Y-%m-%d %I:%M%p',
476 476 '%Y-%m-%d',
477 477 '%m-%d',
478 478 '%m/%d',
479 479 '%m/%d/%y',
480 480 '%m/%d/%Y',
481 481 '%a %b %d %H:%M:%S %Y',
482 482 '%a %b %d %I:%M:%S%p %Y',
483 483 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
484 484 '%b %d %H:%M:%S %Y',
485 485 '%b %d %I:%M:%S%p %Y',
486 486 '%b %d %H:%M:%S',
487 487 '%b %d %I:%M:%S%p',
488 488 '%b %d %H:%M',
489 489 '%b %d %I:%M%p',
490 490 '%b %d %Y',
491 491 '%b %d',
492 492 '%H:%M:%S',
493 493 '%I:%M:%S%p',
494 494 '%H:%M',
495 495 '%I:%M%p',
496 496 )
497 497
498 498 extendeddateformats = defaultdateformats + (
499 499 "%Y",
500 500 "%Y-%m",
501 501 "%b",
502 502 "%b %Y",
503 503 )
504 504
505 505 def cachefunc(func):
506 506 '''cache the result of function calls'''
507 507 # XXX doesn't handle keywords args
508 508 if func.__code__.co_argcount == 0:
509 509 cache = []
510 510 def f():
511 511 if len(cache) == 0:
512 512 cache.append(func())
513 513 return cache[0]
514 514 return f
515 515 cache = {}
516 516 if func.__code__.co_argcount == 1:
517 517 # we gain a small amount of time because
518 518 # we don't need to pack/unpack the list
519 519 def f(arg):
520 520 if arg not in cache:
521 521 cache[arg] = func(arg)
522 522 return cache[arg]
523 523 else:
524 524 def f(*args):
525 525 if args not in cache:
526 526 cache[args] = func(*args)
527 527 return cache[args]
528 528
529 529 return f
530 530
531 531 class sortdict(dict):
532 532 '''a simple sorted dictionary'''
533 533 def __init__(self, data=None):
534 534 self._list = []
535 535 if data:
536 536 self.update(data)
537 537 def copy(self):
538 538 return sortdict(self)
539 539 def __setitem__(self, key, val):
540 540 if key in self:
541 541 self._list.remove(key)
542 542 self._list.append(key)
543 543 dict.__setitem__(self, key, val)
544 544 def __iter__(self):
545 545 return self._list.__iter__()
546 546 def update(self, src):
547 547 if isinstance(src, dict):
548 548 src = src.iteritems()
549 549 for k, v in src:
550 550 self[k] = v
551 551 def clear(self):
552 552 dict.clear(self)
553 553 self._list = []
554 554 def items(self):
555 555 return [(k, self[k]) for k in self._list]
556 556 def __delitem__(self, key):
557 557 dict.__delitem__(self, key)
558 558 self._list.remove(key)
559 559 def pop(self, key, *args, **kwargs):
560 560 try:
561 561 self._list.remove(key)
562 562 except ValueError:
563 563 pass
564 564 return dict.pop(self, key, *args, **kwargs)
565 565 def keys(self):
566 566 return self._list[:]
567 567 def iterkeys(self):
568 568 return self._list.__iter__()
569 569 def iteritems(self):
570 570 for k in self._list:
571 571 yield k, self[k]
572 572 def insert(self, index, key, val):
573 573 self._list.insert(index, key)
574 574 dict.__setitem__(self, key, val)
575 575 def __repr__(self):
576 576 if not self:
577 577 return '%s()' % self.__class__.__name__
578 578 return '%s(%r)' % (self.__class__.__name__, self.items())
579 579
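# A minimal sketch of sortdict's ordering contract (hypothetical demo
# helper, not part of this module): iteration follows insertion order,
# and re-setting an existing key moves it to the end.
def _demosortdict():
    d = sortdict([('a', 1), ('b', 2)])
    d['a'] = 3                  # re-inserted, so 'a' moves to the end
    assert d.keys() == ['b', 'a']
    assert d.items() == [('b', 2), ('a', 3)]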
580 580 class _lrucachenode(object):
581 581 """A node in a doubly linked list.
582 582
583 583 Holds a reference to nodes on either side as well as a key-value
584 584 pair for the dictionary entry.
585 585 """
586 586 __slots__ = (u'next', u'prev', u'key', u'value')
587 587
588 588 def __init__(self):
589 589 self.next = None
590 590 self.prev = None
591 591
592 592 self.key = _notset
593 593 self.value = None
594 594
595 595 def markempty(self):
596 596 """Mark the node as emptied."""
597 597 self.key = _notset
598 598
599 599 class lrucachedict(object):
600 600 """Dict that caches most recent accesses and sets.
601 601
602 602 The dict consists of an actual backing dict - indexed by original
603 603 key - and a doubly linked circular list defining the order of entries in
604 604 the cache.
605 605
606 606 The head node is the newest entry in the cache. If the cache is full,
607 607 we recycle head.prev and make it the new head. Cache accesses result in
608 608 the node being moved to before the existing head and being marked as the
609 609 new head node.
610 610 """
611 611 def __init__(self, max):
612 612 self._cache = {}
613 613
614 614 self._head = head = _lrucachenode()
615 615 head.prev = head
616 616 head.next = head
617 617 self._size = 1
618 618 self._capacity = max
619 619
620 620 def __len__(self):
621 621 return len(self._cache)
622 622
623 623 def __contains__(self, k):
624 624 return k in self._cache
625 625
626 626 def __iter__(self):
627 627 # We don't have to iterate in cache order, but why not.
628 628 n = self._head
629 629 for i in range(len(self._cache)):
630 630 yield n.key
631 631 n = n.next
632 632
633 633 def __getitem__(self, k):
634 634 node = self._cache[k]
635 635 self._movetohead(node)
636 636 return node.value
637 637
638 638 def __setitem__(self, k, v):
639 639 node = self._cache.get(k)
640 640 # Replace existing value and mark as newest.
641 641 if node is not None:
642 642 node.value = v
643 643 self._movetohead(node)
644 644 return
645 645
646 646 if self._size < self._capacity:
647 647 node = self._addcapacity()
648 648 else:
649 649 # Grab the last/oldest item.
650 650 node = self._head.prev
651 651
652 652 # At capacity. Kill the old entry.
653 653 if node.key is not _notset:
654 654 del self._cache[node.key]
655 655
656 656 node.key = k
657 657 node.value = v
658 658 self._cache[k] = node
659 659 # And mark it as newest entry. No need to adjust order since it
660 660 # is already self._head.prev.
661 661 self._head = node
662 662
663 663 def __delitem__(self, k):
664 664 node = self._cache.pop(k)
665 665 node.markempty()
666 666
667 667 # Temporarily mark as newest item before re-adjusting head to make
668 668 # this node the oldest item.
669 669 self._movetohead(node)
670 670 self._head = node.next
671 671
672 672 # Additional dict methods.
673 673
674 674 def get(self, k, default=None):
675 675 try:
676 676 return self._cache[k].value
677 677 except KeyError:
678 678 return default
679 679
680 680 def clear(self):
681 681 n = self._head
682 682 while n.key is not _notset:
683 683 n.markempty()
684 684 n = n.next
685 685
686 686 self._cache.clear()
687 687
688 688 def copy(self):
689 689 result = lrucachedict(self._capacity)
690 690 n = self._head.prev
691 691 # Iterate in oldest-to-newest order, so the copy has the right ordering
692 692 for i in range(len(self._cache)):
693 693 result[n.key] = n.value
694 694 n = n.prev
695 695 return result
696 696
697 697 def _movetohead(self, node):
698 698 """Mark a node as the newest, making it the new head.
699 699
700 700 When a node is accessed, it becomes the freshest entry in the LRU
701 701 list, which is denoted by self._head.
702 702
703 703 Visually, let's make ``N`` the new head node (* denotes head):
704 704
705 705 previous/oldest <-> head <-> next/next newest
706 706
707 707 ----<->--- A* ---<->-----
708 708 | |
709 709 E <-> D <-> N <-> C <-> B
710 710
711 711 To:
712 712
713 713 ----<->--- N* ---<->-----
714 714 | |
715 715 E <-> D <-> C <-> B <-> A
716 716
717 717 This requires the following moves:
718 718
719 719 C.next = D (node.prev.next = node.next)
720 720 D.prev = C (node.next.prev = node.prev)
721 721 E.next = N (head.prev.next = node)
722 722 N.prev = E (node.prev = head.prev)
723 723 N.next = A (node.next = head)
724 724 A.prev = N (head.prev = node)
725 725 """
726 726 head = self._head
727 727 # C.next = D
728 728 node.prev.next = node.next
729 729 # D.prev = C
730 730 node.next.prev = node.prev
731 731 # N.prev = E
732 732 node.prev = head.prev
733 733 # N.next = A
734 734 # It is tempting to do just "head" here, however if node is
735 735 # adjacent to head, this will do bad things.
736 736 node.next = head.prev.next
737 737 # E.next = N
738 738 node.next.prev = node
739 739 # A.prev = N
740 740 node.prev.next = node
741 741
742 742 self._head = node
743 743
744 744 def _addcapacity(self):
745 745 """Add a node to the circular linked list.
746 746
747 747 The new node is inserted before the head node.
748 748 """
749 749 head = self._head
750 750 node = _lrucachenode()
751 751 head.prev.next = node
752 752 node.prev = head.prev
753 753 node.next = head
754 754 head.prev = node
755 755 self._size += 1
756 756 return node
757 757
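# A minimal sketch of lrucachedict's eviction behavior (hypothetical demo
# helper, not part of this module): reads refresh an entry, and the least
# recently touched entry is recycled once capacity is reached.
def _demolrucachedict():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']              # touch 'a'; 'b' is now the oldest entry
    d['c'] = 3          # at capacity, so 'b' is evicted
    assert 'b' not in d
    assert d['a'] == 1 and d['c'] == 3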
758 758 def lrucachefunc(func):
759 759 '''cache most recent results of function calls'''
760 760 cache = {}
761 761 order = collections.deque()
762 762 if func.__code__.co_argcount == 1:
763 763 def f(arg):
764 764 if arg not in cache:
765 765 if len(cache) > 20:
766 766 del cache[order.popleft()]
767 767 cache[arg] = func(arg)
768 768 else:
769 769 order.remove(arg)
770 770 order.append(arg)
771 771 return cache[arg]
772 772 else:
773 773 def f(*args):
774 774 if args not in cache:
775 775 if len(cache) > 20:
776 776 del cache[order.popleft()]
777 777 cache[args] = func(*args)
778 778 else:
779 779 order.remove(args)
780 780 order.append(args)
781 781 return cache[args]
782 782
783 783 return f
784 784
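# A minimal sketch of lrucachefunc's bounded cache (hypothetical demo
# helper, not part of this module): the cache holds at most 21 entries,
# evicting the least recently used argument first.
def _demolrucachefunc():
    calls = []
    def probe(x):
        calls.append(x)
        return x
    f = lrucachefunc(probe)
    for i in range(30):
        f(i)
    f(29)                        # still cached: no new underlying call
    assert calls == list(range(30))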
785 785 class propertycache(object):
786 786 def __init__(self, func):
787 787 self.func = func
788 788 self.name = func.__name__
789 789 def __get__(self, obj, type=None):
790 790 result = self.func(obj)
791 791 self.cachevalue(obj, result)
792 792 return result
793 793
794 794 def cachevalue(self, obj, value):
795 795 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
796 796 obj.__dict__[self.name] = value
797 797
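# A minimal sketch of propertycache (hypothetical demo helper, not part of
# this module): the first access runs the function, cachevalue() stores the
# result in obj.__dict__, and later accesses bypass the descriptor entirely.
def _demopropertycache():
    class thing(object):
        runs = [0]
        @propertycache
        def expensive(self):
            self.runs[0] += 1
            return 42
    t = thing()
    assert t.expensive == 42
    assert t.expensive == 42
    assert thing.runs == [1]  # computed once, then served from __dict__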
798 798 def pipefilter(s, cmd):
799 799 '''filter string S through command CMD, returning its output'''
800 800 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
801 801 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
802 802 pout, perr = p.communicate(s)
803 803 return pout
804 804
805 805 def tempfilter(s, cmd):
806 806 '''filter string S through a pair of temporary files with CMD.
807 807 CMD is used as a template to create the real command to be run,
808 808 with the strings INFILE and OUTFILE replaced by the real names of
809 809 the temporary files generated.'''
810 810 inname, outname = None, None
811 811 try:
812 812 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
813 813 fp = os.fdopen(infd, pycompat.sysstr('wb'))
814 814 fp.write(s)
815 815 fp.close()
816 816 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
817 817 os.close(outfd)
818 818 cmd = cmd.replace('INFILE', inname)
819 819 cmd = cmd.replace('OUTFILE', outname)
820 820 code = os.system(cmd)
821 821 if pycompat.sysplatform == 'OpenVMS' and code & 1:
822 822 code = 0
823 823 if code:
824 824 raise Abort(_("command '%s' failed: %s") %
825 825 (cmd, explainexit(code)))
826 826 return readfile(outname)
827 827 finally:
828 828 try:
829 829 if inname:
830 830 os.unlink(inname)
831 831 except OSError:
832 832 pass
833 833 try:
834 834 if outname:
835 835 os.unlink(outname)
836 836 except OSError:
837 837 pass
838 838
839 839 filtertable = {
840 840 'tempfile:': tempfilter,
841 841 'pipe:': pipefilter,
842 842 }
843 843
844 844 def filter(s, cmd):
845 845 "filter a string through a command that transforms its input to its output"
846 846 for name, fn in filtertable.iteritems():
847 847 if cmd.startswith(name):
848 848 return fn(s, cmd[len(name):].lstrip())
849 849 return pipefilter(s, cmd)
850 850
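# A minimal sketch of the filter dispatch above (hypothetical demo helper,
# not part of this module; assumes a POSIX shell with 'tr' on PATH).
# A 'pipe:' prefix routes to pipefilter, 'tempfile:' to tempfilter.
def _demofilter():
    assert filter('hello', 'pipe:tr a-z A-Z') == 'HELLO'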
851 851 def binary(s):
852 852 """return true if a string is binary data"""
853 853 return bool(s and '\0' in s)
854 854
855 855 def increasingchunks(source, min=1024, max=65536):
856 856 '''return no less than min bytes per chunk while data remains,
857 857 doubling min after each chunk until it reaches max'''
858 858 def log2(x):
859 859 if not x:
860 860 return 0
861 861 i = 0
862 862 while x:
863 863 x >>= 1
864 864 i += 1
865 865 return i - 1
866 866
867 867 buf = []
868 868 blen = 0
869 869 for chunk in source:
870 870 buf.append(chunk)
871 871 blen += len(chunk)
872 872 if blen >= min:
873 873 if min < max:
874 874 min = min << 1
875 875 nmin = 1 << log2(blen)
876 876 if nmin > min:
877 877 min = nmin
878 878 if min > max:
879 879 min = max
880 880 yield ''.join(buf)
881 881 blen = 0
882 882 buf = []
883 883 if buf:
884 884 yield ''.join(buf)
885 885
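# A minimal sketch of increasingchunks (hypothetical demo helper, not part
# of this module): 100-byte chunks are coalesced, with the minimum chunk
# size doubling (1024, 2048, 4096) until it hits max.
def _demoincreasingchunks():
    source = ['x' * 100] * 100
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sum(sizes) == 100 * 100            # no data lost
    assert sizes == [1100, 2100, 4100, 2700]  # 11, 21, 41 chunks, then the rest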
886 886 Abort = error.Abort
887 887
888 888 def always(fn):
889 889 return True
890 890
891 891 def never(fn):
892 892 return False
893 893
894 894 def nogc(func):
895 895 """disable garbage collector
896 896
897 897 Python's garbage collector triggers a GC each time a certain number of
898 898 container objects (the number being defined by gc.get_threshold()) are
899 899 allocated even when marked not to be tracked by the collector. Tracking has
900 900 no effect on when GCs are triggered, only on what objects the GC looks
901 901 into. As a workaround, disable GC while building complex (huge)
902 902 containers.
903 903
904 904 This garbage collector issue has been fixed in 2.7.
905 905 """
906 906 if sys.version_info >= (2, 7):
907 907 return func
908 908 def wrapper(*args, **kwargs):
909 909 gcenabled = gc.isenabled()
910 910 gc.disable()
911 911 try:
912 912 return func(*args, **kwargs)
913 913 finally:
914 914 if gcenabled:
915 915 gc.enable()
916 916 return wrapper
917 917
918 918 def pathto(root, n1, n2):
919 919 '''return the relative path from one place to another.
920 920 root should use os.sep to separate directories
921 921 n1 should use os.sep to separate directories
922 922 n2 should use "/" to separate directories
923 923 returns an os.sep-separated path.
924 924
925 925 If n1 is a relative path, it's assumed it's
926 926 relative to root.
927 927 n2 should always be relative to root.
928 928 '''
929 929 if not n1:
930 930 return localpath(n2)
931 931 if os.path.isabs(n1):
932 932 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
933 933 return os.path.join(root, localpath(n2))
934 934 n2 = '/'.join((pconvert(root), n2))
935 935 a, b = splitpath(n1), n2.split('/')
936 936 a.reverse()
937 937 b.reverse()
938 938 while a and b and a[-1] == b[-1]:
939 939 a.pop()
940 940 b.pop()
941 941 b.reverse()
942 942 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
943 943
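# A minimal sketch of pathto on a POSIX layout (hypothetical demo helper,
# not part of this module; assumes os.sep == '/').
def _demopathto():
    assert pathto('/repo', '/repo/a/b', 'a/c') == '../c'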
944 944 def mainfrozen():
945 945 """return True if we are a frozen executable.
946 946
947 947 The code supports py2exe (most common, Windows only) and tools/freeze
948 948 (portable, not much used).
949 949 """
950 950 return (safehasattr(sys, "frozen") or # new py2exe
951 951 safehasattr(sys, "importers") or # old py2exe
952 952 imp.is_frozen(u"__main__")) # tools/freeze
953 953
954 954 # the location of data files matching the source code
955 955 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
956 956 # executable version (py2exe) doesn't support __file__
957 957 datapath = os.path.dirname(pycompat.sysexecutable)
958 958 else:
959 959 datapath = os.path.dirname(pycompat.fsencode(__file__))
960 960
961 961 i18n.setdatapath(datapath)
962 962
963 963 _hgexecutable = None
964 964
965 965 def hgexecutable():
966 966 """return location of the 'hg' executable.
967 967
968 968 Defaults to $HG or 'hg' in the search path.
969 969 """
970 970 if _hgexecutable is None:
971 971 hg = encoding.environ.get('HG')
972 972 mainmod = sys.modules['__main__']
973 973 if hg:
974 974 _sethgexecutable(hg)
975 975 elif mainfrozen():
976 976 if getattr(sys, 'frozen', None) == 'macosx_app':
977 977 # Env variable set by py2app
978 978 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
979 979 else:
980 980 _sethgexecutable(pycompat.sysexecutable)
981 981 elif (os.path.basename(
982 982 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
983 983 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
984 984 else:
985 985 exe = findexe('hg') or os.path.basename(sys.argv[0])
986 986 _sethgexecutable(exe)
987 987 return _hgexecutable
988 988
989 989 def _sethgexecutable(path):
990 990 """set location of the 'hg' executable"""
991 991 global _hgexecutable
992 992 _hgexecutable = path
993 993
994 994 def _isstdout(f):
995 995 fileno = getattr(f, 'fileno', None)
996 996 return fileno and fileno() == sys.__stdout__.fileno()
997 997
998 998 def shellenviron(environ=None):
999 999 """return environ with optional override, useful for shelling out"""
1000 1000 def py2shell(val):
1001 1001 'convert python object into string that is useful to shell'
1002 1002 if val is None or val is False:
1003 1003 return '0'
1004 1004 if val is True:
1005 1005 return '1'
1006 1006 return str(val)
1007 1007 env = dict(encoding.environ)
1008 1008 if environ:
1009 1009 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1010 1010 env['HG'] = hgexecutable()
1011 1011 return env
1012 1012
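# A minimal sketch of shellenviron's py2shell coercion (hypothetical demo
# helper, not part of this module; the keys are illustrative):
# None/False become '0', True becomes '1', other values are str()'d.
def _demoshellenviron():
    env = shellenviron({'a': None, 'b': True, 'c': 7})
    assert (env['a'], env['b'], env['c']) == ('0', '1', '7')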
1013 1013 def system(cmd, environ=None, cwd=None, out=None):
1014 1014 '''enhanced shell command execution.
1015 1015 run with the environment possibly modified, possibly in a different dir.
1016 1016
1017 1017 if out is specified, it is assumed to be a file-like object that has a
1018 1018 write() method. stdout and stderr will be redirected to out.'''
1019 1019 try:
1020 1020 stdout.flush()
1021 1021 except Exception:
1022 1022 pass
1023 1023 cmd = quotecommand(cmd)
1024 1024 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1025 1025 and sys.version_info[1] < 7):
1026 1026 # subprocess kludge to work around issues in half-baked Python
1027 1027 # ports, notably bichued/python:
1028 1028 if not cwd is None:
1029 1029 os.chdir(cwd)
1030 1030 rc = os.system(cmd)
1031 1031 else:
1032 1032 env = shellenviron(environ)
1033 1033 if out is None or _isstdout(out):
1034 1034 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1035 1035 env=env, cwd=cwd)
1036 1036 else:
1037 1037 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1038 1038 env=env, cwd=cwd, stdout=subprocess.PIPE,
1039 1039 stderr=subprocess.STDOUT)
1040 1040 for line in iter(proc.stdout.readline, ''):
1041 1041 out.write(line)
1042 1042 proc.wait()
1043 1043 rc = proc.returncode
1044 1044 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1045 1045 rc = 0
1046 1046 return rc
1047 1047
1048 1048 def checksignature(func):
1049 1049 '''wrap a function with code to check for calling errors'''
1050 1050 def check(*args, **kwargs):
1051 1051 try:
1052 1052 return func(*args, **kwargs)
1053 1053 except TypeError:
1054 1054 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1055 1055 raise error.SignatureError
1056 1056 raise
1057 1057
1058 1058 return check
1059 1059
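# A minimal sketch of checksignature (hypothetical demo helper, not part of
# this module): a TypeError raised at the call boundary becomes a
# SignatureError, while TypeErrors from deeper frames propagate unchanged.
def _demochecksignature():
    def add(a, b):
        return a + b
    checked = checksignature(add)
    assert checked(1, 2) == 3
    try:
        checked(1)          # wrong arity, caught at the boundary
        raise AssertionError('expected SignatureError')
    except error.SignatureError:
        pass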
1060 1060 # Hardlinks are problematic on CIFS, do not allow hardlinks
1061 1061 # until we find a way to work around it cleanly (issue4546).
1062 1062 # This is a variable so extensions can opt-in to using them.
1063 1063 allowhardlinks = False
1064 1064
1065 1065 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1066 1066 '''copy a file, preserving mode and optionally other stat info like
1067 1067 atime/mtime
1068 1068
1069 1069 checkambig argument is used with filestat, and is useful only if
1070 1070 destination file is guarded by any lock (e.g. repo.lock or
1071 1071 repo.wlock).
1072 1072
1073 1073 copystat and checkambig should be mutually exclusive.
1074 1074 '''
1075 1075 assert not (copystat and checkambig)
1076 1076 oldstat = None
1077 1077 if os.path.lexists(dest):
1078 1078 if checkambig:
1079 1079 oldstat = checkambig and filestat(dest)
1080 1080 unlink(dest)
1081 1081 if allowhardlinks and hardlink:
1082 1082 try:
1083 1083 oslink(src, dest)
1084 1084 return
1085 1085 except (IOError, OSError):
1086 1086 pass # fall back to normal copy
1087 1087 if os.path.islink(src):
1088 1088 os.symlink(os.readlink(src), dest)
1089 1089 # copytime is ignored for symlinks, but in general copytime isn't needed
1090 1090 # for them anyway
1091 1091 else:
1092 1092 try:
1093 1093 shutil.copyfile(src, dest)
1094 1094 if copystat:
1095 1095 # copystat also copies mode
1096 1096 shutil.copystat(src, dest)
1097 1097 else:
1098 1098 shutil.copymode(src, dest)
1099 1099 if oldstat and oldstat.stat:
1100 1100 newstat = filestat(dest)
1101 1101 if newstat.isambig(oldstat):
1102 1102 # stat of copied file is ambiguous to original one
1103 1103 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1104 1104 os.utime(dest, (advanced, advanced))
1105 1105 except shutil.Error as inst:
1106 1106 raise Abort(str(inst))
1107 1107
1108 1108 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1109 1109 """Copy a directory tree using hardlinks if possible."""
1110 1110 num = 0
1111 1111
1112 1112 if hardlink is None:
1113 1113 hardlink = (os.stat(src).st_dev ==
1114 1114 os.stat(os.path.dirname(dst)).st_dev)
1115 1115 if hardlink:
1116 1116 topic = _('linking')
1117 1117 else:
1118 1118 topic = _('copying')
1119 1119
1120 1120 if os.path.isdir(src):
1121 1121 os.mkdir(dst)
1122 1122 for name, kind in osutil.listdir(src):
1123 1123 srcname = os.path.join(src, name)
1124 1124 dstname = os.path.join(dst, name)
1125 1125 def nprog(t, pos):
1126 1126 if pos is not None:
1127 1127 return progress(t, pos + num)
1128 1128 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1129 1129 num += n
1130 1130 else:
1131 1131 if hardlink:
1132 1132 try:
1133 1133 oslink(src, dst)
1134 1134 except (IOError, OSError):
1135 1135 hardlink = False
1136 1136 shutil.copy(src, dst)
1137 1137 else:
1138 1138 shutil.copy(src, dst)
1139 1139 num += 1
1140 1140 progress(topic, num)
1141 1141 progress(topic, None)
1142 1142
1143 1143 return hardlink, num
1144 1144
1145 1145 _winreservednames = '''con prn aux nul
1146 1146 com1 com2 com3 com4 com5 com6 com7 com8 com9
1147 1147 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1148 1148 _winreservedchars = ':*?"<>|'
1149 1149 def checkwinfilename(path):
1150 1150 r'''Check that the base-relative path is a valid filename on Windows.
1151 1151 Returns None if the path is ok, or a UI string describing the problem.
1152 1152
1153 1153 >>> checkwinfilename("just/a/normal/path")
1154 1154 >>> checkwinfilename("foo/bar/con.xml")
1155 1155 "filename contains 'con', which is reserved on Windows"
1156 1156 >>> checkwinfilename("foo/con.xml/bar")
1157 1157 "filename contains 'con', which is reserved on Windows"
1158 1158 >>> checkwinfilename("foo/bar/xml.con")
1159 1159 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1160 1160 "filename contains 'AUX', which is reserved on Windows"
1161 1161 >>> checkwinfilename("foo/bar/bla:.txt")
1162 1162 "filename contains ':', which is reserved on Windows"
1163 1163 >>> checkwinfilename("foo/bar/b\07la.txt")
1164 1164 "filename contains '\\x07', which is invalid on Windows"
1165 1165 >>> checkwinfilename("foo/bar/bla ")
1166 1166 "filename ends with ' ', which is not allowed on Windows"
1167 1167 >>> checkwinfilename("../bar")
1168 1168 >>> checkwinfilename("foo\\")
1169 1169 "filename ends with '\\', which is invalid on Windows"
1170 1170 >>> checkwinfilename("foo\\/bar")
1171 1171 "directory name ends with '\\', which is invalid on Windows"
1172 1172 '''
1173 1173 if path.endswith('\\'):
1174 1174 return _("filename ends with '\\', which is invalid on Windows")
1175 1175 if '\\/' in path:
1176 1176 return _("directory name ends with '\\', which is invalid on Windows")
1177 1177 for n in path.replace('\\', '/').split('/'):
1178 1178 if not n:
1179 1179 continue
1180 1180 for c in n:
1181 1181 if c in _winreservedchars:
1182 1182 return _("filename contains '%s', which is reserved "
1183 1183 "on Windows") % c
1184 1184 if ord(c) <= 31:
1185 1185 return _("filename contains %r, which is invalid "
1186 1186 "on Windows") % c
1187 1187 base = n.split('.')[0]
1188 1188 if base and base.lower() in _winreservednames:
1189 1189 return _("filename contains '%s', which is reserved "
1190 1190 "on Windows") % base
1191 1191 t = n[-1]
1192 1192 if t in '. ' and n not in '..':
1193 1193 return _("filename ends with '%s', which is not allowed "
1194 1194 "on Windows") % t
1195 1195
1196 1196 if pycompat.osname == 'nt':
1197 1197 checkosfilename = checkwinfilename
1198 1198 timer = time.clock
1199 1199 else:
1200 1200 checkosfilename = platform.checkosfilename
1201 1201 timer = time.time
1202 1202
1203 1203 if safehasattr(time, "perf_counter"):
1204 1204 timer = time.perf_counter
1205 1205
1206 1206 def makelock(info, pathname):
1207 1207 try:
1208 1208 return os.symlink(info, pathname)
1209 1209 except OSError as why:
1210 1210 if why.errno == errno.EEXIST:
1211 1211 raise
1212 1212 except AttributeError: # no symlink in os
1213 1213 pass
1214 1214
1215 1215 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1216 1216 os.write(ld, info)
1217 1217 os.close(ld)
1218 1218
1219 1219 def readlock(pathname):
1220 1220 try:
1221 1221 return os.readlink(pathname)
1222 1222 except OSError as why:
1223 1223 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1224 1224 raise
1225 1225 except AttributeError: # no symlink in os
1226 1226 pass
1227 1227 fp = posixfile(pathname)
1228 1228 r = fp.read()
1229 1229 fp.close()
1230 1230 return r
1231 1231
1232 1232 def fstat(fp):
1233 1233 '''stat file object that may not have fileno method.'''
1234 1234 try:
1235 1235 return os.fstat(fp.fileno())
1236 1236 except AttributeError:
1237 1237 return os.stat(fp.name)
1238 1238
1239 1239 # File system features
1240 1240
1241 1241 def fscasesensitive(path):
1242 1242 """
1243 1243 Return true if the given path is on a case-sensitive filesystem
1244 1244
1245 1245 Requires a path (like /foo/.hg) ending with a foldable final
1246 1246 directory component.
1247 1247 """
1248 1248 s1 = os.lstat(path)
1249 1249 d, b = os.path.split(path)
1250 1250 b2 = b.upper()
1251 1251 if b == b2:
1252 1252 b2 = b.lower()
1253 1253 if b == b2:
1254 1254 return True # no evidence against case sensitivity
1255 1255 p2 = os.path.join(d, b2)
1256 1256 try:
1257 1257 s2 = os.lstat(p2)
1258 1258 if s2 == s1:
1259 1259 return False
1260 1260 return True
1261 1261 except OSError:
1262 1262 return True
1263 1263
1264 1264 try:
1265 1265 import re2
1266 1266 _re2 = None
1267 1267 except ImportError:
1268 1268 _re2 = False
1269 1269
1270 1270 class _re(object):
1271 1271 def _checkre2(self):
1272 1272 global _re2
1273 1273 try:
1274 1274 # check if match works, see issue3964
1275 1275 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1276 1276 except ImportError:
1277 1277 _re2 = False
1278 1278
1279 1279 def compile(self, pat, flags=0):
1280 1280 '''Compile a regular expression, using re2 if possible
1281 1281
1282 1282 For best performance, use only re2-compatible regexp features. The
1283 1283 only flags from the re module that are re2-compatible are
1284 1284 IGNORECASE and MULTILINE.'''
1285 1285 if _re2 is None:
1286 1286 self._checkre2()
1287 1287 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1288 1288 if flags & remod.IGNORECASE:
1289 1289 pat = '(?i)' + pat
1290 1290 if flags & remod.MULTILINE:
1291 1291 pat = '(?m)' + pat
1292 1292 try:
1293 1293 return re2.compile(pat)
1294 1294 except re2.error:
1295 1295 pass
1296 1296 return remod.compile(pat, flags)
1297 1297
1298 1298 @propertycache
1299 1299 def escape(self):
1300 1300 '''Return the version of escape corresponding to self.compile.
1301 1301
1302 1302 This is imperfect because whether re2 or re is used for a particular
1303 1303 function depends on the flags, etc, but it's the best we can do.
1304 1304 '''
1305 1305 global _re2
1306 1306 if _re2 is None:
1307 1307 self._checkre2()
1308 1308 if _re2:
1309 1309 return re2.escape
1310 1310 else:
1311 1311 return remod.escape
1312 1312
1313 1313 re = _re()
1314 1314
1315 1315 _fspathcache = {}
1316 1316 def fspath(name, root):
1317 1317 '''Get name in the case stored in the filesystem
1318 1318
1319 1319 The name should be relative to root, and be normcase-ed for efficiency.
1320 1320
1321 1321 Note that this function is unnecessary, and should not be
1322 1322 called, for case-sensitive filesystems (simply because it's expensive).
1323 1323
1324 1324 The root should be normcase-ed, too.
1325 1325 '''
1326 1326 def _makefspathcacheentry(dir):
1327 1327 return dict((normcase(n), n) for n in os.listdir(dir))
1328 1328
1329 1329 seps = pycompat.ossep
1330 1330 if pycompat.osaltsep:
1331 1331 seps = seps + pycompat.osaltsep
1332 1332 # Protect backslashes. This gets silly very quickly.
1333 1333 seps = seps.replace('\\', '\\\\')
1334 1334 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1335 1335 dir = os.path.normpath(root)
1336 1336 result = []
1337 1337 for part, sep in pattern.findall(name):
1338 1338 if sep:
1339 1339 result.append(sep)
1340 1340 continue
1341 1341
1342 1342 if dir not in _fspathcache:
1343 1343 _fspathcache[dir] = _makefspathcacheentry(dir)
1344 1344 contents = _fspathcache[dir]
1345 1345
1346 1346 found = contents.get(part)
1347 1347 if not found:
1348 1348 # retry "once per directory" per "dirstate.walk" which
1349 1349 # may take place for each patches of "hg qpush", for example
1350 1350 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1351 1351 found = contents.get(part)
1352 1352
1353 1353 result.append(found or part)
1354 1354 dir = os.path.join(dir, part)
1355 1355
1356 1356 return ''.join(result)
1357 1357
1358 1358 def checknlink(testfile):
1359 1359 '''check whether hardlink count reporting works properly'''
1360 1360
1361 1361 # testfile may be open, so we need a separate file for checking to
1362 1362 # work around issue2543 (or testfile may get lost on Samba shares)
1363 1363 f1 = testfile + ".hgtmp1"
1364 1364 if os.path.lexists(f1):
1365 1365 return False
1366 1366 try:
1367 1367 posixfile(f1, 'w').close()
1368 1368 except IOError:
1369 1369 try:
1370 1370 os.unlink(f1)
1371 1371 except OSError:
1372 1372 pass
1373 1373 return False
1374 1374
1375 1375 f2 = testfile + ".hgtmp2"
1376 1376 fd = None
1377 1377 try:
1378 1378 oslink(f1, f2)
1379 1379 # nlinks() may behave differently for files on Windows shares if
1380 1380 # the file is open.
1381 1381 fd = posixfile(f2)
1382 1382 return nlinks(f2) > 1
1383 1383 except OSError:
1384 1384 return False
1385 1385 finally:
1386 1386 if fd is not None:
1387 1387 fd.close()
1388 1388 for f in (f1, f2):
1389 1389 try:
1390 1390 os.unlink(f)
1391 1391 except OSError:
1392 1392 pass
1393 1393
1394 1394 def endswithsep(path):
1395 1395 '''Check path ends with os.sep or os.altsep.'''
1396 1396 return (path.endswith(pycompat.ossep)
1397 1397 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1398 1398
1399 1399 def splitpath(path):
1400 1400 '''Split path by os.sep.
1401 1401 Note that this function does not use os.altsep because it is
1402 1402 meant as an alternative to a simple "xxx.split(os.sep)".
1403 1403 It is recommended to apply os.path.normpath() to the path before
1404 1404 using this function, if needed.'''
1405 1405 return path.split(pycompat.ossep)
1406 1406
1407 1407 def gui():
1408 1408 '''Are we running in a GUI?'''
1409 1409 if pycompat.sysplatform == 'darwin':
1410 1410 if 'SSH_CONNECTION' in encoding.environ:
1411 1411 # handle SSH access to a box where the user is logged in
1412 1412 return False
1413 1413 elif getattr(osutil, 'isgui', None):
1414 1414 # check if a CoreGraphics session is available
1415 1415 return osutil.isgui()
1416 1416 else:
1417 1417 # pure build; use a safe default
1418 1418 return True
1419 1419 else:
1420 1420 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1421 1421
1422 1422 def mktempcopy(name, emptyok=False, createmode=None):
1423 1423 """Create a temporary file with the same contents from name
1424 1424
1425 1425 The permission bits are copied from the original file.
1426 1426
1427 1427 If the temporary file is going to be truncated immediately, you
1428 1428 can use emptyok=True as an optimization.
1429 1429
1430 1430 Returns the name of the temporary file.
1431 1431 """
1432 1432 d, fn = os.path.split(name)
1433 1433 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1434 1434 os.close(fd)
1435 1435 # Temporary files are created with mode 0600, which is usually not
1436 1436 # what we want. If the original file already exists, just copy
1437 1437 # its mode. Otherwise, manually obey umask.
1438 1438 copymode(name, temp, createmode)
1439 1439 if emptyok:
1440 1440 return temp
1441 1441 try:
1442 1442 try:
1443 1443 ifp = posixfile(name, "rb")
1444 1444 except IOError as inst:
1445 1445 if inst.errno == errno.ENOENT:
1446 1446 return temp
1447 1447 if not getattr(inst, 'filename', None):
1448 1448 inst.filename = name
1449 1449 raise
1450 1450 ofp = posixfile(temp, "wb")
1451 1451 for chunk in filechunkiter(ifp):
1452 1452 ofp.write(chunk)
1453 1453 ifp.close()
1454 1454 ofp.close()
1455 1455 except: # re-raises
1456 1456 try: os.unlink(temp)
1457 1457 except OSError: pass
1458 1458 raise
1459 1459 return temp
1460 1460
1461 1461 class filestat(object):
1462 1462 """help to exactly detect change of a file
1463 1463
1464 1464 'stat' attribute is result of 'os.stat()' if specified 'path'
1465 1465 exists. Otherwise, it is None. This can avoid preparative
1466 1466 'exists()' examination on client side of this class.
1467 1467 """
1468 1468 def __init__(self, path):
1469 1469 try:
1470 1470 self.stat = os.stat(path)
1471 1471 except OSError as err:
1472 1472 if err.errno != errno.ENOENT:
1473 1473 raise
1474 1474 self.stat = None
1475 1475
1476 1476 __hash__ = object.__hash__
1477 1477
1478 1478 def __eq__(self, old):
1479 1479 try:
1480 1480 # if ambiguity between stat of new and old file is
1481 1481 # avoided, comparison of size, ctime and mtime is enough
1482 1482 # to exactly detect change of a file regardless of platform
1483 1483 return (self.stat.st_size == old.stat.st_size and
1484 1484 self.stat.st_ctime == old.stat.st_ctime and
1485 1485 self.stat.st_mtime == old.stat.st_mtime)
1486 1486 except AttributeError:
1487 1487 return False
1488 1488
1489 1489 def isambig(self, old):
1490 1490 """Examine whether new (= self) stat is ambiguous against old one
1491 1491
1492 1492 "S[N]" below means stat of a file at N-th change:
1493 1493
1494 1494 - S[n-1].ctime < S[n].ctime: can detect change of a file
1495 1495 - S[n-1].ctime == S[n].ctime
1496 1496 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1497 1497 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1498 1498 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1499 1499 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1500 1500
1501 1501 Case (*2) above means that a file was changed twice or more within
1502 1502 the same second (= S[n-1].ctime), so comparison of timestamps
1503 1503 is ambiguous.
1504 1504
1505 1505 The basic idea to avoid such ambiguity is to "advance mtime by 1
1506 1506 sec, if the timestamp is ambiguous".
1507 1507
1508 1508 But advancing mtime only in case (*2) doesn't work as
1509 1509 expected, because naturally advanced S[n].mtime in case (*1)
1510 1510 might be equal to manually advanced S[n-1 or earlier].mtime.
1511 1511
1512 1512 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1513 1513 treated as ambiguous regardless of mtime, to avoid overlooking
1514 1514 a conflict between such mtimes.
1515 1515
1516 1516 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1517 1517 S[n].mtime", even if size of a file isn't changed.
1518 1518 """
1519 1519 try:
1520 1520 return (self.stat.st_ctime == old.stat.st_ctime)
1521 1521 except AttributeError:
1522 1522 return False
1523 1523
1524 1524 def avoidambig(self, path, old):
1525 1525 """Change file stat of specified path to avoid ambiguity
1526 1526
1527 1527 'old' should be previous filestat of 'path'.
1528 1528
1529 1529 This skips avoiding ambiguity, if a process doesn't have
1530 1530 appropriate privileges for 'path'.
1531 1531 """
1532 1532 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1533 1533 try:
1534 1534 os.utime(path, (advanced, advanced))
1535 1535 except OSError as inst:
1536 1536 if inst.errno == errno.EPERM:
1537 1537 # utime() on the file created by another user causes EPERM,
1538 1538 # if a process doesn't have appropriate privileges
1539 1539 return
1540 1540 raise
1541 1541
1542 1542 def __ne__(self, other):
1543 1543 return not self == other
1544 1544
1545 1545 class atomictempfile(object):
1546 1546 '''writable file object that atomically updates a file
1547 1547
1548 1548 All writes will go to a temporary copy of the original file. Call
1549 1549 close() when you are done writing, and atomictempfile will rename
1550 1550 the temporary copy to the original name, making the changes
1551 1551 visible. If the object is destroyed without being closed, all your
1552 1552 writes are discarded.
1553 1553
1554 1554 checkambig argument of constructor is used with filestat, and is
1555 1555 useful only if target file is guarded by any lock (e.g. repo.lock
1556 1556 or repo.wlock).
1557 1557 '''
1558 1558 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1559 1559 self.__name = name # permanent name
1560 1560 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1561 1561 createmode=createmode)
1562 1562 self._fp = posixfile(self._tempname, mode)
1563 1563 self._checkambig = checkambig
1564 1564
1565 1565 # delegated methods
1566 1566 self.read = self._fp.read
1567 1567 self.write = self._fp.write
1568 1568 self.seek = self._fp.seek
1569 1569 self.tell = self._fp.tell
1570 1570 self.fileno = self._fp.fileno
1571 1571
1572 1572 def close(self):
1573 1573 if not self._fp.closed:
1574 1574 self._fp.close()
1575 1575 filename = localpath(self.__name)
1576 1576 oldstat = self._checkambig and filestat(filename)
1577 1577 if oldstat and oldstat.stat:
1578 1578 rename(self._tempname, filename)
1579 1579 newstat = filestat(filename)
1580 1580 if newstat.isambig(oldstat):
1581 1581 # stat of changed file is ambiguous to original one
1582 1582 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1583 1583 os.utime(filename, (advanced, advanced))
1584 1584 else:
1585 1585 rename(self._tempname, filename)
1586 1586
1587 1587 def discard(self):
1588 1588 if not self._fp.closed:
1589 1589 try:
1590 1590 os.unlink(self._tempname)
1591 1591 except OSError:
1592 1592 pass
1593 1593 self._fp.close()
1594 1594
1595 1595 def __del__(self):
1596 1596 if safehasattr(self, '_fp'): # constructor actually did something
1597 1597 self.discard()
1598 1598
1599 1599 def __enter__(self):
1600 1600 return self
1601 1601
1602 1602 def __exit__(self, exctype, excvalue, traceback):
1603 1603 if exctype is not None:
1604 1604 self.discard()
1605 1605 else:
1606 1606 self.close()
1607 1607
1608 1608 def makedirs(name, mode=None, notindexed=False):
1609 1609 """recursive directory creation with parent mode inheritance
1610 1610
1611 1611 Newly created directories are marked as "not to be indexed by
1612 1612 the content indexing service", if ``notindexed`` is specified
1613 1613 for "write" mode access.
1614 1614 """
1615 1615 try:
1616 1616 makedir(name, notindexed)
1617 1617 except OSError as err:
1618 1618 if err.errno == errno.EEXIST:
1619 1619 return
1620 1620 if err.errno != errno.ENOENT or not name:
1621 1621 raise
1622 1622 parent = os.path.dirname(os.path.abspath(name))
1623 1623 if parent == name:
1624 1624 raise
1625 1625 makedirs(parent, mode, notindexed)
1626 1626 try:
1627 1627 makedir(name, notindexed)
1628 1628 except OSError as err:
1629 1629 # Catch EEXIST to handle races
1630 1630 if err.errno == errno.EEXIST:
1631 1631 return
1632 1632 raise
1633 1633 if mode is not None:
1634 1634 os.chmod(name, mode)
1635 1635
1636 1636 def readfile(path):
1637 1637 with open(path, 'rb') as fp:
1638 1638 return fp.read()
1639 1639
1640 1640 def writefile(path, text):
1641 1641 with open(path, 'wb') as fp:
1642 1642 fp.write(text)
1643 1643
1644 1644 def appendfile(path, text):
1645 1645 with open(path, 'ab') as fp:
1646 1646 fp.write(text)
1647 1647
1648 1648 class chunkbuffer(object):
1649 1649 """Allow arbitrary sized chunks of data to be efficiently read from an
1650 1650 iterator over chunks of arbitrary size."""
1651 1651
1652 1652 def __init__(self, in_iter):
1653 1653 """in_iter is the iterator that's iterating over the input
1654 1654 chunks."""
1655 1655 def splitbig(chunks):
1656 1656 for chunk in chunks:
1657 1657 if len(chunk) > 2**20:
1658 1658 pos = 0
1659 1659 while pos < len(chunk):
1660 1660 end = pos + 2 ** 18
1661 1661 yield chunk[pos:end]
1662 1662 pos = end
1663 1663 else:
1664 1664 yield chunk
1665 1665 self.iter = splitbig(in_iter)
1666 1666 self._queue = collections.deque()
1667 1667 self._chunkoffset = 0
1668 1668
1669 1669 def read(self, l=None):
1670 1670 """Read L bytes of data from the iterator of chunks of data.
1671 1671 Returns less than L bytes if the iterator runs dry.
1672 1672
1673 1673 If the size parameter is omitted, read everything."""
1674 1674 if l is None:
1675 1675 return ''.join(self.iter)
1676 1676
1677 1677 left = l
1678 1678 buf = []
1679 1679 queue = self._queue
1680 1680 while left > 0:
1681 1681 # refill the queue
1682 1682 if not queue:
1683 1683 target = 2**18
1684 1684 for chunk in self.iter:
1685 1685 queue.append(chunk)
1686 1686 target -= len(chunk)
1687 1687 if target <= 0:
1688 1688 break
1689 1689 if not queue:
1690 1690 break
1691 1691
1692 1692 # The easy way to do this would be to queue.popleft(), modify the
1693 1693 # chunk (if necessary), then queue.appendleft(). However, for cases
1694 1694 # where we read partial chunk content, this incurs 2 dequeue
1695 1695 # mutations and creates a new str for the remaining chunk in the
1696 1696 # queue. Our code below avoids this overhead.
1697 1697
1698 1698 chunk = queue[0]
1699 1699 chunkl = len(chunk)
1700 1700 offset = self._chunkoffset
1701 1701
1702 1702 # Use full chunk.
1703 1703 if offset == 0 and left >= chunkl:
1704 1704 left -= chunkl
1705 1705 queue.popleft()
1706 1706 buf.append(chunk)
1707 1707 # self._chunkoffset remains at 0.
1708 1708 continue
1709 1709
1710 1710 chunkremaining = chunkl - offset
1711 1711
1712 1712 # Use all of unconsumed part of chunk.
1713 1713 if left >= chunkremaining:
1714 1714 left -= chunkremaining
1715 1715 queue.popleft()
1716 1716 # offset == 0 is enabled by block above, so this won't merely
1717 1717 # copy via ``chunk[0:]``.
1718 1718 buf.append(chunk[offset:])
1719 1719 self._chunkoffset = 0
1720 1720
1721 1721 # Partial chunk needed.
1722 1722 else:
1723 1723 buf.append(chunk[offset:offset + left])
1724 1724 self._chunkoffset += left
1725 1725 left -= chunkremaining
1726 1726
1727 1727 return ''.join(buf)
1728 1728
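# A minimal sketch of chunkbuffer (hypothetical demo helper, not part of
# this module): fixed-size reads are reassembled from chunks of arbitrary
# size, consuming partial chunks via the offset bookkeeping above.
def _demochunkbuffer():
    buf = chunkbuffer(iter(['ab', 'cdef', 'g']))
    assert buf.read(3) == 'abc'
    assert buf.read(4) == 'defg'
    assert buf.read(1) == ''  # the underlying iterator is exhausted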
1729 1729 def filechunkiter(f, size=131072, limit=None):
1730 1730 """Create a generator that produces the data in the file, size
1731 1731 (default 131072) bytes at a time, up to an optional limit (default is
1732 1732 to read all data). Chunks may be less than size bytes if the
1733 1733 chunk is the last chunk in the file, or the file is a socket or
1734 1734 some other type of file that sometimes reads less data than is
1735 1735 requested."""
1736 1736 assert size >= 0
1737 1737 assert limit is None or limit >= 0
1738 1738 while True:
1739 1739 if limit is None:
1740 1740 nbytes = size
1741 1741 else:
1742 1742 nbytes = min(limit, size)
1743 1743 s = nbytes and f.read(nbytes)
1744 1744 if not s:
1745 1745 break
1746 1746 if limit:
1747 1747 limit -= len(s)
1748 1748 yield s
1749 1749
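# A minimal sketch of filechunkiter (hypothetical demo helper, not part of
# this module): the final chunk may be shorter than the requested size.
def _demofilechunkiter():
    import io
    f = io.BytesIO(b'x' * 300000)
    sizes = [len(c) for c in filechunkiter(f, size=131072)]
    assert sizes == [131072, 131072, 37856]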
1750 1750 def makedate(timestamp=None):
1751 1751 '''Return a unix timestamp (or the current time) as a (unixtime,
1752 1752 offset) tuple based off the local timezone.'''
1753 1753 if timestamp is None:
1754 1754 timestamp = time.time()
1755 1755 if timestamp < 0:
1756 1756 hint = _("check your clock")
1757 1757 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1758 1758 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1759 1759 datetime.datetime.fromtimestamp(timestamp))
1760 1760 tz = delta.days * 86400 + delta.seconds
1761 1761 return timestamp, tz
1762 1762
1763 1763 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1764 1764 """represent a (unixtime, offset) tuple as a localized time.
1765 1765 unixtime is seconds since the epoch, and offset is the time zone's
1766 1766 number of seconds away from UTC.
1767 1767
1768 1768 >>> datestr((0, 0))
1769 1769 'Thu Jan 01 00:00:00 1970 +0000'
1770 1770 >>> datestr((42, 0))
1771 1771 'Thu Jan 01 00:00:42 1970 +0000'
1772 1772 >>> datestr((-42, 0))
1773 1773 'Wed Dec 31 23:59:18 1969 +0000'
1774 1774 >>> datestr((0x7fffffff, 0))
1775 1775 'Tue Jan 19 03:14:07 2038 +0000'
1776 1776 >>> datestr((-0x80000000, 0))
1777 1777 'Fri Dec 13 20:45:52 1901 +0000'
1778 1778 """
1779 1779 t, tz = date or makedate()
1780 1780 if "%1" in format or "%2" in format or "%z" in format:
1781 1781 sign = (tz > 0) and "-" or "+"
1782 1782 minutes = abs(tz) // 60
1783 1783 q, r = divmod(minutes, 60)
1784 1784 format = format.replace("%z", "%1%2")
1785 1785 format = format.replace("%1", "%c%02d" % (sign, q))
1786 1786 format = format.replace("%2", "%02d" % r)
1787 1787 d = t - tz
1788 1788 if d > 0x7fffffff:
1789 1789 d = 0x7fffffff
1790 1790 elif d < -0x80000000:
1791 1791 d = -0x80000000
1792 1792 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1793 1793 # because they use the gmtime() system call which is buggy on Windows
1794 1794 # for negative values.
1795 1795 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1796 1796 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1797 1797 return s
1798 1798
1799 1799 def shortdate(date=None):
1800 1800 """turn (timestamp, tzoff) tuple into an ISO 8601 date."""
1801 1801 return datestr(date, format='%Y-%m-%d')
1802 1802
1803 1803 def parsetimezone(s):
1804 1804 """find a trailing timezone, if any, in string, and return a
1805 1805 (offset, remainder) pair"""
1806 1806
1807 1807 if s.endswith("GMT") or s.endswith("UTC"):
1808 1808 return 0, s[:-3].rstrip()
1809 1809
1810 1810 # Unix-style timezones [+-]hhmm
1811 1811 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1812 1812 sign = (s[-5] == "+") and 1 or -1
1813 1813 hours = int(s[-4:-2])
1814 1814 minutes = int(s[-2:])
1815 1815 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1816 1816
1817 1817 # ISO8601 trailing Z
1818 1818 if s.endswith("Z") and s[-2:-1].isdigit():
1819 1819 return 0, s[:-1]
1820 1820
1821 1821 # ISO8601-style [+-]hh:mm
1822 1822 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1823 1823 s[-5:-3].isdigit() and s[-2:].isdigit()):
1824 1824 sign = (s[-6] == "+") and 1 or -1
1825 1825 hours = int(s[-5:-3])
1826 1826 minutes = int(s[-2:])
1827 1827 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1828 1828
1829 1829 return None, s
1830 1830
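# A minimal sketch of parsetimezone (hypothetical demo helper, not part of
# this module): the returned offset is in seconds west of UTC, matching the
# (unixtime, offset) convention used throughout this file.
def _demoparsetimezone():
    assert parsetimezone('2017-02-01 10:00 +0500') == (-18000,
                                                       '2017-02-01 10:00')
    assert parsetimezone('10:00 GMT') == (0, '10:00')
    assert parsetimezone('no zone here') == (None, 'no zone here')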
1831 1831 def strdate(string, format, defaults=None):
1832 1832 """parse a localized time string and return a (unixtime, offset) tuple.
1833 1833 if the string cannot be parsed, ValueError is raised."""
1834 1834 if defaults is None:
1835 1835 defaults = {}
1836 1836
1837 1837 # NOTE: unixtime = localunixtime + offset
1838 1838 offset, date = parsetimezone(string)
1839 1839
1840 1840 # add missing elements from defaults
1841 1841 usenow = False # default to using biased defaults
1842 1842 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1843 1843 found = [True for p in part if ("%"+p) in format]
1844 1844 if not found:
1845 1845 date += "@" + defaults[part][usenow]
1846 1846 format += "@%" + part[0]
1847 1847 else:
1848 1848 # We've found a specific time element, less specific time
1849 1849 # elements are relative to today
1850 1850 usenow = True
1851 1851
1852 1852 timetuple = time.strptime(date, format)
1853 1853 localunixtime = int(calendar.timegm(timetuple))
1854 1854 if offset is None:
1855 1855 # local timezone
1856 1856 unixtime = int(time.mktime(timetuple))
1857 1857 offset = unixtime - localunixtime
1858 1858 else:
1859 1859 unixtime = localunixtime + offset
1860 1860 return unixtime, offset
1861 1861
1862 1862 def parsedate(date, formats=None, bias=None):
1863 1863 """parse a localized date/time and return a (unixtime, offset) tuple.
1864 1864
1865 1865 The date may be a "unixtime offset" string or in one of the specified
1866 1866 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1867 1867
1868 1868 >>> parsedate(' today ') == parsedate(\
1869 1869 datetime.date.today().strftime('%b %d'))
1870 1870 True
1871 1871 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1872 1872 datetime.timedelta(days=1)\
1873 1873 ).strftime('%b %d'))
1874 1874 True
1875 1875 >>> now, tz = makedate()
1876 1876 >>> strnow, strtz = parsedate('now')
1877 1877 >>> (strnow - now) < 1
1878 1878 True
1879 1879 >>> tz == strtz
1880 1880 True
1881 1881 """
1882 1882 if bias is None:
1883 1883 bias = {}
1884 1884 if not date:
1885 1885 return 0, 0
1886 1886 if isinstance(date, tuple) and len(date) == 2:
1887 1887 return date
1888 1888 if not formats:
1889 1889 formats = defaultdateformats
1890 1890 date = date.strip()
1891 1891
1892 1892 if date == 'now' or date == _('now'):
1893 1893 return makedate()
1894 1894 if date == 'today' or date == _('today'):
1895 1895 date = datetime.date.today().strftime('%b %d')
1896 1896 elif date == 'yesterday' or date == _('yesterday'):
1897 1897 date = (datetime.date.today() -
1898 1898 datetime.timedelta(days=1)).strftime('%b %d')
1899 1899
1900 1900 try:
1901 1901 when, offset = map(int, date.split(' '))
1902 1902 except ValueError:
1903 1903 # fill out defaults
1904 1904 now = makedate()
1905 1905 defaults = {}
1906 1906 for part in ("d", "mb", "yY", "HI", "M", "S"):
1907 1907 # this piece is for rounding the specific end of unknowns
1908 1908 b = bias.get(part)
1909 1909 if b is None:
1910 1910 if part[0] in "HMS":
1911 1911 b = "00"
1912 1912 else:
1913 1913 b = "0"
1914 1914
1915 1915 # this piece is for matching the generic end to today's date
1916 1916 n = datestr(now, "%" + part[0])
1917 1917
1918 1918 defaults[part] = (b, n)
1919 1919
1920 1920 for format in formats:
1921 1921 try:
1922 1922 when, offset = strdate(date, format, defaults)
1923 1923 except (ValueError, OverflowError):
1924 1924 pass
1925 1925 else:
1926 1926 break
1927 1927 else:
1928 1928 raise Abort(_('invalid date: %r') % date)
1929 1929 # validate explicit (probably user-specified) date and
1930 1930 # time zone offset. values must fit in signed 32 bits for
1931 1931 # current 32-bit linux runtimes. timezones go from UTC-12
1932 1932 # to UTC+14
1933 1933 if when < -0x80000000 or when > 0x7fffffff:
1934 1934 raise Abort(_('date exceeds 32 bits: %d') % when)
1935 1935 if offset < -50400 or offset > 43200:
1936 1936 raise Abort(_('impossible time zone offset: %d') % offset)
1937 1937 return when, offset
1938 1938
1939 1939 def matchdate(date):
1940 1940 """Return a function that matches a given date match specifier
1941 1941
1942 1942 Formats include:
1943 1943
1944 1944 '{date}' match a given date to the accuracy provided
1945 1945
1946 1946 '<{date}' on or before a given date
1947 1947
1948 1948 '>{date}' on or after a given date
1949 1949
1950 1950 >>> p1 = parsedate("10:29:59")
1951 1951 >>> p2 = parsedate("10:30:00")
1952 1952 >>> p3 = parsedate("10:30:59")
1953 1953 >>> p4 = parsedate("10:31:00")
1954 1954 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1955 1955 >>> f = matchdate("10:30")
1956 1956 >>> f(p1[0])
1957 1957 False
1958 1958 >>> f(p2[0])
1959 1959 True
1960 1960 >>> f(p3[0])
1961 1961 True
1962 1962 >>> f(p4[0])
1963 1963 False
1964 1964 >>> f(p5[0])
1965 1965 False
1966 1966 """
1967 1967
1968 1968 def lower(date):
1969 1969 d = {'mb': "1", 'd': "1"}
1970 1970 return parsedate(date, extendeddateformats, d)[0]
1971 1971
1972 1972 def upper(date):
1973 1973 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1974 1974 for days in ("31", "30", "29"):
1975 1975 try:
1976 1976 d["d"] = days
1977 1977 return parsedate(date, extendeddateformats, d)[0]
1978 1978 except Abort:
1979 1979 pass
1980 1980 d["d"] = "28"
1981 1981 return parsedate(date, extendeddateformats, d)[0]
1982 1982
1983 1983 date = date.strip()
1984 1984
1985 1985 if not date:
1986 1986 raise Abort(_("dates cannot consist entirely of whitespace"))
1987 1987 elif date[0] == "<":
1988 1988 if not date[1:]:
1989 1989 raise Abort(_("invalid day spec, use '<DATE'"))
1990 1990 when = upper(date[1:])
1991 1991 return lambda x: x <= when
1992 1992 elif date[0] == ">":
1993 1993 if not date[1:]:
1994 1994 raise Abort(_("invalid day spec, use '>DATE'"))
1995 1995 when = lower(date[1:])
1996 1996 return lambda x: x >= when
1997 1997 elif date[0] == "-":
1998 1998 try:
1999 1999 days = int(date[1:])
2000 2000 except ValueError:
2001 2001 raise Abort(_("invalid day spec: %s") % date[1:])
2002 2002 if days < 0:
2003 2003 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2004 2004 % date[1:])
2005 2005 when = makedate()[0] - days * 3600 * 24
2006 2006 return lambda x: x >= when
2007 2007 elif " to " in date:
2008 2008 a, b = date.split(" to ")
2009 2009 start, stop = lower(a), upper(b)
2010 2010 return lambda x: x >= start and x <= stop
2011 2011 else:
2012 2012 start, stop = lower(date), upper(date)
2013 2013 return lambda x: x >= start and x <= stop
2014 2014
2015 2015 def stringmatcher(pattern, casesensitive=True):
2016 2016 """
2017 2017 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2018 2018 returns the matcher name, pattern, and matcher function.
2019 2019 missing or unknown prefixes are treated as literal matches.
2020 2020
2021 2021 helper for tests:
2022 2022 >>> def test(pattern, *tests):
2023 2023 ... kind, pattern, matcher = stringmatcher(pattern)
2024 2024 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2025 2025 >>> def itest(pattern, *tests):
2026 2026 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2027 2027 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2028 2028
2029 2029 exact matching (no prefix):
2030 2030 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2031 2031 ('literal', 'abcdefg', [False, False, True])
2032 2032
2033 2033 regex matching ('re:' prefix)
2034 2034 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2035 2035 ('re', 'a.+b', [False, False, True])
2036 2036
2037 2037 force exact matches ('literal:' prefix)
2038 2038 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2039 2039 ('literal', 're:foobar', [False, True])
2040 2040
2041 2041 unknown prefixes are ignored and treated as literals
2042 2042 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2043 2043 ('literal', 'foo:bar', [False, False, True])
2044 2044
2045 2045 case insensitive regex matches
2046 2046 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2047 2047 ('re', 'A.+b', [False, False, True])
2048 2048
2049 2049 case insensitive literal matches
2050 2050 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2051 2051 ('literal', 'ABCDEFG', [False, False, True])
2052 2052 """
2053 2053 if pattern.startswith('re:'):
2054 2054 pattern = pattern[3:]
2055 2055 try:
2056 2056 flags = 0
2057 2057 if not casesensitive:
2058 2058 flags = remod.I
2059 2059 regex = remod.compile(pattern, flags)
2060 2060 except remod.error as e:
2061 2061 raise error.ParseError(_('invalid regular expression: %s')
2062 2062 % e)
2063 2063 return 're', pattern, regex.search
2064 2064 elif pattern.startswith('literal:'):
2065 2065 pattern = pattern[8:]
2066 2066
2067 2067 match = pattern.__eq__
2068 2068
2069 2069 if not casesensitive:
2070 2070 ipat = encoding.lower(pattern)
2071 2071 match = lambda s: ipat == encoding.lower(s)
2072 2072 return 'literal', pattern, match
2073 2073
2074 2074 def shortuser(user):
2075 2075 """Return a short representation of a user name or email address."""
2076 2076 f = user.find('@')
2077 2077 if f >= 0:
2078 2078 user = user[:f]
2079 2079 f = user.find('<')
2080 2080 if f >= 0:
2081 2081 user = user[f + 1:]
2082 2082 f = user.find(' ')
2083 2083 if f >= 0:
2084 2084 user = user[:f]
2085 2085 f = user.find('.')
2086 2086 if f >= 0:
2087 2087 user = user[:f]
2088 2088 return user
2089 2089
2090 2090 def emailuser(user):
2091 2091 """Return the user portion of an email address."""
2092 2092 f = user.find('@')
2093 2093 if f >= 0:
2094 2094 user = user[:f]
2095 2095 f = user.find('<')
2096 2096 if f >= 0:
2097 2097 user = user[f + 1:]
2098 2098 return user
2099 2099
2100 2100 def email(author):
2101 2101 '''get email of author.'''
2102 2102 r = author.find('>')
2103 2103 if r == -1:
2104 2104 r = None
2105 2105 return author[author.find('<') + 1:r]
2106 2106
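# A minimal sketch of the three author-string helpers above (hypothetical
# demo helper, not part of this module), applied to one address.
def _demouserhelpers():
    author = 'John Doe <john.doe@example.com>'
    assert email(author) == 'john.doe@example.com'
    assert emailuser(author) == 'john.doe'
    assert shortuser(author) == 'john'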
2107 2107 def ellipsis(text, maxlength=400):
2108 2108 """Trim string to at most maxlength (default: 400) columns in display."""
2109 2109 return encoding.trim(text, maxlength, ellipsis='...')
2110 2110
2111 2111 def unitcountfn(*unittable):
2112 2112 '''return a function that renders a readable count of some quantity'''
2113 2113
2114 2114 def go(count):
2115 2115 for multiplier, divisor, format in unittable:
2116 2116 if count >= divisor * multiplier:
2117 2117 return format % (count / float(divisor))
2118 2118 return unittable[-1][2] % count
2119 2119
2120 2120 return go
2121 2121
2122 2122 bytecount = unitcountfn(
2123 2123 (100, 1 << 30, _('%.0f GB')),
2124 2124 (10, 1 << 30, _('%.1f GB')),
2125 2125 (1, 1 << 30, _('%.2f GB')),
2126 2126 (100, 1 << 20, _('%.0f MB')),
2127 2127 (10, 1 << 20, _('%.1f MB')),
2128 2128 (1, 1 << 20, _('%.2f MB')),
2129 2129 (100, 1 << 10, _('%.0f KB')),
2130 2130 (10, 1 << 10, _('%.1f KB')),
2131 2131 (1, 1 << 10, _('%.2f KB')),
2132 2132 (1, 1, _('%.0f bytes')),
2133 2133 )
2134 2134
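# A minimal sketch of bytecount (hypothetical demo helper, not part of this
# module; assumes the default untranslated format strings): the first row
# whose threshold is met wins, so precision drops as magnitude grows.
def _demobytecount():
    assert bytecount(512) == '512 bytes'
    assert bytecount(1234567) == '1.18 MB'
    assert bytecount(150 * (1 << 30)) == '150 GB'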
2135 2135 def escapestr(s):
2136 2136 # call the function underlying s.encode('string_escape') directly,
2137 2137 # for Python 3 compatibility
2138 2138 return codecs.escape_encode(s)[0]
2139 2139
2140 def unescapestr(s):
2141 return s.decode('string_escape')
2142
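# A minimal round-trip sketch for escapestr/unescapestr (hypothetical demo
# helper, not part of this module). Note that unescapestr as written relies
# on the Python 2-only 'string_escape' codec, so this assumes Python 2.
def _demoescapestr():
    s = 'ab\ncd\0ef'
    assert escapestr(s) == 'ab\\ncd\\x00ef'
    assert unescapestr(escapestr(s)) == s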
2140 2143 def uirepr(s):
2141 2144 # Avoid double backslash in Windows path repr()
2142 2145 return repr(s).replace('\\\\', '\\')
2143 2146
2144 2147 # delay import of textwrap
2145 2148 def MBTextWrapper(**kwargs):
2146 2149 class tw(textwrap.TextWrapper):
2147 2150 """
2148 2151 Extend TextWrapper for width-awareness.
2149 2152
2150 2153 Neither the number of 'bytes' in any encoding nor the number of
2151 2154 'characters' is appropriate for calculating the terminal columns
2152 2155 of a given string.
2153 2156 The original TextWrapper implementation uses the built-in 'len()'
2154 2157 directly, so overriding is needed to use the width of each character.
2155 2158
2156 2159 In addition, characters classified as 'ambiguous' width are
2157 2160 treated as wide in East Asian locales, but as narrow elsewhere.
2158 2161
2159 2162 This requires a per-use decision to determine the width of such characters.
2160 2163 """
2161 2164 def _cutdown(self, ucstr, space_left):
2162 2165 l = 0
2163 2166 colwidth = encoding.ucolwidth
2164 2167 for i in xrange(len(ucstr)):
2165 2168 l += colwidth(ucstr[i])
2166 2169 if space_left < l:
2167 2170 return (ucstr[:i], ucstr[i:])
2168 2171 return ucstr, ''
2169 2172
2170 2173 # overriding of base class
2171 2174 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2172 2175 space_left = max(width - cur_len, 1)
2173 2176
2174 2177 if self.break_long_words:
2175 2178 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2176 2179 cur_line.append(cut)
2177 2180 reversed_chunks[-1] = res
2178 2181 elif not cur_line:
2179 2182 cur_line.append(reversed_chunks.pop())
2180 2183
2181 2184 # this overriding code is imported from TextWrapper of Python 2.6
2182 2185 # to calculate columns of string by 'encoding.ucolwidth()'
2183 2186 def _wrap_chunks(self, chunks):
2184 2187 colwidth = encoding.ucolwidth
2185 2188
2186 2189 lines = []
2187 2190 if self.width <= 0:
2188 2191 raise ValueError("invalid width %r (must be > 0)" % self.width)
2189 2192
2190 2193 # Arrange in reverse order so items can be efficiently popped
2191 2194 # from a stack of chunks.
2192 2195 chunks.reverse()
2193 2196
2194 2197 while chunks:
2195 2198
2196 2199 # Start the list of chunks that will make up the current line.
2197 2200 # cur_len is just the length of all the chunks in cur_line.
2198 2201 cur_line = []
2199 2202 cur_len = 0
2200 2203
2201 2204 # Figure out which static string will prefix this line.
2202 2205 if lines:
2203 2206 indent = self.subsequent_indent
2204 2207 else:
2205 2208 indent = self.initial_indent
2206 2209
2207 2210 # Maximum width for this line.
2208 2211 width = self.width - len(indent)
2209 2212
2210 2213 # First chunk on line is whitespace -- drop it, unless this
2211 2214 # is the very beginning of the text (i.e. no lines started yet).
2212 2215 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2213 2216 del chunks[-1]
2214 2217
2215 2218 while chunks:
2216 2219 l = colwidth(chunks[-1])
2217 2220
2218 2221 # Can at least squeeze this chunk onto the current line.
2219 2222 if cur_len + l <= width:
2220 2223 cur_line.append(chunks.pop())
2221 2224 cur_len += l
2222 2225
2223 2226 # Nope, this line is full.
2224 2227 else:
2225 2228 break
2226 2229
2227 2230 # The current line is full, and the next chunk is too big to
2228 2231 # fit on *any* line (not just this one).
2229 2232 if chunks and colwidth(chunks[-1]) > width:
2230 2233 self._handle_long_word(chunks, cur_line, cur_len, width)
2231 2234
2232 2235 # If the last chunk on this line is all whitespace, drop it.
2233 2236 if (self.drop_whitespace and
2234 2237 cur_line and cur_line[-1].strip() == ''):
2235 2238 del cur_line[-1]
2236 2239
2237 2240 # Convert current line back to a string and store it in list
2238 2241 # of all lines (return value).
2239 2242 if cur_line:
2240 2243 lines.append(indent + ''.join(cur_line))
2241 2244
2242 2245 return lines
2243 2246
2244 2247 global MBTextWrapper
2245 2248 MBTextWrapper = tw
2246 2249 return tw(**kwargs)
2247 2250
2248 2251 def wrap(line, width, initindent='', hangindent=''):
2249 2252 maxindent = max(len(hangindent), len(initindent))
2250 2253 if width <= maxindent:
2251 2254 # adjust for weird terminal size
2252 2255 width = max(78, maxindent + 1)
2253 2256 line = line.decode(pycompat.sysstr(encoding.encoding),
2254 2257 pycompat.sysstr(encoding.encodingmode))
2255 2258 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2256 2259 pycompat.sysstr(encoding.encodingmode))
2257 2260 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2258 2261 pycompat.sysstr(encoding.encodingmode))
2259 2262 wrapper = MBTextWrapper(width=width,
2260 2263 initial_indent=initindent,
2261 2264 subsequent_indent=hangindent)
2262 2265 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2263 2266
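# Illustrative wrap() call (assumes the local encoding round-trips ASCII):
# the hanging indent applies to every line but the first.
#
#   >>> wrap('aaaa bbbb cccc dddd', width=10, hangindent='  ')
#   'aaaa bbbb\n  cccc\n  dddd'
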
2264 2267 if (pyplatform.python_implementation() == 'CPython' and
2265 2268 sys.version_info < (3, 0)):
2266 2269 # There is an issue in CPython that some IO methods do not handle EINTR
2267 2270 # correctly. The following table shows which CPython versions (and functions)
2268 2271 # are affected (buggy: has the EINTR bug, okay: otherwise):
2269 2272 #
2270 2273 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2271 2274 # --------------------------------------------------
2272 2275 # fp.__iter__ | buggy | buggy | okay
2273 2276 # fp.read* | buggy | okay [1] | okay
2274 2277 #
2275 2278 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2276 2279 #
2277 2280 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2278 2281 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2279 2282 #
2280 2283 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2281 2284 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2282 2285 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2283 2286 # fp.__iter__ but not other fp.read* methods.
2284 2287 #
2285 2288 # On modern systems like Linux, the "read" syscall cannot be interrupted
2286 2289 # when reading "fast" files like on-disk files. So the EINTR issue only
2287 2290 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2288 2291 # files approximately as "fast" files and use the fast (unsafe) code path,
2289 2292 # to minimize the performance impact.
2290 2293 if sys.version_info >= (2, 7, 4):
2291 2294 # fp.readline deals with EINTR correctly, use it as a workaround.
2292 2295 def _safeiterfile(fp):
2293 2296 return iter(fp.readline, '')
2294 2297 else:
2295 2298 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2296 2299 # note: this may block longer than necessary because of bufsize.
2297 2300 def _safeiterfile(fp, bufsize=4096):
2298 2301 fd = fp.fileno()
2299 2302 line = ''
2300 2303 while True:
2301 2304 try:
2302 2305 buf = os.read(fd, bufsize)
2303 2306 except OSError as ex:
2304 2307 # os.read only raises EINTR before any data is read
2305 2308 if ex.errno == errno.EINTR:
2306 2309 continue
2307 2310 else:
2308 2311 raise
2309 2312 line += buf
2310 2313 if '\n' in buf:
2311 2314 splitted = line.splitlines(True)
2312 2315 line = ''
2313 2316 for l in splitted:
2314 2317 if l[-1] == '\n':
2315 2318 yield l
2316 2319 else:
2317 2320 line = l
2318 2321 if not buf:
2319 2322 break
2320 2323 if line:
2321 2324 yield line
2322 2325
2323 2326 def iterfile(fp):
2324 2327 fastpath = True
2325 2328 if type(fp) is file:
2326 2329 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2327 2330 if fastpath:
2328 2331 return fp
2329 2332 else:
2330 2333 return _safeiterfile(fp)
2331 2334 else:
2332 2335 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2333 2336 def iterfile(fp):
2334 2337 return fp
2335 2338
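# Illustrative use of iterfile() (the subprocess and handle() below are
# stand-ins; the point is that pipe-backed files take the EINTR-safe path
# on old CPython while regular on-disk files keep the fast path):
#
#   proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
#                           stdout=subprocess.PIPE)
#   for line in iterfile(proc.stdout):
#       handle(line)
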
2336 2339 def iterlines(iterator):
2337 2340 for chunk in iterator:
2338 2341 for line in chunk.splitlines():
2339 2342 yield line
2340 2343
2341 2344 def expandpath(path):
2342 2345 return os.path.expanduser(os.path.expandvars(path))
2343 2346
2344 2347 def hgcmd():
2345 2348 """Return the command used to execute current hg
2346 2349
2347 2350 This is different from hgexecutable() because on Windows we want
2348 2351 to avoid things that open new shell windows, like batch files, so we
2349 2352 return either the python invocation or the current executable.
2350 2353 """
2351 2354 if mainfrozen():
2352 2355 if getattr(sys, 'frozen', None) == 'macosx_app':
2353 2356 # Env variable set by py2app
2354 2357 return [encoding.environ['EXECUTABLEPATH']]
2355 2358 else:
2356 2359 return [pycompat.sysexecutable]
2357 2360 return gethgcmd()
2358 2361
2359 2362 def rundetached(args, condfn):
2360 2363 """Execute the argument list in a detached process.
2361 2364
2362 2365 condfn is a callable which is called repeatedly and should return
2363 2366 True once the child process is known to have started successfully.
2364 2367 At this point, the child process PID is returned. If the child
2365 2368 process fails to start or finishes before condfn() evaluates to
2366 2369 True, return -1.
2367 2370 """
2368 2371 # Windows case is easier because the child process is either
2369 2372 # successfully starting and validating the condition or exiting
2370 2373 # on failure. We just poll on its PID. On Unix, if the child
2371 2374 # process fails to start, it will be left in a zombie state until
2372 2375 # the parent wait on it, which we cannot do since we expect a long
2373 2376 # running process on success. Instead we listen for SIGCHLD telling
2374 2377 # us our child process terminated.
2375 2378 terminated = set()
2376 2379 def handler(signum, frame):
2377 2380 terminated.add(os.wait())
2378 2381 prevhandler = None
2379 2382 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2380 2383 if SIGCHLD is not None:
2381 2384 prevhandler = signal.signal(SIGCHLD, handler)
2382 2385 try:
2383 2386 pid = spawndetached(args)
2384 2387 while not condfn():
2385 2388 if ((pid in terminated or not testpid(pid))
2386 2389 and not condfn()):
2387 2390 return -1
2388 2391 time.sleep(0.1)
2389 2392 return pid
2390 2393 finally:
2391 2394 if prevhandler is not None:
2392 2395 signal.signal(signal.SIGCHLD, prevhandler)
2393 2396
2394 2397 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2395 2398 """Return the result of interpolating items in the mapping into string s.
2396 2399
2397 2400 prefix is a single character string, or a two character string with
2398 2401 a backslash as the first character if the prefix needs to be escaped in
2399 2402 a regular expression.
2400 2403
2401 2404 fn is an optional function that will be applied to the replacement text
2402 2405 just before replacement.
2403 2406
2404 2407 escape_prefix is an optional flag that allows a doubled prefix to
2405 2408 serve as its own escape.
2406 2409 """
2407 2410 fn = fn or (lambda s: s)
2408 2411 patterns = '|'.join(mapping.keys())
2409 2412 if escape_prefix:
2410 2413 patterns += '|' + prefix
2411 2414 if len(prefix) > 1:
2412 2415 prefix_char = prefix[1:]
2413 2416 else:
2414 2417 prefix_char = prefix
2415 2418 mapping[prefix_char] = prefix_char
2416 2419 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2417 2420 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2418 2421
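# Example (doctest-style, added for exposition; '$' must be escaped here
# because it is a regex metacharacter):
#
#   >>> interpolate(r'\$', {'foo': 'bar'}, 'drink $foo')
#   'drink bar'
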
2419 2422 def getport(port):
2420 2423 """Return the port for a given network service.
2421 2424
2422 2425 If port is an integer, it's returned as is. If it's a string, it's
2423 2426 looked up using socket.getservbyname(). If there's no matching
2424 2427 service, error.Abort is raised.
2425 2428 """
2426 2429 try:
2427 2430 return int(port)
2428 2431 except ValueError:
2429 2432 pass
2430 2433
2431 2434 try:
2432 2435 return socket.getservbyname(port)
2433 2436 except socket.error:
2434 2437 raise Abort(_("no port number associated with service '%s'") % port)
2435 2438
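# Examples (illustrative): integers and numeric strings pass through,
# while service names depend on the local services database.
#
#   >>> getport(8080)
#   8080
#   >>> getport('8080')
#   8080
#
# getport('http') would return 80 on a system whose services database
# defines it, and raise Abort otherwise.
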
2436 2439 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2437 2440 '0': False, 'no': False, 'false': False, 'off': False,
2438 2441 'never': False}
2439 2442
2440 2443 def parsebool(s):
2441 2444 """Parse s into a boolean.
2442 2445
2443 2446 If s is not a valid boolean, returns None.
2444 2447 """
2445 2448 return _booleans.get(s.lower(), None)
2446 2449
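# Examples (illustrative):
#
#   >>> parsebool('on'), parsebool('Never'), parsebool('maybe')
#   (True, False, None)
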
2447 2450 _hextochr = dict((a + b, chr(int(a + b, 16)))
2448 2451 for a in string.hexdigits for b in string.hexdigits)
2449 2452
2450 2453 class url(object):
2451 2454 r"""Reliable URL parser.
2452 2455
2453 2456 This parses URLs and provides attributes for the following
2454 2457 components:
2455 2458
2456 2459 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2457 2460
2458 2461 Missing components are set to None. The only exception is
2459 2462 fragment, which is set to '' if present but empty.
2460 2463
2461 2464 If parsefragment is False, fragment is included in query. If
2462 2465 parsequery is False, query is included in path. If both are
2463 2466 False, both fragment and query are included in path.
2464 2467
2465 2468 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2466 2469
2467 2470 Note that for backward compatibility reasons, bundle URLs do not
2468 2471 take host names. That means 'bundle://../' has a path of '../'.
2469 2472
2470 2473 Examples:
2471 2474
2472 2475 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2473 2476 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2474 2477 >>> url('ssh://[::1]:2200//home/joe/repo')
2475 2478 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2476 2479 >>> url('file:///home/joe/repo')
2477 2480 <url scheme: 'file', path: '/home/joe/repo'>
2478 2481 >>> url('file:///c:/temp/foo/')
2479 2482 <url scheme: 'file', path: 'c:/temp/foo/'>
2480 2483 >>> url('bundle:foo')
2481 2484 <url scheme: 'bundle', path: 'foo'>
2482 2485 >>> url('bundle://../foo')
2483 2486 <url scheme: 'bundle', path: '../foo'>
2484 2487 >>> url(r'c:\foo\bar')
2485 2488 <url path: 'c:\\foo\\bar'>
2486 2489 >>> url(r'\\blah\blah\blah')
2487 2490 <url path: '\\\\blah\\blah\\blah'>
2488 2491 >>> url(r'\\blah\blah\blah#baz')
2489 2492 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2490 2493 >>> url(r'file:///C:\users\me')
2491 2494 <url scheme: 'file', path: 'C:\\users\\me'>
2492 2495
2493 2496 Authentication credentials:
2494 2497
2495 2498 >>> url('ssh://joe:xyz@x/repo')
2496 2499 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2497 2500 >>> url('ssh://joe@x/repo')
2498 2501 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2499 2502
2500 2503 Query strings and fragments:
2501 2504
2502 2505 >>> url('http://host/a?b#c')
2503 2506 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2504 2507 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2505 2508 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2506 2509
2507 2510 Empty path:
2508 2511
2509 2512 >>> url('')
2510 2513 <url path: ''>
2511 2514 >>> url('#a')
2512 2515 <url path: '', fragment: 'a'>
2513 2516 >>> url('http://host/')
2514 2517 <url scheme: 'http', host: 'host', path: ''>
2515 2518 >>> url('http://host/#a')
2516 2519 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2517 2520
2518 2521 Only scheme:
2519 2522
2520 2523 >>> url('http:')
2521 2524 <url scheme: 'http'>
2522 2525 """
2523 2526
2524 2527 _safechars = "!~*'()+"
2525 2528 _safepchars = "/!~*'()+:\\"
2526 2529 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2527 2530
2528 2531 def __init__(self, path, parsequery=True, parsefragment=True):
2529 2532 # We slowly chomp away at path until we have only the path left
2530 2533 self.scheme = self.user = self.passwd = self.host = None
2531 2534 self.port = self.path = self.query = self.fragment = None
2532 2535 self._localpath = True
2533 2536 self._hostport = ''
2534 2537 self._origpath = path
2535 2538
2536 2539 if parsefragment and '#' in path:
2537 2540 path, self.fragment = path.split('#', 1)
2538 2541
2539 2542 # special case for Windows drive letters and UNC paths
2540 2543 if hasdriveletter(path) or path.startswith('\\\\'):
2541 2544 self.path = path
2542 2545 return
2543 2546
2544 2547 # For compatibility reasons, we can't handle bundle paths as
2545 2548 # normal URLS
2546 2549 if path.startswith('bundle:'):
2547 2550 self.scheme = 'bundle'
2548 2551 path = path[7:]
2549 2552 if path.startswith('//'):
2550 2553 path = path[2:]
2551 2554 self.path = path
2552 2555 return
2553 2556
2554 2557 if self._matchscheme(path):
2555 2558 parts = path.split(':', 1)
2556 2559 if parts[0]:
2557 2560 self.scheme, path = parts
2558 2561 self._localpath = False
2559 2562
2560 2563 if not path:
2561 2564 path = None
2562 2565 if self._localpath:
2563 2566 self.path = ''
2564 2567 return
2565 2568 else:
2566 2569 if self._localpath:
2567 2570 self.path = path
2568 2571 return
2569 2572
2570 2573 if parsequery and '?' in path:
2571 2574 path, self.query = path.split('?', 1)
2572 2575 if not path:
2573 2576 path = None
2574 2577 if not self.query:
2575 2578 self.query = None
2576 2579
2577 2580 # // is required to specify a host/authority
2578 2581 if path and path.startswith('//'):
2579 2582 parts = path[2:].split('/', 1)
2580 2583 if len(parts) > 1:
2581 2584 self.host, path = parts
2582 2585 else:
2583 2586 self.host = parts[0]
2584 2587 path = None
2585 2588 if not self.host:
2586 2589 self.host = None
2587 2590 # path of file:///d is /d
2588 2591 # path of file:///d:/ is d:/, not /d:/
2589 2592 if path and not hasdriveletter(path):
2590 2593 path = '/' + path
2591 2594
2592 2595 if self.host and '@' in self.host:
2593 2596 self.user, self.host = self.host.rsplit('@', 1)
2594 2597 if ':' in self.user:
2595 2598 self.user, self.passwd = self.user.split(':', 1)
2596 2599 if not self.host:
2597 2600 self.host = None
2598 2601
2599 2602 # Don't split on colons in IPv6 addresses without ports
2600 2603 if (self.host and ':' in self.host and
2601 2604 not (self.host.startswith('[') and self.host.endswith(']'))):
2602 2605 self._hostport = self.host
2603 2606 self.host, self.port = self.host.rsplit(':', 1)
2604 2607 if not self.host:
2605 2608 self.host = None
2606 2609
2607 2610 if (self.host and self.scheme == 'file' and
2608 2611 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2609 2612 raise Abort(_('file:// URLs can only refer to localhost'))
2610 2613
2611 2614 self.path = path
2612 2615
2613 2616 # leave the query string escaped
2614 2617 for a in ('user', 'passwd', 'host', 'port',
2615 2618 'path', 'fragment'):
2616 2619 v = getattr(self, a)
2617 2620 if v is not None:
2618 2621 setattr(self, a, pycompat.urlunquote(v))
2619 2622
2620 2623 def __repr__(self):
2621 2624 attrs = []
2622 2625 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2623 2626 'query', 'fragment'):
2624 2627 v = getattr(self, a)
2625 2628 if v is not None:
2626 2629 attrs.append('%s: %r' % (a, v))
2627 2630 return '<url %s>' % ', '.join(attrs)
2628 2631
2629 2632 def __str__(self):
2630 2633 r"""Join the URL's components back into a URL string.
2631 2634
2632 2635 Examples:
2633 2636
2634 2637 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2635 2638 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2636 2639 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2637 2640 'http://user:pw@host:80/?foo=bar&baz=42'
2638 2641 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2639 2642 'http://user:pw@host:80/?foo=bar%3dbaz'
2640 2643 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2641 2644 'ssh://user:pw@[::1]:2200//home/joe#'
2642 2645 >>> str(url('http://localhost:80//'))
2643 2646 'http://localhost:80//'
2644 2647 >>> str(url('http://localhost:80/'))
2645 2648 'http://localhost:80/'
2646 2649 >>> str(url('http://localhost:80'))
2647 2650 'http://localhost:80/'
2648 2651 >>> str(url('bundle:foo'))
2649 2652 'bundle:foo'
2650 2653 >>> str(url('bundle://../foo'))
2651 2654 'bundle:../foo'
2652 2655 >>> str(url('path'))
2653 2656 'path'
2654 2657 >>> str(url('file:///tmp/foo/bar'))
2655 2658 'file:///tmp/foo/bar'
2656 2659 >>> str(url('file:///c:/tmp/foo/bar'))
2657 2660 'file:///c:/tmp/foo/bar'
2658 2661 >>> print url(r'bundle:foo\bar')
2659 2662 bundle:foo\bar
2660 2663 >>> print url(r'file:///D:\data\hg')
2661 2664 file:///D:\data\hg
2662 2665 """
2663 2666 return encoding.strfromlocal(self.__bytes__())
2664 2667
2665 2668 def __bytes__(self):
2666 2669 if self._localpath:
2667 2670 s = self.path
2668 2671 if self.scheme == 'bundle':
2669 2672 s = 'bundle:' + s
2670 2673 if self.fragment:
2671 2674 s += '#' + self.fragment
2672 2675 return s
2673 2676
2674 2677 s = self.scheme + ':'
2675 2678 if self.user or self.passwd or self.host:
2676 2679 s += '//'
2677 2680 elif self.scheme and (not self.path or self.path.startswith('/')
2678 2681 or hasdriveletter(self.path)):
2679 2682 s += '//'
2680 2683 if hasdriveletter(self.path):
2681 2684 s += '/'
2682 2685 if self.user:
2683 2686 s += urlreq.quote(self.user, safe=self._safechars)
2684 2687 if self.passwd:
2685 2688 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2686 2689 if self.user or self.passwd:
2687 2690 s += '@'
2688 2691 if self.host:
2689 2692 if not (self.host.startswith('[') and self.host.endswith(']')):
2690 2693 s += urlreq.quote(self.host)
2691 2694 else:
2692 2695 s += self.host
2693 2696 if self.port:
2694 2697 s += ':' + urlreq.quote(self.port)
2695 2698 if self.host:
2696 2699 s += '/'
2697 2700 if self.path:
2698 2701 # TODO: similar to the query string, we should not unescape the
2699 2702 # path when we store it, the path might contain '%2f' = '/',
2700 2703 # which we should *not* escape.
2701 2704 s += urlreq.quote(self.path, safe=self._safepchars)
2702 2705 if self.query:
2703 2706 # we store the query in escaped form.
2704 2707 s += '?' + self.query
2705 2708 if self.fragment is not None:
2706 2709 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2707 2710 return s
2708 2711
2709 2712 def authinfo(self):
2710 2713 user, passwd = self.user, self.passwd
2711 2714 try:
2712 2715 self.user, self.passwd = None, None
2713 2716 s = str(self)
2714 2717 finally:
2715 2718 self.user, self.passwd = user, passwd
2716 2719 if not self.user:
2717 2720 return (s, None)
2718 2721 # authinfo[1] is passed to urllib2 password manager, and its
2719 2722 # URIs must not contain credentials. The host is passed in the
2720 2723 # URIs list because Python < 2.4.3 uses only that to search for
2721 2724 # a password.
2722 2725 return (s, (None, (s, self.host),
2723 2726 self.user, self.passwd or ''))
2724 2727
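# Illustrative authinfo() behaviour (added for exposition): the returned
# URI has credentials stripped, while user and password travel separately
# for the password manager.
#
#   >>> u = url('http://joe:xyz@example.com/repo')
#   >>> u.authinfo()[0]
#   'http://example.com/repo'
#   >>> u.authinfo()[1][2:]
#   ('joe', 'xyz')
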
2725 2728 def isabs(self):
2726 2729 if self.scheme and self.scheme != 'file':
2727 2730 return True # remote URL
2728 2731 if hasdriveletter(self.path):
2729 2732 return True # absolute for our purposes - can't be joined()
2730 2733 if self.path.startswith(r'\\'):
2731 2734 return True # Windows UNC path
2732 2735 if self.path.startswith('/'):
2733 2736 return True # POSIX-style
2734 2737 return False
2735 2738
2736 2739 def localpath(self):
2737 2740 if self.scheme == 'file' or self.scheme == 'bundle':
2738 2741 path = self.path or '/'
2739 2742 # For Windows, we need to promote hosts containing drive
2740 2743 # letters to paths with drive letters.
2741 2744 if hasdriveletter(self._hostport):
2742 2745 path = self._hostport + '/' + self.path
2743 2746 elif (self.host is not None and self.path
2744 2747 and not hasdriveletter(path)):
2745 2748 path = '/' + path
2746 2749 return path
2747 2750 return self._origpath
2748 2751
2749 2752 def islocal(self):
2750 2753 '''whether localpath will return something that posixfile can open'''
2751 2754 return (not self.scheme or self.scheme == 'file'
2752 2755 or self.scheme == 'bundle')
2753 2756
2754 2757 def hasscheme(path):
2755 2758 return bool(url(path).scheme)
2756 2759
2757 2760 def hasdriveletter(path):
2758 2761 return path and path[1:2] == ':' and path[0:1].isalpha()
2759 2762
2760 2763 def urllocalpath(path):
2761 2764 return url(path, parsequery=False, parsefragment=False).localpath()
2762 2765
2763 2766 def hidepassword(u):
2764 2767 '''hide user credential in a url string'''
2765 2768 u = url(u)
2766 2769 if u.passwd:
2767 2770 u.passwd = '***'
2768 2771 return str(u)
2769 2772
2770 2773 def removeauth(u):
2771 2774 '''remove all authentication information from a url string'''
2772 2775 u = url(u)
2773 2776 u.user = u.passwd = None
2774 2777 return str(u)
2775 2778
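# Examples (illustrative):
#
#   >>> hidepassword('http://joe:secret@host/repo')
#   'http://joe:***@host/repo'
#   >>> removeauth('http://joe:secret@host/repo')
#   'http://host/repo'
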
2776 2779 timecount = unitcountfn(
2777 2780 (1, 1e3, _('%.0f s')),
2778 2781 (100, 1, _('%.1f s')),
2779 2782 (10, 1, _('%.2f s')),
2780 2783 (1, 1, _('%.3f s')),
2781 2784 (100, 0.001, _('%.1f ms')),
2782 2785 (10, 0.001, _('%.2f ms')),
2783 2786 (1, 0.001, _('%.3f ms')),
2784 2787 (100, 0.000001, _('%.1f us')),
2785 2788 (10, 0.000001, _('%.2f us')),
2786 2789 (1, 0.000001, _('%.3f us')),
2787 2790 (100, 0.000000001, _('%.1f ns')),
2788 2791 (10, 0.000000001, _('%.2f ns')),
2789 2792 (1, 0.000000001, _('%.3f ns')),
2790 2793 )
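
# As with bytecount above, the first row whose threshold is met wins
# (illustrative):
#
#   >>> timecount(2.5)
#   '2.500 s'
#   >>> timecount(0.0015)
#   '1.500 ms'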
2791 2794
2792 2795 _timenesting = [0]
2793 2796
2794 2797 def timed(func):
2795 2798 '''Report the execution time of a function call to stderr.
2796 2799
2797 2800 During development, use as a decorator when you need to measure
2798 2801 the cost of a function, e.g. as follows:
2799 2802
2800 2803 @util.timed
2801 2804 def foo(a, b, c):
2802 2805 pass
2803 2806 '''
2804 2807
2805 2808 def wrapper(*args, **kwargs):
2806 2809 start = timer()
2807 2810 indent = 2
2808 2811 _timenesting[0] += indent
2809 2812 try:
2810 2813 return func(*args, **kwargs)
2811 2814 finally:
2812 2815 elapsed = timer() - start
2813 2816 _timenesting[0] -= indent
2814 2817 stderr.write('%s%s: %s\n' %
2815 2818 (' ' * _timenesting[0], func.__name__,
2816 2819 timecount(elapsed)))
2817 2820 return wrapper
2818 2821
2819 2822 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2820 2823 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2821 2824
2822 2825 def sizetoint(s):
2823 2826 '''Convert a space specifier to a byte count.
2824 2827
2825 2828 >>> sizetoint('30')
2826 2829 30
2827 2830 >>> sizetoint('2.2kb')
2828 2831 2252
2829 2832 >>> sizetoint('6M')
2830 2833 6291456
2831 2834 '''
2832 2835 t = s.strip().lower()
2833 2836 try:
2834 2837 for k, u in _sizeunits:
2835 2838 if t.endswith(k):
2836 2839 return int(float(t[:-len(k)]) * u)
2837 2840 return int(t)
2838 2841 except ValueError:
2839 2842 raise error.ParseError(_("couldn't parse size: %s") % s)
2840 2843
2841 2844 class hooks(object):
2842 2845 '''A collection of hook functions that can be used to extend a
2843 2846 function's behavior. Hooks are called in lexicographic order,
2844 2847 based on the names of their sources.'''
2845 2848
2846 2849 def __init__(self):
2847 2850 self._hooks = []
2848 2851
2849 2852 def add(self, source, hook):
2850 2853 self._hooks.append((source, hook))
2851 2854
2852 2855 def __call__(self, *args):
2853 2856 self._hooks.sort(key=lambda x: x[0])
2854 2857 results = []
2855 2858 for source, hook in self._hooks:
2856 2859 results.append(hook(*args))
2857 2860 return results
2858 2861
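# Illustrative use (added for exposition): hooks run sorted by source
# name, and results are collected in that order.
#
#   >>> h = hooks()
#   >>> h.add('b-ext', lambda x: x + 1)
#   >>> h.add('a-ext', lambda x: x * 2)
#   >>> h(3)
#   [6, 4]
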
2859 2862 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
2860 2863 '''Yields lines for a nicely formatted stacktrace.
2861 2864 Skips the last 'skip' entries, then returns the last 'depth' entries.
2862 2865 Each file+linenumber is formatted according to fileline.
2863 2866 Each line is formatted according to line.
2864 2867 If line is None, it yields:
2865 2868 length of longest filepath+line number,
2866 2869 filepath+linenumber,
2867 2870 function
2868 2871
2869 2872 Not to be used in production code, but very convenient while developing.
2870 2873 '''
2871 2874 entries = [(fileline % (fn, ln), func)
2872 2875 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2873 2876 ][-depth:]
2874 2877 if entries:
2875 2878 fnmax = max(len(entry[0]) for entry in entries)
2876 2879 for fnln, func in entries:
2877 2880 if line is None:
2878 2881 yield (fnmax, fnln, func)
2879 2882 else:
2880 2883 yield line % (fnmax, fnln, func)
2881 2884
2882 2885 def debugstacktrace(msg='stacktrace', skip=0,
2883 2886 f=stderr, otherf=stdout, depth=0):
2884 2887 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2885 2888 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
2886 2889 By default it will flush stdout first.
2887 2890 It can be used everywhere and intentionally does not require an ui object.
2888 2891 Not to be used in production code, but very convenient while developing.
2889 2892 '''
2890 2893 if otherf:
2891 2894 otherf.flush()
2892 2895 f.write('%s at:\n' % msg.rstrip())
2893 2896 for line in getstackframes(skip + 1, depth=depth):
2894 2897 f.write(line)
2895 2898 f.flush()
2896 2899
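# Typical ad-hoc use while developing (illustrative):
#
#   util.debugstacktrace('reached slow path', depth=5)
#
# writes 'reached slow path at:' followed by the five innermost callers
# to stderr.
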
2897 2900 class dirs(object):
2898 2901 '''a multiset of directory names from a dirstate or manifest'''
2899 2902
2900 2903 def __init__(self, map, skip=None):
2901 2904 self._dirs = {}
2902 2905 addpath = self.addpath
2903 2906 if safehasattr(map, 'iteritems') and skip is not None:
2904 2907 for f, s in map.iteritems():
2905 2908 if s[0] != skip:
2906 2909 addpath(f)
2907 2910 else:
2908 2911 for f in map:
2909 2912 addpath(f)
2910 2913
2911 2914 def addpath(self, path):
2912 2915 dirs = self._dirs
2913 2916 for base in finddirs(path):
2914 2917 if base in dirs:
2915 2918 dirs[base] += 1
2916 2919 return
2917 2920 dirs[base] = 1
2918 2921
2919 2922 def delpath(self, path):
2920 2923 dirs = self._dirs
2921 2924 for base in finddirs(path):
2922 2925 if dirs[base] > 1:
2923 2926 dirs[base] -= 1
2924 2927 return
2925 2928 del dirs[base]
2926 2929
2927 2930 def __iter__(self):
2928 2931 return iter(self._dirs)
2929 2932
2930 2933 def __contains__(self, d):
2931 2934 return d in self._dirs
2932 2935
2933 2936 if safehasattr(parsers, 'dirs'):
2934 2937 dirs = parsers.dirs
2935 2938
2936 2939 def finddirs(path):
2937 2940 pos = path.rfind('/')
2938 2941 while pos != -1:
2939 2942 yield path[:pos]
2940 2943 pos = path.rfind('/', 0, pos)
2941 2944
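# Illustrative behaviour of finddirs()/dirs() (the C implementation in
# parsers, when present, is expected to behave the same as the
# pure-Python fallback shown here):
#
#   >>> list(finddirs('a/b/c'))
#   ['a/b', 'a']
#   >>> d = dirs(['a/b/c', 'a/d'])
#   >>> sorted(d)
#   ['a', 'a/b']
#   >>> 'a/b' in d, 'a/b/c' in d
#   (True, False)
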
2942 2945 class ctxmanager(object):
2943 2946 '''A context manager for use in 'with' blocks to allow multiple
2944 2947 contexts to be entered at once. This is both safer and more
2945 2948 flexible than contextlib.nested.
2946 2949
2947 2950 Once Mercurial supports Python 2.7+, this will become mostly
2948 2951 unnecessary.
2949 2952 '''
2950 2953
2951 2954 def __init__(self, *args):
2952 2955 '''Accepts a list of no-argument functions that return context
2953 2956 managers. These will be invoked when enter() is called.'''
2954 2957 self._pending = args
2955 2958 self._atexit = []
2956 2959
2957 2960 def __enter__(self):
2958 2961 return self
2959 2962
2960 2963 def enter(self):
2961 2964 '''Create and enter context managers in the order in which they were
2962 2965 passed to the constructor.'''
2963 2966 values = []
2964 2967 for func in self._pending:
2965 2968 obj = func()
2966 2969 values.append(obj.__enter__())
2967 2970 self._atexit.append(obj.__exit__)
2968 2971 del self._pending
2969 2972 return values
2970 2973
2971 2974 def atexit(self, func, *args, **kwargs):
2972 2975 '''Add a function to call when this context manager exits. The
2973 2976 ordering of multiple atexit calls is unspecified, save that
2974 2977 they will happen before any __exit__ functions.'''
2975 2978 def wrapper(exc_type, exc_val, exc_tb):
2976 2979 func(*args, **kwargs)
2977 2980 self._atexit.append(wrapper)
2978 2981 return func
2979 2982
2980 2983 def __exit__(self, exc_type, exc_val, exc_tb):
2981 2984 '''Context managers are exited in the reverse order from which
2982 2985 they were created.'''
2983 2986 received = exc_type is not None
2984 2987 suppressed = False
2985 2988 pending = None
2986 2989 self._atexit.reverse()
2987 2990 for exitfunc in self._atexit:
2988 2991 try:
2989 2992 if exitfunc(exc_type, exc_val, exc_tb):
2990 2993 suppressed = True
2991 2994 exc_type = None
2992 2995 exc_val = None
2993 2996 exc_tb = None
2994 2997 except BaseException:
2995 2998 exc_type, exc_val, exc_tb = pending = sys.exc_info()
2997 3000 del self._atexit
2998 3001 if pending:
2999 3002 raise exc_val
3000 3003 return received and suppressed
3001 3004
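# A minimal usage sketch (hypothetical file names): the constructor takes
# zero-argument factories, and enter() returns the entered values in
# order; everything is exited when the 'with' block ends.
#
#   with ctxmanager(lambda: open('a.txt'), lambda: open('b.txt')) as c:
#       fa, fb = c.enter()
#       data = fa.read() + fb.read()
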
3002 3005 # compression code
3003 3006
3004 3007 SERVERROLE = 'server'
3005 3008 CLIENTROLE = 'client'
3006 3009
3007 3010 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3008 3011 (u'name', u'serverpriority',
3009 3012 u'clientpriority'))
3010 3013
3011 3014 class compressormanager(object):
3012 3015 """Holds registrations of various compression engines.
3013 3016
3014 3017 This class essentially abstracts the differences between compression
3015 3018 engines to allow new compression formats to be added easily, possibly from
3016 3019 extensions.
3017 3020
3018 3021 Compressors are registered against the global instance by calling its
3019 3022 ``register()`` method.
3020 3023 """
3021 3024 def __init__(self):
3022 3025 self._engines = {}
3023 3026 # Bundle spec human name to engine name.
3024 3027 self._bundlenames = {}
3025 3028 # Internal bundle identifier to engine name.
3026 3029 self._bundletypes = {}
3027 3030 # Revlog header to engine name.
3028 3031 self._revlogheaders = {}
3029 3032 # Wire proto identifier to engine name.
3030 3033 self._wiretypes = {}
3031 3034
3032 3035 def __getitem__(self, key):
3033 3036 return self._engines[key]
3034 3037
3035 3038 def __contains__(self, key):
3036 3039 return key in self._engines
3037 3040
3038 3041 def __iter__(self):
3039 3042 return iter(self._engines.keys())
3040 3043
3041 3044 def register(self, engine):
3042 3045 """Register a compression engine with the manager.
3043 3046
3044 3047 The argument must be a ``compressionengine`` instance.
3045 3048 """
3046 3049 if not isinstance(engine, compressionengine):
3047 3050 raise ValueError(_('argument must be a compressionengine'))
3048 3051
3049 3052 name = engine.name()
3050 3053
3051 3054 if name in self._engines:
3052 3055 raise error.Abort(_('compression engine %s already registered') %
3053 3056 name)
3054 3057
3055 3058 bundleinfo = engine.bundletype()
3056 3059 if bundleinfo:
3057 3060 bundlename, bundletype = bundleinfo
3058 3061
3059 3062 if bundlename in self._bundlenames:
3060 3063 raise error.Abort(_('bundle name %s already registered') %
3061 3064 bundlename)
3062 3065 if bundletype in self._bundletypes:
3063 3066 raise error.Abort(_('bundle type %s already registered by %s') %
3064 3067 (bundletype, self._bundletypes[bundletype]))
3065 3068
3066 3069 # Register the external-facing name only if one was declared.
3067 3070 if bundlename:
3068 3071 self._bundlenames[bundlename] = name
3069 3072
3070 3073 self._bundletypes[bundletype] = name
3071 3074
3072 3075 wiresupport = engine.wireprotosupport()
3073 3076 if wiresupport:
3074 3077 wiretype = wiresupport.name
3075 3078 if wiretype in self._wiretypes:
3076 3079 raise error.Abort(_('wire protocol compression %s already '
3077 3080 'registered by %s') %
3078 3081 (wiretype, self._wiretypes[wiretype]))
3079 3082
3080 3083 self._wiretypes[wiretype] = name
3081 3084
3082 3085 revlogheader = engine.revlogheader()
3083 3086 if revlogheader and revlogheader in self._revlogheaders:
3084 3087 raise error.Abort(_('revlog header %s already registered by %s') %
3085 3088 (revlogheader, self._revlogheaders[revlogheader]))
3086 3089
3087 3090 if revlogheader:
3088 3091 self._revlogheaders[revlogheader] = name
3089 3092
3090 3093 self._engines[name] = engine
3091 3094
3092 3095 @property
3093 3096 def supportedbundlenames(self):
3094 3097 return set(self._bundlenames.keys())
3095 3098
3096 3099 @property
3097 3100 def supportedbundletypes(self):
3098 3101 return set(self._bundletypes.keys())
3099 3102
3100 3103 def forbundlename(self, bundlename):
3101 3104 """Obtain a compression engine registered to a bundle name.
3102 3105
3103 3106 Will raise KeyError if the bundle type isn't registered.
3104 3107
3105 3108 Will abort if the engine is known but not available.
3106 3109 """
3107 3110 engine = self._engines[self._bundlenames[bundlename]]
3108 3111 if not engine.available():
3109 3112 raise error.Abort(_('compression engine %s could not be loaded') %
3110 3113 engine.name())
3111 3114 return engine
3112 3115
3113 3116 def forbundletype(self, bundletype):
3114 3117 """Obtain a compression engine registered to a bundle type.
3115 3118
3116 3119 Will raise KeyError if the bundle type isn't registered.
3117 3120
3118 3121 Will abort if the engine is known but not available.
3119 3122 """
3120 3123 engine = self._engines[self._bundletypes[bundletype]]
3121 3124 if not engine.available():
3122 3125 raise error.Abort(_('compression engine %s could not be loaded') %
3123 3126 engine.name())
3124 3127 return engine
3125 3128
3126 3129 def supportedwireengines(self, role, onlyavailable=True):
3127 3130 """Obtain compression engines that support the wire protocol.
3128 3131
3129 3132 Returns a list of engines in prioritized order, most desired first.
3130 3133
3131 3134 If ``onlyavailable`` is set, filter out engines that can't be
3132 3135 loaded.
3133 3136 """
3134 3137 assert role in (SERVERROLE, CLIENTROLE)
3135 3138
3136 3139 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3137 3140
3138 3141 engines = [self._engines[e] for e in self._wiretypes.values()]
3139 3142 if onlyavailable:
3140 3143 engines = [e for e in engines if e.available()]
3141 3144
3142 3145 def getkey(e):
3143 3146 # Sort first by priority, highest first. In case of tie, sort
3144 3147 # alphabetically. This is arbitrary, but ensures output is
3145 3148 # stable.
3146 3149 w = e.wireprotosupport()
3147 3150 return -1 * getattr(w, attr), w.name
3148 3151
3149 3152 return list(sorted(engines, key=getkey))
3150 3153
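# Illustrative result with the engines registered below (assuming the
# optional zstd module is importable; ties at equal priority fall back to
# alphabetical order):
#
#   >>> [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#   ['zstd', 'zlib', 'bzip2', 'none']
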
3151 3154 def forwiretype(self, wiretype):
3152 3155 engine = self._engines[self._wiretypes[wiretype]]
3153 3156 if not engine.available():
3154 3157 raise error.Abort(_('compression engine %s could not be loaded') %
3155 3158 engine.name())
3156 3159 return engine
3157 3160
3158 3161 def forrevlogheader(self, header):
3159 3162 """Obtain a compression engine registered to a revlog header.
3160 3163
3161 3164 Will raise KeyError if the revlog header value isn't registered.
3162 3165 """
3163 3166 return self._engines[self._revlogheaders[header]]
3164 3167
3165 3168 compengines = compressormanager()
3166 3169
3167 3170 class compressionengine(object):
3168 3171 """Base class for compression engines.
3169 3172
3170 3173 Compression engines must implement the interface defined by this class.
3171 3174 """
3172 3175 def name(self):
3173 3176 """Returns the name of the compression engine.
3174 3177
3175 3178 This is the key the engine is registered under.
3176 3179
3177 3180 This method must be implemented.
3178 3181 """
3179 3182 raise NotImplementedError()
3180 3183
3181 3184 def available(self):
3182 3185 """Whether the compression engine is available.
3183 3186
3184 3187 The intent of this method is to allow optional compression engines
3185 3188 that may not be available in all installations (such as engines relying
3186 3189 on C extensions that may not be present).
3187 3190 """
3188 3191 return True
3189 3192
3190 3193 def bundletype(self):
3191 3194 """Describes bundle identifiers for this engine.
3192 3195
3193 3196 If this compression engine isn't supported for bundles, returns None.
3194 3197
3195 3198 If this engine can be used for bundles, returns a 2-tuple of strings of
3196 3199 the user-facing "bundle spec" compression name and an internal
3197 3200 identifier used to denote the compression format within bundles. To
3198 3201 exclude the name from external usage, set the first element to ``None``.
3199 3202
3200 3203 If bundle compression is supported, the class must also implement
3201 3204 ``compressstream`` and ``decompressorreader``.
3202 3205 """
3203 3206 return None
3204 3207
3205 3208 def wireprotosupport(self):
3206 3209 """Declare support for this compression format on the wire protocol.
3207 3210
3208 3211 If this compression engine isn't supported for compressing wire
3209 3212 protocol payloads, returns None.
3210 3213
3211 3214 Otherwise, returns ``compenginewireprotosupport`` with the following
3212 3215 fields:
3213 3216
3214 3217 * String format identifier
3215 3218 * Integer priority for the server
3216 3219 * Integer priority for the client
3217 3220
3218 3221 The integer priorities are used to order the advertisement of format
3219 3222 support by server and client. The highest integer is advertised
3220 3223 first. Integers with non-positive values aren't advertised.
3221 3224
3222 3225 The priority values are somewhat arbitrary and only used for default
3223 3226 ordering. The relative order can be changed via config options.
3224 3227
3225 3228 If wire protocol compression is supported, the class must also implement
3226 3229 ``compressstream`` and ``decompressorreader``.
3227 3230 """
3228 3231 return None
3229 3232
3230 3233 def revlogheader(self):
3231 3234 """Header added to revlog chunks that identifies this engine.
3232 3235
3233 3236 If this engine can be used to compress revlogs, this method should
3234 3237 return the bytes used to identify chunks compressed with this engine.
3235 3238 Else, the method should return ``None`` to indicate it does not
3236 3239 participate in revlog compression.
3237 3240 """
3238 3241 return None
3239 3242
3240 3243 def compressstream(self, it, opts=None):
3241 3244 """Compress an iterator of chunks.
3242 3245
3243 3246 The method receives an iterator (ideally a generator) of chunks of
3244 3247 bytes to be compressed. It returns an iterator (ideally a generator)
3245 3248 of chunks of bytes representing the compressed output.
3246 3249
3247 3250 Optionally accepts an argument defining how to perform compression.
3248 3251 Each engine treats this argument differently.
3249 3252 """
3250 3253 raise NotImplementedError()
3251 3254
3252 3255 def decompressorreader(self, fh):
3253 3256 """Perform decompression on a file object.
3254 3257
3255 3258 Argument is an object with a ``read(size)`` method that returns
3256 3259 compressed data. Return value is an object with a ``read(size)`` that
3257 3260 returns uncompressed data.
3258 3261 """
3259 3262 raise NotImplementedError()
3260 3263
3261 3264 def revlogcompressor(self, opts=None):
3262 3265 """Obtain an object that can be used to compress revlog entries.
3263 3266
3264 3267 The object has a ``compress(data)`` method that compresses binary
3265 3268 data. This method returns compressed binary data or ``None`` if
3266 3269 the data could not be compressed (too small, not compressible, etc).
3267 3270 The returned data should have a header uniquely identifying this
3268 3271 compression format so decompression can be routed to this engine.
3269 3272 This header should be identified by the ``revlogheader()`` return
3270 3273 value.
3271 3274
3272 3275 The object has a ``decompress(data)`` method that decompresses
3273 3276 data. The method will only be called if ``data`` begins with
3274 3277 ``revlogheader()``. The method should return the raw, uncompressed
3275 3278 data or raise a ``RevlogError``.
3276 3279
3277 3280 The object is reusable but is not thread safe.
3278 3281 """
3279 3282 raise NotImplementedError()
3280 3283
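# A minimal sketch (hypothetical engine, for exposition only) of what an
# extension would register; it compresses nothing but satisfies the
# interface required for bundle support:
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def bundletype(self):
#           # no user-facing bundle spec name, internal type only
#           return None, '_identity'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
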
3281 3284 class _zlibengine(compressionengine):
3282 3285 def name(self):
3283 3286 return 'zlib'
3284 3287
3285 3288 def bundletype(self):
3286 3289 return 'gzip', 'GZ'
3287 3290
3288 3291 def wireprotosupport(self):
3289 3292 return compewireprotosupport('zlib', 20, 20)
3290 3293
3291 3294 def revlogheader(self):
3292 3295 return 'x'
3293 3296
3294 3297 def compressstream(self, it, opts=None):
3295 3298 opts = opts or {}
3296 3299
3297 3300 z = zlib.compressobj(opts.get('level', -1))
3298 3301 for chunk in it:
3299 3302 data = z.compress(chunk)
3300 3303 # Not all calls to compress emit data. It is cheaper to inspect
3301 3304 # here than to feed empty chunks through the generator.
3302 3305 if data:
3303 3306 yield data
3304 3307
3305 3308 yield z.flush()
3306 3309
3307 3310 def decompressorreader(self, fh):
3308 3311 def gen():
3309 3312 d = zlib.decompressobj()
3310 3313 for chunk in filechunkiter(fh):
3311 3314 while chunk:
3312 3315 # Limit output size to limit memory.
3313 3316 yield d.decompress(chunk, 2 ** 18)
3314 3317 chunk = d.unconsumed_tail
3315 3318
3316 3319 return chunkbuffer(gen())
3317 3320
3318 3321 class zlibrevlogcompressor(object):
3319 3322 def compress(self, data):
3320 3323 insize = len(data)
3321 3324 # Caller handles empty input case.
3322 3325 assert insize > 0
3323 3326
3324 3327 if insize < 44:
3325 3328 return None
3326 3329
3327 3330 elif insize <= 1000000:
3328 3331 compressed = zlib.compress(data)
3329 3332 if len(compressed) < insize:
3330 3333 return compressed
3331 3334 return None
3332 3335
3333 3336 # zlib makes an internal copy of the input buffer, doubling
3334 3337 # memory usage for large inputs. So do streaming compression
3335 3338 # on large inputs.
3336 3339 else:
3337 3340 z = zlib.compressobj()
3338 3341 parts = []
3339 3342 pos = 0
3340 3343 while pos < insize:
3341 3344 pos2 = pos + 2**20
3342 3345 parts.append(z.compress(data[pos:pos2]))
3343 3346 pos = pos2
3344 3347 parts.append(z.flush())
3345 3348
3346 3349 if sum(map(len, parts)) < insize:
3347 3350 return ''.join(parts)
3348 3351 return None
3349 3352
3350 3353 def decompress(self, data):
3351 3354 try:
3352 3355 return zlib.decompress(data)
3353 3356 except zlib.error as e:
3354 3357 raise error.RevlogError(_('revlog decompress error: %s') %
3355 3358 str(e))
3356 3359
3357 3360 def revlogcompressor(self, opts=None):
3358 3361 return self.zlibrevlogcompressor()
3359 3362
3360 3363 compengines.register(_zlibengine())
3361 3364
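# Illustrative round trip through the engine just registered (doctest-style;
# assumes cStringIO is imported, standing in for any file-like object
# with a read() method):
#
#   >>> engine = compengines.forbundletype('GZ')
#   >>> data = ''.join(engine.compressstream(['some ', 'data']))
#   >>> engine.decompressorreader(cStringIO.StringIO(data)).read(9)
#   'some data'
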
3362 3365 class _bz2engine(compressionengine):
3363 3366 def name(self):
3364 3367 return 'bz2'
3365 3368
3366 3369 def bundletype(self):
3367 3370 return 'bzip2', 'BZ'
3368 3371
3369 3372 # We declare a protocol name but don't advertise by default because
3370 3373 # it is slow.
3371 3374 def wireprotosupport(self):
3372 3375 return compewireprotosupport('bzip2', 0, 0)
3373 3376
3374 3377 def compressstream(self, it, opts=None):
3375 3378 opts = opts or {}
3376 3379 z = bz2.BZ2Compressor(opts.get('level', 9))
3377 3380 for chunk in it:
3378 3381 data = z.compress(chunk)
3379 3382 if data:
3380 3383 yield data
3381 3384
3382 3385 yield z.flush()
3383 3386
3384 3387 def decompressorreader(self, fh):
3385 3388 def gen():
3386 3389 d = bz2.BZ2Decompressor()
3387 3390 for chunk in filechunkiter(fh):
3388 3391 yield d.decompress(chunk)
3389 3392
3390 3393 return chunkbuffer(gen())
3391 3394
3392 3395 compengines.register(_bz2engine())
3393 3396
3394 3397 class _truncatedbz2engine(compressionengine):
3395 3398 def name(self):
3396 3399 return 'bz2truncated'
3397 3400
3398 3401 def bundletype(self):
3399 3402 return None, '_truncatedBZ'
3400 3403
3401 3404 # We don't implement compressstream because it is hackily handled elsewhere.
3402 3405
3403 3406 def decompressorreader(self, fh):
3404 3407 def gen():
3405 3408 # The input stream doesn't have the 'BZ' header. So add it back.
3406 3409 d = bz2.BZ2Decompressor()
3407 3410 d.decompress('BZ')
3408 3411 for chunk in filechunkiter(fh):
3409 3412 yield d.decompress(chunk)
3410 3413
3411 3414 return chunkbuffer(gen())
3412 3415
3413 3416 compengines.register(_truncatedbz2engine())
3414 3417
3415 3418 class _noopengine(compressionengine):
3416 3419 def name(self):
3417 3420 return 'none'
3418 3421
3419 3422 def bundletype(self):
3420 3423 return 'none', 'UN'
3421 3424
3422 3425 # Clients always support uncompressed payloads. Servers don't advertise
3423 3426 # it by default because, unless you are on a fast network, uncompressed
3424 3427 # payloads can easily saturate your network pipe.
3425 3428 def wireprotosupport(self):
3426 3429 return compewireprotosupport('none', 0, 10)
3427 3430
3428 3431 # We don't implement revlogheader because it is handled specially
3429 3432 # in the revlog class.
3430 3433
3431 3434 def compressstream(self, it, opts=None):
3432 3435 return it
3433 3436
3434 3437 def decompressorreader(self, fh):
3435 3438 return fh
3436 3439
3437 3440 class nooprevlogcompressor(object):
3438 3441 def compress(self, data):
3439 3442 return None
3440 3443
3441 3444 def revlogcompressor(self, opts=None):
3442 3445 return self.nooprevlogcompressor()
3443 3446
3444 3447 compengines.register(_noopengine())
3445 3448
3446 3449 class _zstdengine(compressionengine):
3447 3450 def name(self):
3448 3451 return 'zstd'
3449 3452
3450 3453 @propertycache
3451 3454 def _module(self):
3452 3455 # Not all installs have the zstd module available. So defer importing
3453 3456 # until first access.
3454 3457 try:
3455 3458 from . import zstd
3456 3459 # Force delayed import.
3457 3460 zstd.__version__
3458 3461 return zstd
3459 3462 except ImportError:
3460 3463 return None
3461 3464
3462 3465 def available(self):
3463 3466 return bool(self._module)
3464 3467
3465 3468 def bundletype(self):
3466 3469 return 'zstd', 'ZS'
3467 3470
3468 3471 def wireprotosupport(self):
3469 3472 return compewireprotosupport('zstd', 50, 50)
3470 3473
3471 3474 def revlogheader(self):
3472 3475 return '\x28'
3473 3476
3474 3477 def compressstream(self, it, opts=None):
3475 3478 opts = opts or {}
3476 3479 # zstd level 3 is almost always significantly faster than zlib
3477 3480 # while providing no worse compression. It strikes a good balance
3478 3481 # between speed and compression.
3479 3482 level = opts.get('level', 3)
3480 3483
3481 3484 zstd = self._module
3482 3485 z = zstd.ZstdCompressor(level=level).compressobj()
3483 3486 for chunk in it:
3484 3487 data = z.compress(chunk)
3485 3488 if data:
3486 3489 yield data
3487 3490
3488 3491 yield z.flush()
3489 3492
3490 3493 def decompressorreader(self, fh):
3491 3494 zstd = self._module
3492 3495 dctx = zstd.ZstdDecompressor()
3493 3496 return chunkbuffer(dctx.read_from(fh))
3494 3497
3495 3498 class zstdrevlogcompressor(object):
3496 3499 def __init__(self, zstd, level=3):
3497 3500 # Writing the content size adds a few bytes to the output. However,
3498 3501 # it allows decompression to be more optimal since we can
3499 3502 # pre-allocate a buffer to hold the result.
3500 3503 self._cctx = zstd.ZstdCompressor(level=level,
3501 3504 write_content_size=True)
3502 3505 self._dctx = zstd.ZstdDecompressor()
3503 3506 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3504 3507 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3505 3508
3506 3509 def compress(self, data):
3507 3510 insize = len(data)
3508 3511 # Caller handles empty input case.
3509 3512 assert insize > 0
3510 3513
3511 3514 if insize < 50:
3512 3515 return None
3513 3516
3514 3517 elif insize <= 1000000:
3515 3518 compressed = self._cctx.compress(data)
3516 3519 if len(compressed) < insize:
3517 3520 return compressed
3518 3521 return None
3519 3522 else:
3520 3523 z = self._cctx.compressobj()
3521 3524 chunks = []
3522 3525 pos = 0
3523 3526 while pos < insize:
3524 3527 pos2 = pos + self._compinsize
3525 3528 chunk = z.compress(data[pos:pos2])
3526 3529 if chunk:
3527 3530 chunks.append(chunk)
3528 3531 pos = pos2
3529 3532 chunks.append(z.flush())
3530 3533
3531 3534 if sum(map(len, chunks)) < insize:
3532 3535 return ''.join(chunks)
3533 3536 return None
3534 3537
3535 3538 def decompress(self, data):
3536 3539 insize = len(data)
3537 3540
3538 3541 try:
3539 3542 # This was measured to be faster than other streaming
3540 3543 # decompressors.
3541 3544 dobj = self._dctx.decompressobj()
3542 3545 chunks = []
3543 3546 pos = 0
3544 3547 while pos < insize:
3545 3548 pos2 = pos + self._decompinsize
3546 3549 chunk = dobj.decompress(data[pos:pos2])
3547 3550 if chunk:
3548 3551 chunks.append(chunk)
3549 3552 pos = pos2
3550 3553 # Frame should be exhausted, so no finish() API.
3551 3554
3552 3555 return ''.join(chunks)
3553 3556 except Exception as e:
3554 3557 raise error.RevlogError(_('revlog decompress error: %s') %
3555 3558 str(e))
3556 3559
3557 3560 def revlogcompressor(self, opts=None):
3558 3561 opts = opts or {}
3559 3562 return self.zstdrevlogcompressor(self._module,
3560 3563 level=opts.get('level', 3))
3561 3564
3562 3565 compengines.register(_zstdengine())
3563 3566
3564 3567 # convenient shortcut
3565 3568 dst = debugstacktrace