##// END OF EJS Templates
pycompat: wrap xrange for py2 to provide efficient __contains__...
Joerg Sonnenberger -
r39474:45e05d39 default
parent child Browse files
Show More
@@ -1,563 +1,563 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import (
17 17 attr,
18 18 )
19 19
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 revlog,
25 25 )
26 26 from .utils import (
27 27 dateutil,
28 28 stringutil,
29 29 )
30 30
31 31 _defaultextra = {'branch': 'default'}
32 32
33 33 def _string_escape(text):
34 34 """
35 35 >>> from .pycompat import bytechr as chr
36 36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
38 38 >>> s
39 39 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
40 40 >>> res = _string_escape(s)
41 41 >>> s == stringutil.unescapestr(res)
42 42 True
43 43 """
44 44 # subset of the string_escape codec
45 45 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
46 46 return text.replace('\0', '\\0')
47 47
def decodeextra(text):
    """Decode a NUL-separated ``key:value`` string into an extra dict.

    The result starts as a copy of ``_defaultextra`` and is updated with
    every non-empty entry found in ``text``.

    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for entry in text.split('\0'):
        if not entry:
            continue
        if '\\0' in entry:
            # fix up \0 without getting into trouble with \\0: a newline is
            # temporarily inserted after every escaped backslash so that the
            # "0" following it is not mistaken for an escaped NUL
            entry = entry.replace('\\\\', '\\\\\n')
            entry = entry.replace('\\0', '\0').replace('\n', '')
        key, value = stringutil.unescapestr(entry).split(':', 1)
        extra[key] = value
    return extra
70 70
def encodeextra(d):
    """Serialize the mapping ``d`` as escaped ``key:value`` items joined by
    NUL bytes."""
    escaped = []
    # keys must be sorted to produce a deterministic changelog entry
    for key in sorted(d):
        escaped.append(_string_escape('%s:%s' % (key, d[key])))
    return "\0".join(escaped)
75 75
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    trimmed = (line.rstrip() for line in desc.splitlines())
    joined = '\n'.join(trimmed)
    return joined.strip('\n')
79 79
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it

    The object behaves like a file whose content is the real file opened
    through ``vfs`` followed by the chunks stored in ``buf``. Writes never
    reach the real file; they are appended to ``buf``.
    '''
    def __init__(self, vfs, name, mode, buf):
        # list of pending chunks, owned by the caller and appended to in place
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        # current position in the virtual (file + pending data) stream
        self.offset = fp.tell()
        # size of the real file; anything at or past it lives in self.data
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        """Return the tracked end offset (real size plus bytes written here)."""
        return self._end

    def tell(self):
        """Return the current virtual offset."""
        return self.offset

    def flush(self):
        """No-op: pending data only lives in the in-memory buffer."""
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # only positions inside the real file need a real seek
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        A negative ``count`` (the default) reads everything up to the end of
        the pending data.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the pending chunks into a single one so that it can be
            # sliced directly
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            if count < 0:
                # "read to the end": slicing with doff + count would use a
                # negative stop index and drop the trailing bytes
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        """Append ``s`` to the pending data and advance offset and end."""
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
145 145
146 146 def _divertopener(opener, target):
147 147 """build an opener that writes in 'target.a' instead of 'target'"""
148 148 def _divert(name, mode='r', checkambig=False):
149 149 if name != target:
150 150 return opener(name, mode)
151 151 return opener(name + ".a", mode)
152 152 return _divert
153 153
154 154 def _delayopener(opener, target, buf):
155 155 """build an opener that stores chunks in 'buf' instead of 'target'"""
156 156 def _delay(name, mode='r', checkambig=False):
157 157 if name != target:
158 158 return opener(name, mode)
159 159 return appender(opener, name, mode, buf)
160 160 return _delay
161 161
@attr.s
class _changelogrevision(object):
    """Plain attribute container declared with ``attr.s``.

    ``changelogrevision.__new__`` returns an instance of this class (with
    every field except ``extra`` left at its default) when it is given empty
    text.
    """
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default='')
    date = attr.ib(default=(0, 0))
    # a fresh list per instance
    files = attr.ib(default=attr.Factory(list))
    description = attr.ib(default='')
172 172
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.

    Only the positions of the separating newlines are computed up front;
    each property slices and decodes its field on access.
    """

    __slots__ = (
        u'_offsets',
        u'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        nofiles = text[nl3 + 1:nl3 + 2] == '\n'
        doublenl = nl3 if nofiles else text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

    @property
    def manifest(self):
        end = self._offsets[0]
        return bin(self._text[0:end])

    @property
    def user(self):
        nl1, nl2 = self._offsets[0], self._offsets[1]
        return encoding.tolocal(self._text[nl1 + 1:nl2])

    @property
    def _rawdate(self):
        # first two space-separated fields of the "time tz extra" line
        nl2, nl3 = self._offsets[1], self._offsets[2]
        fields = self._text[nl2 + 1:nl3].split(' ', 2)
        return fields[0:2]

    @property
    def _rawextra(self):
        # third field of the "time tz extra" line, or None when absent
        nl2, nl3 = self._offsets[1], self._offsets[2]
        fields = self._text[nl2 + 1:nl3].split(' ', 2)
        if len(fields) == 3:
            return fields[2]
        return None

    @property
    def date(self):
        rawtime, rawtz = self._rawdate[0], self._rawdate[1]
        time = float(rawtime)
        # Various tools did silly things with the timezone.
        try:
            timezone = int(rawtz)
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        return _defaultextra if raw is None else decodeextra(raw)

    @property
    def files(self):
        nl3, doublenl = self._offsets[2], self._offsets[3]
        if nl3 == doublenl:
            return []

        return self._text[nl3 + 1:doublenl].split('\n')

    @property
    def description(self):
        start = self._offsets[3] + 2
        return encoding.tolocal(self._text[start:])
277 277
class changelog(revlog.revlog):
    """Revlog holding changesets.

    On top of ``revlog.revlog`` this class hides the revisions listed in
    ``self.filteredrevs`` and can delay or divert index writes until a
    transaction is finalized.
    """

    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

        datafile = '00changelog.d'
        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
                               checkambig=True, mmaplargeindex=True)

        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each. So disable delta
        # chains.
        self.storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tiprev(self):
        """Return the highest unfiltered revision number.

        The scan runs down to -1, so -1 is returned when every revision is
        filtered (as long as -1 itself is not in ``filteredrevs``).
        """
        for i in pycompat.xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i

    def tip(self):
        """filtered version of revlog.tip"""
        return self.node(self.tiprev())

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in pycompat.xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """Delegate to the index's ``reachableroots2`` implementation."""
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        """Return head revisions, taking ``filteredrevs`` into account."""
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: write everything to a fresh '.a' file
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: buffer index writes in memory
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup

        Returns True when a diverted ('.a') index is in use after the call,
        False otherwise.
        """
        if self._delaybuf:
            # make a temporary copy of the index; the source handle is
            # closed explicitly instead of being left to the garbage
            # collector
            fp1 = self._realopener(self.indexfile)
            try:
                existing = fp1.read()
            finally:
                fp1.close()
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            fp2 = self._realopener(pendingfilename, "w")
            try:
                # write existing data
                fp2.write(existing)
                # add pending data
                fp2.write("".join(self._delaybuf))
            finally:
                fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        # while writes are delayed the check is skipped; _finalize() calls
        # this again once the delay is over
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        # the first three lines are manifest node, user and date/extra
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None):
        """Build the text of a new changeset and add it as a revision.

        Raises ``error.RevlogError`` for an empty username, a username
        containing a newline, or a reserved branch name in ``extra``.
        The ``extra`` mapping passed by the caller is left unmodified.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %r contains a newline")
                                    % pycompat.bytestr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % dateutil.parsedate(date)
        else:
            parseddate = "%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                # the default branch is implicit; drop it from a copy so
                # that the caller's dict is not modified behind its back
                extra = extra.copy()
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
        # overlay over the standard revlog._addrevision to track the new
        # revision on the transaction.
        rev = len(self)
        node = super(changelog, self)._addrevision(node, rawtext, transaction,
                                                   *args, **kwargs)
        revs = transaction.changes.get('revs')
        if revs is not None:
            if revs:
                # the tracked revisions form one contiguous range that is
                # extended by the revision just added
                assert revs[-1] + 1 == rev
                revs = pycompat.membershiprange(revs[0], rev + 1)
            else:
                revs = pycompat.membershiprange(rev, rev + 1)
            transaction.changes['revs'] = revs
        return node
@@ -1,429 +1,449 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18 import tempfile
19 19
20 20 ispy3 = (sys.version_info[0] >= 3)
21 21 ispypy = (r'__pypy__' in sys.builtin_module_names)
22 22
23 23 if not ispy3:
24 24 import cookielib
25 25 import cPickle as pickle
26 26 import httplib
27 27 import Queue as queue
28 28 import SocketServer as socketserver
29 29 import xmlrpclib
30 30
31 31 from .thirdparty.concurrent import futures
32 32
def future_set_exception_info(f, exc_info):
    """Python 2 variant: forward the items of ``exc_info`` as positional
    arguments to ``f.set_exception_info``."""
    f.set_exception_info(*exc_info)
35 35 else:
36 36 import concurrent.futures as futures
37 37 import http.cookiejar as cookielib
38 38 import http.client as httplib
39 39 import pickle
40 40 import queue as queue
41 41 import socketserver
42 42 import xmlrpc.client as xmlrpclib
43 43
def future_set_exception_info(f, exc_info):
    """Python 3 variant: hand the first item of ``exc_info`` to
    ``f.set_exception``.

    NOTE(review): presumably callers pass ``(exception, traceback)`` rather
    than a full ``sys.exc_info()`` triple, otherwise the first item would be
    the exception class -- confirm against the call sites.
    """
    f.set_exception(exc_info[0])
46 46
def identity(a):
    """Return ``a`` unchanged."""
    return a
49 49
50 50 def _rapply(f, xs):
51 51 if xs is None:
52 52 # assume None means non-value of optional data
53 53 return xs
54 54 if isinstance(xs, (list, set, tuple)):
55 55 return type(xs)(_rapply(f, x) for x in xs)
56 56 if isinstance(xs, dict):
57 57 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
58 58 return f(xs)
59 59
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    # fast path mainly for py2: with identity nothing needs rebuilding
    return xs if f is identity else _rapply(f, xs)
82 82
83 83 if ispy3:
84 84 import builtins
85 85 import functools
86 86 import io
87 87 import struct
88 88
# Python 3: bytes versions of the os/sys text constants. The constants are
# encoded as ASCII; osaltsep and sysexecutable are only encoded when they
# are set.
fsencode = os.fsencode
fsdecode = os.fsdecode
oscurdir = os.curdir.encode('ascii')
oslinesep = os.linesep.encode('ascii')
osname = os.name.encode('ascii')
ospathsep = os.pathsep.encode('ascii')
ospardir = os.pardir.encode('ascii')
ossep = os.sep.encode('ascii')
osaltsep = os.altsep
if osaltsep:
    osaltsep = osaltsep.encode('ascii')
# os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
# returns bytes.
getcwd = os.getcwdb
sysplatform = sys.platform.encode('ascii')
sysexecutable = sys.executable
if sysexecutable:
    sysexecutable = os.fsencode(sysexecutable)
bytesio = io.BytesIO
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
110 110
def maplist(*args):
    """Return the result of ``map(*args)`` as a list."""
    return list(map(*args))

def rangelist(*args):
    """Return the result of ``range(*args)`` as a list."""
    return list(range(*args))

def ziplist(*args):
    """Return the result of ``zip(*args)`` as a list."""
    return list(zip(*args))

rawinput = input
getargspec = inspect.getfullargspec

# TODO: .buffer might not exist if std streams were replaced; we'll need
# a silly wrapper to make a bytes stream backed by a unicode one.
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer

# Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
# we can use os.fsencode() to get back bytes argv.
#
# https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
#
# TODO: On Windows, the native argv is wchar_t, so we'll need a different
# workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
if getattr(sys, 'argv', None) is not None:
    sysargv = list(map(os.fsencode, sys.argv))

# bytechr(n) packs the integer n into a single byte
bytechr = struct.Struct('>B').pack
# byterepr(x) is b'%r' % x
byterepr = b'%r'.__mod__
141 141
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # an existing bytestr is returned as is, not copied
        if isinstance(s, bytestr):
            return s
        byteslike = (isinstance(s, (bytes, bytearray))
                     or hasattr(s, u'__bytes__')) # hasattr-py3-only
        if not byteslike:
            # anything else goes through its str() form, ASCII only
            s = str(s).encode(u'ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        # integer indexing yields an int on Python 3; turn it into one byte
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        text = bytes.__repr__(self)
        return text[1:] # drop b''
214 214
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2

    Every item of ``s`` is passed through ``bytechr``; the result is the
    iterator returned by ``map``.
    """
    return map(bytechr, s)
218 218
def maybebytestr(s):
    """Promote bytes to bytestr; any other value is returned unchanged"""
    return bytestr(s) if isinstance(s, bytes) else s
224 224
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    The string is encoded as UTF-8. This never raises UnicodeEncodeError,
    but only ASCII characters can be round-tripped by sysstr(sysbytes(s)).
    """
    return s.encode(u'utf-8')
232 232
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    A value that already is a str is returned as is; anything else is decoded
    as latin-1. This never raises UnicodeDecodeError. Non-ascii characters
    are considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    return s if isinstance(s, builtins.str) else s.decode(u'latin-1')
244 244
def strurl(url):
    """Converts a bytes url back to str"""
    # only bytes are decoded (as ASCII); other values pass through untouched
    return url.decode(u'ascii') if isinstance(url, bytes) else url
250 250
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii"""
    # only str is encoded; other values pass through untouched
    return url.encode(u'ascii') if isinstance(url, str) else url
256 256
def raisewithtb(exc, tb):
    """Raise exception with the given traceback

    Python 3 variant, implemented with ``exc.with_traceback(tb)``.
    """
    raise exc.with_traceback(tb)
260 260
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, u'__doc__', None)
    # a missing docstring stays None instead of becoming an empty string
    return doc if doc is None else sysbytes(doc)
268 268
269 269 def _wrapattrfunc(f):
270 270 @functools.wraps(f)
271 271 def w(object, name, *args):
272 272 return f(object, sysstr(name), *args)
273 273 return w
274 274
# these wrappers are automagically imported by hgloader
# (each one converts the attribute name with sysstr() before calling the
# builtin of the same name)
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
xrange = builtins.range
# on Python 3 the builtin range is used directly as membershiprange
membershiprange = builtins.range
unicode = str
282 283
def open(name, mode='r', buffering=-1, encoding=None):
    """Call ``builtins.open`` with ``mode`` converted by sysstr()."""
    return builtins.open(name, sysstr(mode), buffering, encoding)

# on Python 3, safehasattr is the builtin hasattr behind the sysstr() wrapper
safehasattr = _wrapattrfunc(builtins.hasattr)
287 288
288 289 def _getoptbwrapper(orig, args, shortlist, namelist):
289 290 """
290 291 Takes bytes arguments, converts them to unicode, pass them to
291 292 getopt.getopt(), convert the returned values back to bytes and then
292 293 return them for Python 3 compatibility as getopt.getopt() don't accepts
293 294 bytes on Python 3.
294 295 """
295 296 args = [a.decode('latin-1') for a in args]
296 297 shortlist = shortlist.decode('latin-1')
297 298 namelist = [a.decode('latin-1') for a in namelist]
298 299 opts, args = orig(args, shortlist, namelist)
299 300 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
300 301 for a in opts]
301 302 args = [a.encode('latin-1') for a in args]
302 303 return opts, args
303 304
def strkwargs(dic):
    """
    Converts the keys of a Python dictionary to str (i.e. unicode) so that
    the dictionary can be passed as keyword arguments: dictionaries with
    bytes keys can't be used as keyword arguments on Python 3.

    Keys are decoded as latin-1; values are left untouched and a new dict is
    returned.
    """
    # .items() rather than .iteritems(): this is Python 3 only code and
    # Python 3 dicts have no iteritems() unless the source is rewritten at
    # import time
    dic = dict((k.decode('latin-1'), v) for k, v in dic.items())
    return dic
312 313
def byteskwargs(dic):
    """
    Converts the keys of a Python dictionary back to bytes, undoing the
    conversion to str that was needed to pass the dictionary as keyword
    arguments on Python 3.

    Keys are encoded as latin-1; values are left untouched and a new dict is
    returned.
    """
    # .items() rather than .iteritems(): this is Python 3 only code and
    # Python 3 dicts have no iteritems() unless the source is rewritten at
    # import time
    dic = dict((k.encode('latin-1'), v) for k, v in dic.items())
    return dic
320 321
# TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Takes a bytes argument, converts it to str (i.e. unicode), passes that
    into shlex.split(), converts the returned values to bytes and returns
    them, for Python 3 compatibility, as shlex.split() doesn't accept bytes
    on Python 3.
    """
    decoded = s.decode('latin-1')
    parts = shlex.split(decoded, comments, posix)
    return [part.encode('latin-1') for part in parts]
330 331
331 332 else:
332 333 import cStringIO
333 334
# Python 2: the native types already behave as wanted, so these names are
# plain aliases of builtins or of identity()
xrange = xrange
unicode = unicode
bytechr = chr
byterepr = repr
bytestr = str
iterbytestr = iter
maybebytestr = identity
sysbytes = identity
sysstr = identity
strurl = identity
bytesurl = identity
345 346
class membershiprange(object):
    """Like xrange(a,b) but with constant-time membership test

    Indexing, hashing, iteration, length and reversal are delegated to the
    wrapped ``xrange``; only ``in`` is implemented here, by comparing against
    the first and last element instead of scanning.
    """
    def __init__(self, a, b):
        self._range = xrange(a, b)
    def __getitem__(self, n):
        return self._range[n]
    def __hash__(self):
        return hash(self._range)
    def __iter__(self):
        return iter(self._range)
    def __len__(self):
        return len(self._range)
    def __reversed__(self):
        return reversed(self._range)
    def __contains__(self, n):
        if not self._range:
            return False
        if not (n >= self._range[0] and n <= self._range[-1]):
            return False
        # the step is always 1, so every integral value within the bounds is
        # a member; a non-integral number such as 2.5 is not, exactly as
        # with a real xrange
        return n % 1 == 0
365
# this can't be parsed on Python 3: the three-expression raise statement is
# Python 2 only syntax, so the function is defined through exec() to keep
# this module importable on both versions
exec('def raisewithtb(exc, tb):\n'
     '    raise exc, None, tb\n')
349 369
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.

    Returns ``filename`` unchanged; raises TypeError if it is not a str.
    """
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
361 381
# In Python 2, fsdecode() is very likely to receive bytes already. So it's
# better not to touch the Python 2 part as it's already working fine.
fsdecode = identity

def getdoc(obj):
    """Return the ``__doc__`` attribute of ``obj``, or None if it has none."""
    return getattr(obj, '__doc__', None)

# unique sentinel used to tell a missing attribute from any real value
_notset = object()

def safehasattr(thing, attr):
    """Return True if ``getattr(thing, attr)`` yields a value, without going
    through the builtin hasattr()."""
    return getattr(thing, attr, _notset) is not _notset

def _getoptbwrapper(orig, args, shortlist, namelist):
    """Python 2 variant: call ``orig`` directly, no conversion is done."""
    return orig(args, shortlist, namelist)
376 396
# Python 2: no conversion needed, so the remaining helpers are plain aliases
# of identity(), of the os/sys values and of the native builtins
strkwargs = identity
byteskwargs = identity

oscurdir = os.curdir
oslinesep = os.linesep
osname = os.name
ospathsep = os.pathsep
ospardir = os.pardir
ossep = os.sep
osaltsep = os.altsep
stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr
# sysargv is only defined when sys.argv exists and is not None
if getattr(sys, 'argv', None) is not None:
    sysargv = sys.argv
sysplatform = sys.platform
getcwd = os.getcwd
sysexecutable = sys.executable
shlexsplit = shlex.split
bytesio = cStringIO.StringIO
stringio = bytesio
maplist = map
rangelist = range
ziplist = zip
rawinput = raw_input
getargspec = inspect.getargspec
403 423
# platform flags derived from the sysplatform / osname values defined above
isjython = sysplatform.startswith('java')

isdarwin = sysplatform == 'darwin'
isposix = osname == 'posix'
iswindows = osname == 'nt'
409 429
def getoptb(args, shortlist, namelist):
    """Run ``getopt.getopt`` through ``_getoptbwrapper``."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
412 432
def gnugetoptb(args, shortlist, namelist):
    """Run ``getopt.gnu_getopt`` through ``_getoptbwrapper``."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
415 435
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call ``tempfile.mkdtemp`` with bytes defaults for suffix and prefix."""
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
418 438
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call ``tempfile.mkstemp`` with bytes defaults for suffix and prefix."""
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
422 442
# mode must include 'b'ytes as encoding= is not supported
def namedtempfile(mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None,
                  delete=True):
    """Call ``tempfile.NamedTemporaryFile`` with ``mode`` converted by
    sysstr(); the mode has to contain 'b'."""
    nativemode = sysstr(mode)
    assert r'b' in nativemode
    options = dict(suffix=suffix, prefix=prefix, dir=dir, delete=delete)
    return tempfile.NamedTemporaryFile(nativemode, bufsize, **options)
General Comments 0
You need to be logged in to leave comments. Login now