##// END OF EJS Templates
bdiff: don't check border condition in loop...
bdiff: don't check border condition in loop

`plast = a + len - 1`, so this "for" loop iterates from "a" to "plast", inclusive. That means `p == plast` can only be true on the final iteration of the loop, and checking for it on every iteration is wasteful. This patch simply decreases the upper bound of the loop by 1 and adds an explicit check after the loop for the `p == plast` case. We can't simply add 1 to the initial value for "i" because that doesn't do the correct thing on empty input strings.

`perfbdiff -m 3041e4d59df2` on the Firefox repo becomes significantly faster:
! wall 0.072763 comb 0.070000 user 0.070000 sys 0.000000 (best of 100)
! wall 0.053221 comb 0.060000 user 0.060000 sys 0.000000 (best of 100)

For the curious, this code has its origins in 8b067bde6679, which is the changeset that introduced bdiff.c in 2005. Also, GNU diffutils is able to perform a similar line-based diff in under 20ms, so there are likely more perf wins to be found in this code. One of them is the hashing algorithm. But it looks like mpm spent some time testing hash collisions in d0c48891dd4a. I'd like to do the same before switching away from lyhash, just to be on the safe side.

File last commit:

r29890:31a6d5e1 default
r30308:d500ddae default
Show More
archival.py
341 lines | 10.6 KiB | text/x-python | PythonLexer
Vadim Gelfer
add "archive" command, like "cvs export" only better....
r2112 # archival.py - revision archival for mercurial
#
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Vadim Gelfer
add "archive" command, like "cvs export" only better....
r2112
Gregory Szorc
archival: use absolute_import
r25916 from __future__ import absolute_import
import gzip
import os
FUJIWARA Katsunori
archival: add "extended-timestamp" extra block for zip archives (issue3600)...
r17628 import struct
Gregory Szorc
archival: use absolute_import
r25916 import tarfile
import time
import zipfile
import zlib
from .i18n import _
from . import (
cmdutil,
encoding,
error,
match as matchmod,
scmutil,
util,
)
timeless
pycompat: switch to util.stringio for py3 compat
r28861 stringio = util.stringio
Vadim Gelfer
add "archive" command, like "cvs export" only better....
r2112
Mads Kiilerich
declare local constants instead of using magic values and comments
r17429 # from unzip source code:
_UNX_IFREG = 0x8000
_UNX_IFLNK = 0xa000
Martin Geisler
archival: remove prefix argument from archivers...
def tidyprefix(dest, kind, prefix):
    '''work out the prefix to put in front of names in the archive and
    check that it is safe for consumers.

    When no prefix is given, it is derived from the base name of dest
    (which must then be a string), with the first matching extension
    registered for kind in exts removed.  The result ends with '/' unless
    it collapses to the empty string.

    Raises ValueError if prefix is empty and dest is not a string, and
    error.Abort if the resulting prefix is absolute or contains '..'
    components.'''
    if not prefix:
        if not isinstance(dest, str):
            raise ValueError('dest must be string if no prefix')
        prefix = os.path.basename(dest)
        lowered = prefix.lower()
        # only the first extension that matches is stripped
        matched = next((suffix for suffix in exts.get(kind, [])
                        if lowered.endswith(suffix)), None)
        if matched is not None:
            prefix = prefix[:-len(matched)]
    else:
        prefix = util.normpath(prefix)

    localprefix = os.path.normpath(util.localpath(prefix))
    prefix = util.pconvert(localprefix)
    if not prefix.endswith('/'):
        prefix = prefix + '/'
    # Drop the leading '.' path component if present, so Windows can read the
    # zip files (issue4634)
    if prefix.startswith('./'):
        prefix = prefix[2:]
    unsafe = (prefix.startswith('../') or os.path.isabs(localprefix)
              or '/../' in prefix)
    if unsafe:
        raise error.Abort(_('archive prefix contains illegal components'))
    return prefix
Martin Geisler
archival: move commands.archive.guess_type to archival.guesskind...
# file name extensions recognised for each archive kind
exts = {
    'tar': ['.tar'],
    'tbz2': ['.tbz2', '.tar.bz2'],
    'tgz': ['.tgz', '.tar.gz'],
    'zip': ['.zip'],
}


def guesskind(dest):
    '''guess the archive kind from the extension of dest.

    Returns the key of exts whose extension list contains a suffix of
    dest, or None if no registered extension matches.'''
    # items() instead of iteritems(): the latter does not exist on Python 3,
    # and the table is tiny so building a list on Python 2 costs nothing.
    for kind, extensions in exts.items():
        if any(dest.endswith(ext) for ext in extensions):
            return kind
    return None
Yuya Nishihara
archive: look for first visible revision to build repo identity (issue4591)...
r24681 def _rootctx(repo):
# repo[0] may be hidden
for rev in repo:
return repo[rev]
return repo['null']
Yuya Nishihara
archive: extract metadata() closure to module-level function...
def buildmetadata(ctx):
    '''return the text to store in .hg_archival.txt for ctx.

    The text always starts with "repo:", "node:" and "branch:" lines.  They
    are followed by one "tag:" line per global tag on ctx or, when ctx has
    no global tag, by "latesttag:", "latesttagdistance:" and
    "changessincelatesttag:" lines rendered through the changeset
    templater.'''
    repo = ctx.repo()
    nodeid = ctx.hex()
    if ctx.rev() is None:
        # no revision number (presumably the working directory context):
        # report the first parent instead, flagged with '+' when dirty
        nodeid = ctx.p1().hex()
        if ctx.dirty():
            nodeid = nodeid + '+'
    roothex = _rootctx(repo).hex()
    branch = encoding.fromlocal(ctx.branch())
    header = 'repo: %s\nnode: %s\nbranch: %s\n' % (roothex, nodeid, branch)

    globaltags = [t for t in ctx.tags() if repo.tagtype(t) == 'global']
    taglines = ''.join(['tag: %s\n' % t for t in globaltags])
    if taglines:
        return header + taglines

    # no global tag on this changeset: capture the templater output in a
    # ui buffer and describe the nearest tag(s) instead
    repo.ui.pushbuffer()
    opts = {'template': '{latesttag}\n{latesttagdistance}\n'
                        '{changessincelatesttag}',
            'style': '', 'patch': None, 'git': None}
    cmdutil.show_changeset(repo.ui, repo, opts).show(ctx)
    latest, distance, changes = repo.ui.popbuffer().split('\n')
    pieces = ['latesttag: %s\n' % t for t in latest.split(':')]
    pieces.append('latesttagdistance: %s\n' % distance)
    pieces.append('changessincelatesttag: %s\n' % changes)
    return header + ''.join(pieces)
Martin Geisler
archival: move commands.archive.guess_type to archival.guesskind...
r11557
Benoit Boissinot
use new style classes
class tarit(object):
    '''write archive to tar file or stream.  can write uncompressed,
    or compress with gzip or bzip2.

    dest is either a file name (str) or a file object to write to, mtime
    is the modification time recorded for every member, and kind is the
    compression suffix handed to tarfile ('', 'gz' or 'bz2').'''

    class GzipFileWithTime(gzip.GzipFile):
        '''GzipFile that records a caller-supplied timestamp in the gzip
        header instead of the current time.'''

        def __init__(self, *args, **kw):
            # 'timestamp' is our own keyword; remove it before delegating
            timestamp = None
            if 'timestamp' in kw:
                timestamp = kw.pop('timestamp')
            if timestamp is None:
                self.timestamp = time.time()
            else:
                self.timestamp = timestamp
            gzip.GzipFile.__init__(self, *args, **kw)

        def _write_gzip_header(self):
            self.fileobj.write('\037\213')             # magic header
            self.fileobj.write('\010')                 # compression method
            fname = self.name
            if fname and fname.endswith('.gz'):
                fname = fname[:-3]
            # only announce a file name in the header when there is one
            flags = 0
            if fname:
                flags = gzip.FNAME
            self.fileobj.write(chr(flags))
            gzip.write32u(self.fileobj, long(self.timestamp))
            self.fileobj.write('\002')                 # extra flags (XFL)
            self.fileobj.write('\377')                 # operating system (OS)
            if fname:
                self.fileobj.write(fname + '\000')

    def __init__(self, dest, mtime, kind=''):
        self.mtime = mtime
        # gzip wrapper around the output, if any; closed in done()
        self.fileobj = None
        # file opened by us underneath the gzip wrapper, if any; closed in
        # done()
        self._rawfile = None

        def taropen(name, mode, fileobj=None):
            if kind == 'gz':
                mode = mode[0]
                if not fileobj:
                    fileobj = open(name, mode + 'b')
                    # GzipFile.close() does not close a file object that
                    # was handed to it, so remember the file we opened
                    # ourselves and close it explicitly in done()
                    self._rawfile = fileobj
                gzfileobj = self.GzipFileWithTime(name, mode + 'b',
                                                  zlib.Z_BEST_COMPRESSION,
                                                  fileobj, timestamp=mtime)
                self.fileobj = gzfileobj
                return tarfile.TarFile.taropen(name, mode, gzfileobj)
            else:
                return tarfile.open(name, mode + kind, fileobj)

        if isinstance(dest, str):
            self.z = taropen(dest, mode='w:')
        else:
            # Python 2.5-2.5.1 have a regression that requires a name arg
            self.z = taropen(name='', mode='w|', fileobj=dest)

    def addfile(self, name, mode, islink, data):
        '''add one member to the archive.

        For a symlink, data is the link target and the member is stored
        with mode 0o777 and size 0; otherwise data is the file content.'''
        i = tarfile.TarInfo(name)
        i.mtime = self.mtime
        i.size = len(data)
        if islink:
            i.type = tarfile.SYMTYPE
            i.mode = 0o777
            i.linkname = data
            data = None
            # the target lives in the header, so the member has no body
            i.size = 0
        else:
            i.mode = mode
            data = stringio(data)
        self.z.addfile(i, data)

    def done(self):
        '''finish the archive and close every file this object opened.

        A file object passed in as dest is left open for the caller.'''
        try:
            self.z.close()
            if self.fileobj:
                self.fileobj.close()
        finally:
            # close the underlying file last so the gzip trailer written
            # by the wrapper above reaches it first
            if self._rawfile is not None:
                self._rawfile.close()
Vadim Gelfer
add "archive" command, like "cvs export" only better....
r2112
Benoit Boissinot
use new style classes
class tellable(object):
    '''wrapper that adds a tell method to a write-only file object, as
    needed by zipfile.ZipFile when writing to an http response file
    object.

    The reported position is the total length of everything passed to
    write(); every other attribute is looked up on the wrapped object.'''

    def __init__(self, fp):
        self.offset = 0
        self.fp = fp

    def __getattr__(self, key):
        # only reached for attributes not found on the wrapper itself
        wrapped = self.fp
        return getattr(wrapped, key)

    def write(self, s):
        target = self.fp
        target.write(s)
        self.offset = self.offset + len(s)

    def tell(self):
        return self.offset
Benoit Boissinot
use new style classes
class zipit(object):
    '''write archive to zip file or stream.  can write uncompressed,
    or compressed with deflate.

    dest is a file name (str) or a file object, mtime is the timestamp
    stored for every member, and compress selects deflate over plain
    storage.'''

    def __init__(self, dest, mtime, compress=True):
        if not isinstance(dest, str):
            # a file object that cannot report its position gets wrapped
            # so that it can
            try:
                dest.tell()
            except (AttributeError, IOError):
                dest = tellable(dest)
        if compress:
            method = zipfile.ZIP_DEFLATED
        else:
            method = zipfile.ZIP_STORED
        self.z = zipfile.ZipFile(dest, 'w', method)

        # Python's zipfile module emits deprecation warnings if we try
        # to store files with a date before 1980.
        epoch = 315532800 # calendar.timegm((1980, 1, 1, 0, 0, 0, 1, 1, 0))
        mtime = max(mtime, epoch)
        self.mtime = mtime
        self.date_time = time.gmtime(mtime)[:6]

    def addfile(self, name, mode, islink, data):
        '''add one member to the archive.

        A symlink is stored with mode 0o777 and the unix symlink file
        type, with data as its content; anything else is stored as a
        regular file with the given mode.'''
        info = zipfile.ZipInfo(name, self.date_time)
        info.compress_type = self.z.compression
        # unzip will not honor unix file modes unless file creator is
        # set to unix (id 3).
        info.create_system = 3
        if islink:
            mode, ftype = 0o777, _UNX_IFLNK
        else:
            ftype = _UNX_IFREG
        info.external_attr = (mode | ftype) << 16
        # add "extended-timestamp" extra block, because zip archives
        # without this will be extracted with unexpected timestamp,
        # if TZ is not configured as GMT
        timestampblock = struct.pack(
            '<hhBl',
            0x5455,           # block type: "extended-timestamp"
            1 + 4,            # size of this block
            1,                # "modification time is present"
            int(self.mtime))  # last modification (UTC)
        info.extra += timestampblock
        self.z.writestr(info, data)

    def done(self):
        '''finish the archive by closing the underlying ZipFile.'''
        self.z.close()
Benoit Boissinot
use new style classes
class fileit(object):
    '''write archive as files in directory.

    name is the destination directory; mtime is accepted so that the
    constructor matches the other archivers but is not used.'''

    def __init__(self, name, mtime):
        self.basedir = name
        self.opener = scmutil.opener(self.basedir)

    def addfile(self, name, mode, islink, data):
        '''create name below the destination directory.

        A symlink is created through the opener with data as its target;
        a regular file is written through the opener and then given the
        requested mode.'''
        if not islink:
            fp = self.opener(name, "w", atomictemp=True)
            fp.write(data)
            fp.close()
            # the permission bits are applied once the file is in place
            target = os.path.join(self.basedir, name)
            os.chmod(target, mode)
        else:
            self.opener.symlink(data, name)

    def done(self):
        '''nothing to finalize when writing plain files.'''
        pass
# archive kind -> factory called with (dest, mtime) that returns an object
# providing addfile() and done()
archivers = dict(
    files=fileit,
    tar=tarit,
    tbz2=lambda name, mtime: tarit(name, mtime, 'bz2'),
    tgz=lambda name, mtime: tarit(name, mtime, 'gz'),
    uzip=lambda name, mtime: zipit(name, mtime, False),
    zip=zipit,
)
def archive(repo, dest, node, kind, decode=True, matchfn=None,
            prefix='', mtime=None, subrepos=False):
    '''create archive of repo as it was at node.

    dest can be name of directory, name of archive file, or file
    object to write archive to.

    kind is type of archive to create (a key of archivers).

    decode tells whether to put files through decode filters from
    hgrc.

    matchfn is function to filter names of files to write to archive.

    prefix is name of path to put before every archive member; it
    must be empty when kind is 'files'.

    mtime overrides the changeset date as the timestamp handed to the
    archiver, and subrepos tells whether subrepositories are archived
    too.

    Returns the number of files archived, including those reported by
    subrepositories.  Raises error.Abort for an unknown kind, for a
    prefix given together with kind 'files', and when nothing at all
    was archived.'''

    if kind != 'files':
        prefix = tidyprefix(dest, kind, prefix)
    elif prefix:
        raise error.Abort(_('cannot give prefix when archiving to files'))

    def write(name, mode, islink, getdata):
        # getdata is only called here, so the content of a file is not
        # fetched before it is about to be written
        content = getdata()
        if decode:
            content = repo.wwritedata(name, content)
        archiver.addfile(prefix + name, mode, islink, content)

    if kind not in archivers:
        raise error.Abort(_("unknown archive type '%s'") % kind)

    ctx = repo[node]
    factory = archivers[kind]
    archiver = factory(dest, mtime or ctx.date()[0])

    if repo.ui.configbool("ui", "archivemeta", True):
        metaname = '.hg_archival.txt'
        if not matchfn or matchfn(metaname):
            write(metaname, 0o644, False, lambda: buildmetadata(ctx))

    if matchfn:
        files = [path for path in ctx.manifest().keys() if matchfn(path)]
    else:
        files = ctx.manifest().keys()
    total = len(files)
    if total:
        files.sort()
        repo.ui.progress(_('archiving'), 0, unit=_('files'), total=total)
        for count, path in enumerate(files, 1):
            flags = ctx.flags(path)
            filemode = 0o755 if 'x' in flags else 0o644
            write(path, filemode, 'l' in flags, ctx[path].data)
            repo.ui.progress(_('archiving'), count, item=path,
                             unit=_('files'), total=total)
        repo.ui.progress(_('archiving'), None)

    if subrepos:
        for subpath in sorted(ctx.substate):
            sub = ctx.workingsub(subpath)
            submatch = matchmod.subdirmatcher(subpath, matchfn)
            total += sub.archive(archiver, prefix, submatch)

    if total == 0:
        raise error.Abort(_('no files match the archive pattern'))

    archiver.done()
    return total