##// END OF EJS Templates
bdiff: gradually enable the popularity hack...
bdiff: gradually enable the popularity hack

Patch from Jason Orendorff

The lower the threshold, the stronger the popularity hack's influence. So at 3999 lines, the hack is disabled; and at 4000 lines, the hack is enabled at maximum strength (t=4).

No source file in mercurial/crew is over 4000 lines. But there are, oh, a few such files in Mozilla. I can testify that this hack causes hg to generate some correct but eyebrow-raising patches.

I think the hack should phase in gradually. The threshold should be high for small files where we don't need it so much. Like this:

    t = (bn < 31000) ? 1000000 / bn : bn / 1000;

That would leave the popularity hack disabled for small files, then gradually phase it in:

    bn <  1000  -- t > bn   (popularity hack is completely disabled)
    bn == 1000  -- t = 1000 (still effectively disabled)
    bn == 2000  -- t = 500  (only hits unusual files)
    bn == 10000 -- t = 100  (only hits especially common lines)
    bn == 31000 -- t = 31   (hack is at maximum power)
    bn == 32000 -- t = 32   (hack could backfire, ease off)

File last commit:

r9431:d1b135f2 default
r9534:8e202431 default
Show More
convcmd.py
399 lines | 13.5 KiB | text/x-python | PythonLexer
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 # convcmd - convert extension commands definition
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621
Patrick Mezard
convert: allow missing tools not to stop source type detection
r6332 from common import NoRepo, MissingTool, SKIPREV, mapfile
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 from cvs import convert_cvs
from darcs import darcs_source
from git import convert_git
from hg import mercurial_source, mercurial_sink
Peter Arrenbrecht
cleanup: drop unused imports
r7873 from subversion import svn_source, svn_sink
Mikkel Fahnøe Jørgensen
initial version of monotone source for convert extension
r6306 from monotone import monotone_source
Aleix Conchillo Flaque
convert: added GNU Arch source converter
r6035 from gnuarch import gnuarch_source
Marek Kubica
convert: add bzr source
r7053 from bzr import bzr_source
Frank Kingswood
convert: Perforce source for conversion to Mercurial
r7823 from p4 import p4_source
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 import filemap
import os, shutil
Matt Mackall
move encoding bits from util to encoding...
r7948 from mercurial import hg, util, encoding
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 from mercurial.i18n import _
Patrick Mezard
convert: improve cycles detection message
r6131 orig_encoding = 'ascii'
def recode(s):
    """Return *s* as a byte string encoded in ``orig_encoding``.

    Unicode objects are encoded directly.  Anything else is treated as
    UTF-8 encoded bytes and decoded first.  Characters that cannot be
    represented in the target encoding are substituted (the 'replace'
    error handler) instead of raising.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
# Source backends, tried in this order by convertsource().
# Each entry is (type name, source factory, default sort mode).
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]

# Sink backends, tried in this order by convertsink().
# Each entry is (type name, sink factory).
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
def convertsource(ui, path, type, rev):
exceptions = []
Patrick Mezard
convert: default revisions order depends on source...
r8692 for name, source, sortmode in source_converters:
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 try:
if not type or name == type:
Patrick Mezard
convert: default revisions order depends on source...
r8692 return source(ui, path, rev), sortmode
Patrick Mezard
convert: allow missing tools not to stop source type detection
r6332 except (NoRepo, MissingTool), inst:
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621 exceptions.append(inst)
if not ui.quiet:
for inst in exceptions:
Martin Geisler
move % out of translatable strings...
r6913 ui.write("%s\n" % inst)
Patrick Mezard
convert: document source and sink identifiers, fix error message
r6976 raise util.Abort(_('%s: missing or unsupported repository') % path)
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621
def convertsink(ui, path, type):
for name, sink in sink_converters:
try:
if not type or name == type:
return sink(ui, path)
except NoRepo, inst:
ui.note(_("convert: %s\n") % inst)
Martin Geisler
move % out of translatable strings...
r6913 raise util.Abort(_('%s: unknown repository type') % path)
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621
class converter(object):
    """Copy the not-yet-converted revisions of a source into a sink.

    The object keeps a persistent revision map (source revision ID ->
    destination revision ID), an author map, and the optional splice
    and branch maps named in *opts*.  convert() runs the whole process.
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Set up the maps used during conversion.

        ui         -- user interface object used for all messages
        source     -- source converter object
        dest       -- sink converter object
        revmapfile -- path of the persistent revision map
        opts       -- option mapping; the keys 'authors', 'splicemap'
                      and 'branchmap' are read here
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings). (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            # Only when a map was given explicitly is the merged result
            # written back by writeauthormap().
            self.authorfile = self.dest.authorfile()

        self.splicemap = mapfile(ui, opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        The walk starts at *heads* and stops at revisions already present
        in the revision map.  Every visited commit is loaded into the
        commit cache as a side effect.  *heads* itself is left untouched.
        '''
        # Work on a copy so the caller's list is not drained.
        visit = list(heads)
        known = set()
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known.add(n)
            commit = self.cachecommit(n)
            parents[n] = list(commit.parents)
            visit.extend(commit.parents)
        return parents

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        parents  -- mapping of revision to its parents, as returned by
                    walktree()
        sortmode -- 'branchsort', 'datesort' or 'sourcesort'; selects how
                    the next revision is picked among the eligible ones

        Raises util.Abort for an unknown sort mode, when a cycle is
        detected, or when some revisions could not be ordered.
        '''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = list(parents)
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.pop(0)
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list so the closure can rebind the value.
            prev = [None]

            def picknext(nodes):
                chosen = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        chosen = n
                        break
                prev[0] = chosen
                return chosen

            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey

            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]

            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            # Parsed dates are memoized per revision.
            dates = {}

            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        else:
            raise util.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                     % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write the author map to self.authorfile, one 'src=dst' per line.

        Does nothing when no author file has been configured.
        """
        authorfile = self.authorfile
        if not authorfile:
            return
        self.ui.status(_('Writing author map file %s\n') % authorfile)
        ofile = open(authorfile, 'w+')
        try:
            for author in self.authors:
                ofile.write("%s=%s\n" % (author, self.authors[author]))
        finally:
            # Release the handle even if a write fails.
            ofile.close()

    def readauthormap(self, authorfile):
        """Merge the 'src=dst' lines of *authorfile* into self.authors.

        Blank lines and lines starting with '#' are skipped.  Lines
        without '=' are reported with ui.warn and ignored.  An entry
        that differs from an existing mapping replaces it and is
        reported with ui.status.
        """
        afile = open(authorfile, 'r')
        try:
            for line in afile:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('Ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                previous = self.authors.get(srcauthor)
                if previous in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                else:
                    m = _('overriding mapping for author %s, was %s, '
                          'will be %s\n')
                    self.ui.status(m % (srcauthor, previous, dstauthor))
                # Store the mapping in both cases, so an announced
                # override actually takes effect.
                self.authors[srcauthor] = dstauthor
        finally:
            # Release the handle even if parsing or reporting fails.
            afile.close()

    def cachecommit(self, rev):
        """Fetch commit *rev* from the source, apply the author and branch
        maps to it, store it in the commit cache and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = self.branchmap.get(commit.branch, commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the single revision *rev* and record it in the map.

        *rev* must already be in the commit cache.
        """
        commit = self.commitcache[rev]

        changes = self.source.getchanges(rev)
        if isinstance(changes, basestring):
            # The source returned a revision ID instead of a change list:
            # either the SKIPREV marker or another source revision whose
            # destination is reused for this one.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes

        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)

        try:
            # A splicemap entry replaces the natural parents; entries may
            # be separated by commas or whitespace.
            parents = self.splicemap[rev].replace(',', ' ').split()
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]

        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      self.source, self.map)
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the whole conversion using the given sort mode.

        Scans the source, orders the pending revisions, copies them one
        by one, converts the tags and writes the author map.  cleanup()
        is always called, whether or not the conversion succeeds.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # Stays None when there was nothing to convert.
            c = None

            self.ui.status(_("converting...\n"))
            for c in t:
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # Recode the log message to the original local encoding
                # by hand instead of using tolocal(), because
                # encoding.encoding has been switched to 'utf-8' for the
                # duration of the conversion.
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.copy(c)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev, tagsparent = self.dest.puttags(ctags)
                if nrev and tagsparent:
                    # write another hash correspondence to override the
                    # previous one so we don't end up with extra tag heads
                    tagsparents = [e for e in self.map.iteritems()
                                   if e[1] == tagsparent]
                    if tagsparents:
                        self.map[tagsparents[0][0]] = nrev

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Finalize sink and source, then close the revision map.

        source.after() runs even when dest.after() raises; the map is
        closed only when both calls succeed.
        """
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621
def convert(ui, src, dest=None, revmapfile=None, **opts):
    # Entry point: convert the repository at 'src' into 'dest'.
    #
    # ui         -- user interface object used for messages
    # src        -- path of the source repository
    # dest       -- destination path; derived from 'src' when omitted
    # revmapfile -- path of the revision map; asked from the sink when
    #               omitted
    # opts       -- options; read here: 'dest_type', 'source_type',
    #               'rev', 'branchsort', 'datesort', 'sourcesort' and
    #               'filemap' (the whole mapping is also handed to the
    #               converter object)
    #
    # Raises util.Abort when more than one sort mode is requested or when
    # --sourcesort is requested for a source without native ordering.
    #
    # Documented with comments rather than a docstring so that the
    # function's __doc__ is left unchanged.

    # Remember the local encoding for recode(), then switch the global
    # encoding to UTF-8.  It is not restored by this function.
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        # The source could not be opened: remove whatever the sink has
        # created so far, then propagate the error.
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = ('branchsort', 'datesort', 'sourcesort')
    sortmode = [m for m in sortmodes if opts.get(m)]
    if len(sortmode) > 1:
        raise util.Abort(_('more than one sort mode specified'))
    # Fall back to the default sort mode of the detected source type.
    sortmode = sortmode and sortmode[0] or defaultsort
    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise util.Abort(_('--sourcesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            # Fall back to a "map" file inside the destination path.  The
            # sink object itself is not a path and cannot be joined.
            # 'except Exception' (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            revmapfile = os.path.join(dest, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)
Patrick Mezard
convert: move commands definition to ease demandload job (issue 860)
r5621