##// END OF EJS Templates
typing: add type hints to bdiff implementations...
typing: add type hints to bdiff implementations Not super important code, but this was an exercise in using `merge-pyi` to fold type stubs back into the code on something small. The cext stubs don't seem to be getting used (at least the only thing in `.pytype/pyi/mercurial/cext` after a run generating the stubs is `__init__.pyi`), so maybe this will help some.

File last commit:

r49790:2cce2fa5 default
r50493:594fc56c default
Show More
synthrepo.py
564 lines | 18.1 KiB | text/x-python | PythonLexer
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
r17734 # synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
'''synthesize structurally interesting change history
This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.
Properties that are analyzed and synthesized include the following:
- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:
- Merges are treated as regular commits with two parents, which is not
realistic
- Modifications are not treated as operations on hunks of lines, but
as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''
Pulkit Goyal
contrib: synthrepo use absolute_import
r28563 import bisect
import collections
import itertools
import json
import os
import random
import sys
import time
Yuya Nishihara
py3: move up symbol imports to enforce import-checker rules...
r29205
from mercurial.i18n import _
from mercurial.node import (
nullid,
nullrev,
short,
)
Pulkit Goyal
contrib: synthrepo use absolute_import
r28563 from mercurial import (
context,
Yuya Nishihara
diffutil: move the module out of utils package...
r38607 diffutil,
Pulkit Goyal
contrib: synthrepo use absolute_import
r28563 error,
hg,
Martin von Zweigbergk
errors: raise InputError on bad revset to revrange() iff provided by the user...
r48928 logcmdutil,
Pulkit Goyal
contrib: synthrepo use absolute_import
r28563 patch,
Gregory Szorc
synthrepo: use pycompat.xrange...
r43274 pycompat,
Yuya Nishihara
registrar: move cmdutil.command to registrar module (API)...
r32337 registrar,
Pulkit Goyal
contrib: synthrepo use absolute_import
r28563 )
Augie Fackler
formatting: blacken the codebase...
r43346 from mercurial.utils import dateutil
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
r17734
Augie Fackler
extensions: change magic "shipped with hg" string...
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# Command table filled in by the @command decorator below.
cmdtable = {}
command = registrar.command(cmdtable)

# Six-character line prefixes which parsegitdiff() treats as marking the
# current file as added (it compares them against line[:6]).
newfile = {'new fi', 'rename', 'copy f', 'copy t'}
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
r17734
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
def zerodict():
    """Return a new dict whose missing keys read as 0 (a counter table)."""
    return collections.defaultdict(int)
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
def roundto(x, k):
    """Round x to an int, bucketing large values to a multiple of k.

    Values greater than ``2 * k`` are rounded to the nearest multiple of
    ``k``; smaller values are simply rounded to the nearest integer so that
    small quantities keep their exact value.
    """
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
contrib: add a commit synthesizer for reproducing scaling problems...
def parsegitdiff(lines):
    """Summarize a git-style diff, one tuple per file.

    ``lines`` is an iterable of diff lines.  For every ``diff -`` header a
    tuple ``(filename, mar, lineadd, lineremove, binary)`` is yielded once
    the next header (or the end of input) is reached:

    - ``filename`` is group 1 of ``patch.gitre`` matched on the header line
    - ``mar`` is 'm' by default, 'a' once a line whose first six characters
      are in ``newfile`` is seen, 'r' once a 'delete' line is seen
    - ``lineadd`` maps the rounded length of each added line to a count
    - ``lineremove`` is the number of removed lines
    - ``binary`` is True once a line starting with 'GIT bi' is seen
    """
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            # A new file header: flush what was gathered for the previous one.
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            # '--- ' and '+++ ' are the file header lines, not content.
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                # len(line) - 1 leaves out the leading '+' marker.
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary
Augie Fackler
formatting: blacken the codebase...
r43346
def _collectstats(ui, repo, root, revs):
    """Gather the model data that ``analyze`` writes out.

    Counts the files in every directory below ``root`` (skipping ``.hg``)
    and, when ``repo`` is truthy, folds each revision selected by ``revs``
    into a set of frequency tables.  Returns a dict mapping table names to
    lists of ``(value, count)`` pairs sorted by descending count, plus the
    number of revisions under ``'revs'``.
    """
    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        # Removing the entry in place stops os.walk from descending into it.
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = logcmdutil.revrange(repo, revs)
        revs.sort()

        # The diff options do not depend on the revision; build them once.
        diffopts = diffutil.diffallopts(ui, {'git': True})
        progress = ui.makeprogress(
            _('analyzing'), unit=_('changesets'), total=len(revs)
        )
        for i, rev in enumerate(revs):
            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            # Interarrival time is measured against the numerically previous
            # revision, which is not necessarily the first parent.
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            # Chain the chunks lazily instead of summing lists, which copies
            # the accumulated list once per chunk.
            diff = itertools.chain.from_iterable(
                d.splitlines() for d in ctx.diff(pctx, opts=diffopts)
            )
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(
                diff
            ):
                if isbin:
                    continue
                added = sum(lineadd.values(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[
                            roundto(added, 5), roundto(lineremove, 5)
                        ] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.items():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1
        progress.complete()

    # Histogram of "number of children" over all parent revisions seen.
    invchildren = zerodict()
    for count in children.values():
        invchildren[count] += 1

    def pronk(d):
        return sorted(d.items(), key=lambda x: x[1], reverse=True)

    return {
        'revs': len(revs),
        'initdirs': pronk(dirs),
        'lineschanged': pronk(lineschanged),
        'children': pronk(invchildren),
        'fileschanged': pronk(fileschanged),
        'filesadded': pronk(filesadded),
        'linesinfilesadded': pronk(linesinfilesadded),
        'dirsadded': pronk(dirsadded),
        'filesremoved': pronk(filesremoved),
        'linelengths': pronk(linelengths),
        'parents': pronk(parents),
        'p1distance': pronk(p1distance),
        'p2distance': pronk(p2distance),
        'interarrival': pronk(interarrival),
        'tzoffset': pronk(tzoffset),
    }


@command(
    'analyze',
    [
        ('o', 'output', '', _('write output to given file'), _('FILE')),
        ('r', 'rev', [], _('analyze specified revisions'), _('REV')),
    ],
    _('hg analyze'),
    optionalrepo=True,
)
def analyze(ui, repo, *revs, **opts):
    """create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    """
    # NOTE(review): the command is registered with optionalrepo=True and the
    # history pass is guarded by "if repo", yet repo.root is read
    # unconditionally here -- confirm what root should be without a repo.
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        # NOTE(review): root always ends with a separator at this point, so
        # basename() returns '' and the default name is just '.json' --
        # presumably the repository directory name was intended; confirm.
        output = os.path.basename(root) + '.json'

    # Open the destination before the (potentially long) analysis so that an
    # unwritable path is reported immediately.
    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')
    try:
        model = _collectstats(ui, repo, root, revs)
        if output != '-':
            ui.status(_('writing output to %s\n') % output)
        json.dump(model, fp)
    finally:
        # Close even when the analysis fails so the handle is not leaked.
        fp.close()
Augie Fackler
formatting: blacken the codebase...
r43346
@command(
    'synthesize',
    [
        ('c', 'count', 0, _('create given number of commits'), _('COUNT')),
        ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
        ('', 'initfiles', 0, _('initial file count to create'), _('COUNT')),
    ],
    _('hg synthesize [OPTION].. DESCFILE'),
)
def synthesize(ui, repo, descpath, **opts):
    """synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number of files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    """
    # "hex" is not among this file's mercurial.node imports, so the bare name
    # would resolve to the builtin; import the node formatter explicitly.
    from mercurial.node import hex as nodehex

    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        # Exceptions cannot be indexed on Python 3; report strerror when the
        # error carries one and fall back to the error itself otherwise.
        reason = getattr(err, 'strerror', None) or err
        raise error.Abort('%s: %s' % (descpath, reason))
    try:
        desc = json.load(fp)
    finally:
        fp.close()

    def cdf(pairs):
        """Turn (value, count) pairs into (values, cumulative probabilities).

        Pairs are ordered by descending count; an empty input gives two
        empty lists.
        """
        if not pairs:
            return [], []
        vals, probs = zip(*sorted(pairs, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        # The 'U' mode flag was removed in Python 3.11; text mode already
        # translates newlines.
        fp = open(dictfile, 'r')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    with fp:
        words = fp.read().splitlines()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            # NOTE(review): on Python 3 this calls bytes.replace() with str
            # arguments and hands bytes keys to renamedirs() -- confirm and
            # port together with the rest of this file's str/bytes handling.
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        """Draw one value at random from a (values, cumulative probs) pair."""
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        """Join random words until a modeled line length is reached."""
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    def seedinitialfiles():
        # Synthesize a single initial revision adding files to the repo
        # according to the modeled directory structure.
        initcount = int(opts['initfiles'])
        if not (initcount and initdirs):
            return

        pctx = repo['.']
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

        progress = ui.makeprogress(
            _synthesizing, unit=_files, total=initcount
        )
        for i in pycompat.xrange(0, initcount):
            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            # Remember every new ancestor directory so that later picks
            # cannot reuse one of them as a file name.
            parent = os.path.dirname(path)
            while parent and parent not in dirs:
                dirs.add(parent)
                parent = os.path.dirname(parent)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(
            repo,
            [pctx.node(), nullid],
            message,
            files,
            filectxfn,
            ui.username(),
            '%d %d' % dateutil.makedate(),
        )
        initnode = mc.commit()
        hexfn = nodehex if ui.debugflag else short
        ui.status(
            _('added commit %s with %d files\n') % (hexfn(initnode), len(files))
        )

    def synthesizecommits():
        # Synthesize incremental revisions to the repository, adding repo
        # depth.
        count = int(opts['count'])
        heads = set(map(repo.changelog.rev, repo.heads()))
        progress = ui.makeprogress(
            _synthesizing, unit=_changesets, total=count
        )
        for i in pycompat.xrange(count):
            progress.update(i)

            # Looked up afresh on every iteration, after the previous commit.
            node = repo.changelog.node
            revs = len(repo)

            def pickhead(heads, distance):
                if heads:
                    lheads = sorted(heads)
                    rev = revs - min(pick(distance), revs)
                    if rev < lheads[-1]:
                        rev = lheads[bisect.bisect_left(lheads, rev)]
                    else:
                        rev = lheads[-1]
                    return rev, node(rev)
                return nullrev, nullid

            r1 = revs - min(pick(p1distance), revs)
            p1 = node(r1)

            # the number of heads will grow without bound if we use a pure
            # model, so artificially constrain their proliferation
            toomanyheads = len(heads) > random.randint(1, 20)
            if p2distance[0] and (pick(parents) == 2 or toomanyheads):
                r2, p2 = pickhead(heads.difference([r1]), p2distance)
            else:
                r2, p2 = nullrev, nullid

            pl = [p1, p2]
            pctx = repo[r1]
            mf = pctx.manifest()
            mfk = mf.keys()
            changes = {}
            if mfk:
                for __ in pycompat.xrange(pick(fileschanged)):
                    # Try up to ten times to find a file that may be edited.
                    for __ in pycompat.xrange(10):
                        fctx = pctx.filectx(random.choice(mfk))
                        path = fctx.path()
                        if not (
                            path in nevertouch
                            or fctx.isbinary()
                            or 'l' in fctx.flags()
                        ):
                            break
                    lines = fctx.data().splitlines()
                    add, remove = pick(lineschanged)
                    for __ in pycompat.xrange(remove):
                        if not lines:
                            break
                        del lines[random.randrange(0, len(lines))]
                    for __ in pycompat.xrange(add):
                        lines.insert(
                            random.randint(0, len(lines)), makeline()
                        )
                    path = fctx.path()
                    changes[path] = '\n'.join(lines) + '\n'
                # NOTE(review): this loop picks a path that is not already
                # changed but never records it, so no removal reaches the
                # commit -- confirm whether removals should be synthesized.
                for __ in pycompat.xrange(pick(filesremoved)):
                    for __ in pycompat.xrange(10):
                        path = random.choice(mfk)
                        if path not in changes:
                            break
            if filesadded:
                dirs = list(pctx.dirs())
                dirs.insert(0, '')
            for __ in pycompat.xrange(pick(filesadded)):
                # Retry until the new path does not name an existing
                # directory.
                pathstr = ''
                while pathstr in dirs:
                    path = [random.choice(dirs)]
                    if pick(dirsadded):
                        path.append(random.choice(words))
                    path.append(random.choice(words))
                    pathstr = '/'.join(filter(None, path))
                data = (
                    '\n'.join(
                        makeline()
                        for __ in pycompat.xrange(pick(linesinfilesadded))
                    )
                    + '\n'
                )
                changes[pathstr] = data

            def filectxfn(repo, memctx, path):
                if path not in changes:
                    return None
                return context.memfilectx(repo, memctx, path, changes[path])

            if not changes:
                continue
            if revs:
                date = repo['tip'].date()[0] + pick(interarrival)
            else:
                date = time.time() - (86400 * count)
            # dates in mercurial must be positive, fit in 32-bit signed
            # integers.
            date = min(0x7FFFFFFF, max(0, date))
            user = random.choice(words) + '@' + random.choice(words)
            mc = context.memctx(
                repo,
                pl,
                makeline(minimum=2),
                sorted(changes),
                filectxfn,
                user,
                '%d %d' % (date, pick(tzoffset)),
            )
            newnode = mc.commit()
            heads.add(repo.changelog.rev(newnode))
            heads.discard(r1)
            heads.discard(r2)
        progress.complete()

    # Release both locks even if synthesis fails part-way through; they are
    # released in the reverse order of acquisition.
    wlock = repo.wlock()
    try:
        lock = repo.lock()
        try:
            seedinitialfiles()
            synthesizecommits()
        finally:
            lock.release()
    finally:
        wlock.release()
Mike Edgar
contrib/synthrepo: generate initial repo contents using directory shape model...
r22708
Augie Fackler
formatting: blacken the codebase...
r43346
Mike Edgar
contrib/synthrepo: generate initial repo contents using directory shape model...
def renamedirs(dirs, words):
    """Rename the directory names in the per-dir file count dict.

    ``dirs`` maps a directory path to its file count.  Every path component
    is replaced by the next entry from ``words`` (cycled endlessly), with
    shared prefixes renamed consistently.  Leading path separators are
    stripped first.  Returns a list of ``[renamed path, count]`` entries in
    the iteration order of ``dirs``.
    """
    wordgen = itertools.cycle(words)
    # The root ('') keeps its name and terminates the recursion below.
    replacements = {'': ''}

    def rename(dirpath):
        """Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        """
        if dirpath in replacements:
            return replacements[dirpath]
        head = os.path.dirname(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed

    # dict.iteritems() does not exist on Python 3; items() works everywhere.
    return [
        [rename(dirpath.lstrip(os.sep)), count]
        for dirpath, count in dirs.items()
    ]