synthrepo: simply use the ui passed as a function argument
Yuya Nishihara
r38603:c6398fc2 default
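
The change itself is a one-liner in analyze(): the diff options are now built from the ui object that the command function already receives, instead of reaching through the changectx's private _repo attribute. Since ctx is obtained from repo[rev] inside the same function, the configuration read is the same either way; the function argument is simply the more direct, public route:

    # before: reach through a private attribute of the context
    diffopts = diffutil.diffopts(ctx._repo.ui, {'git': True})
    # after: use the ui already passed to analyze(ui, repo, ...)
    diffopts = diffutil.diffopts(ui, {'git': True})
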
@@ -1,521 +1,521 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

from __future__ import absolute_import
import bisect
import collections
import itertools
import json
import os
import random
import sys
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    nullrev,
    short,
)
from mercurial import (
    context,
    error,
    hg,
    patch,
    registrar,
    scmutil,
)
from mercurial.utils import (
    dateutil,
    diffutil,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

newfile = {'new fi', 'rename', 'copy f', 'copy t'}

def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

        progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                   total=len(revs))
        for i, rev in enumerate(revs):
            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
-            diffopts = diffutil.diffopts(ctx._repo.ui, {'git': True})
+            diffopts = diffutil.diffopts(ui, {'git': True})
            diff = sum((d.splitlines()
                        for d in ctx.diff(pctx, opts=diffopts)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1
        progress.complete()

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        raise error.Abort('%s: %s' % (descpath, err[0].strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

        progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
        for i in xrange(0, initcount):
            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files, filectxfn, ui.username(),
                            '%d %d' % dateutil.makedate())
        initnode = mc.commit()
        if ui.debugflag:
            hexfn = hex
        else:
            hexfn = short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
    progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
    for i in xrange(count):
        progress.update(i)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = '\n'.join(lines) + '\n'
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = data
        def filectxfn(repo, memctx, path):
            if path not in changes:
                return None
            return context.memfilectx(repo, memctx, path, changes[path])
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)
    progress.complete()

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
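
The sampling machinery above is easy to lose in the listing: analyze stores each measured property as a list of (value, count) pairs, and synthesize turns each list into a cumulative distribution with cdf() and draws from it with pick(), a binary search over the cumulative weights. A minimal, self-contained sketch of that mechanism (written as Python 3 for convenience; the file above is Python 2, and the 'parents' table below is invented for illustration):

    import bisect
    import random

    def cdf(pairs):
        # Turn [(value, count), ...] into (values, cumulative probabilities),
        # most frequent values first, as the extension does.
        if not pairs:
            return [], []
        vals, counts = zip(*sorted(pairs, key=lambda x: x[1], reverse=True))
        total = float(sum(counts))
        running, cdfs = 0, []
        for c in counts:
            running += c
            cdfs.append(running / total)
        return vals, cdfs

    def pick(dist):
        # Draw a value with probability proportional to its count.
        vals, cdfs = dist
        return vals[bisect.bisect_left(cdfs, random.random())]

    # Hypothetical model table: 90% of commits had one parent, 10% had two.
    parents = cdf([(1, 90), (2, 10)])
    print(pick(parents))  # prints 1 about nine times in ten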
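
Similarly, renamedirs anonymizes the modeled directory tree while keeping its shape: every path prefix is renamed exactly once and the result memoized, so sibling directories keep a common renamed parent. A standalone sketch of the same memoization (Python 3, with invented inputs):

    import itertools
    import os

    def renamedirs(dirs, words):
        wordgen = itertools.cycle(words)
        replacements = {'': ''}  # memo table: original prefix -> renamed prefix
        def rename(dirpath):
            if dirpath in replacements:
                return replacements[dirpath]
            head, _tail = os.path.split(dirpath)
            head = rename(head) if head else ''
            renamed = os.path.join(head, next(wordgen))
            replacements[dirpath] = renamed
            return renamed
        return [[rename(d.lstrip(os.sep)), n] for d, n in dirs.items()]

    print(renamedirs({'src/core': 10, 'src/ui': 4}, ['ham', 'spam', 'eggs']))
    # [['ham/spam', 10], ['ham/eggs', 4]] -- both keep the shared parent 'ham'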
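
For reference, the end-to-end workflow the two docstrings describe, with illustrative paths (the extension is not enabled by default; something like --config extensions.synthrepo=path/to/synthrepo.py, or an [extensions] hgrc entry, is assumed):

    $ hg -R ~/src/somerepo analyze -o model.json
    $ hg init synthetic
    $ cd synthetic
    $ hg synthesize --initfiles 1000 model.json
    $ hg synthesize --count 500 model.json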