synthrepo: use progress helper...
Martin von Zweigbergk
r38427:6540333a default
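This change migrates synthrepo from repeated ui.progress() calls to the ui.makeprogress() helper object. A minimal sketch of the two styles, assuming a `ui` object, the `_` translation function, and an `items` sequence from surrounding context (illustrative only, not part of the patch):

    # Old style: module-level ui.progress() calls, cleared by passing
    # None as the position.
    for i, item in enumerate(items):
        ui.progress(_('analyzing'), i, unit=_('changesets'), total=len(items))
    ui.progress(_('analyzing'), None)   # clear the progress bar

    # New style: create one progress helper, update it, then complete it.
    progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                               total=len(items))
    for i, item in enumerate(items):
        progress.update(i)
    progress.complete()                 # finish and clear the progress bar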
@@ -1,516 +1,514 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

from __future__ import absolute_import
import bisect
import collections
import itertools
import json
import os
import random
import sys
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    nullrev,
    short,
)
from mercurial import (
    context,
    error,
    hg,
    patch,
    registrar,
    scmutil,
)
from mercurial.utils import dateutil

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

newfile = {'new fi', 'rename', 'copy f', 'copy t'}

def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

-        progress = ui.progress
-        _analyzing = _('analyzing')
-        _changesets = _('changesets')
-        _total = len(revs)
-
+        progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
+                                   total=len(revs))
        for i, rev in enumerate(revs):
-            progress(_analyzing, i, unit=_changesets, total=_total)
+            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number of files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        raise error.Abort('%s: %s' % (descpath, err[0].strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

-    progress = ui.progress
    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

+        progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
        for i in xrange(0, initcount):
-            ui.progress(_synthesizing, i, unit=_files, total=initcount)
+            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

-        ui.progress(_synthesizing, None)
+        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files, filectxfn, ui.username(),
                            '%d %d' % dateutil.makedate())
        initnode = mc.commit()
        if ui.debugflag:
            hexfn = hex
        else:
            hexfn = short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
+    progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
    for i in xrange(count):
-        progress(_synthesizing, i, unit=_changesets, total=count)
+        progress.update(i)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = '\n'.join(lines) + '\n'
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = data
        def filectxfn(repo, memctx, path):
            if path not in changes:
                return None
            return context.memfilectx(repo, memctx, path, changes[path])
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
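
For reference, the analyze/synthesize cycle documented in the docstrings above can be driven from Python through the hg command line. This is a hypothetical sketch: the repository names and parameter values are made up, and it assumes hg is on PATH and 'source-repo' is an existing repository to model.

    # Hypothetical driver for the documented analyze/synthesize workflow.
    import subprocess

    def hgrun(*args):
        # Enable the synthrepo extension for this invocation only.
        subprocess.check_call(['hg', '--config', 'extensions.synthrepo='] +
                              list(args))

    # Model an existing repository; writes a JSON description.
    hgrun('analyze', '-R', 'source-repo', '--output', 'model.json')

    # Seed a fresh repository with an initial file tree, then grow it.
    hgrun('init', 'synthetic-repo')
    hgrun('synthesize', '-R', 'synthetic-repo', '--initfiles', '1000',
          'model.json')
    hgrun('synthesize', '-R', 'synthetic-repo', '--count', '500', 'model.json')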