upstream/mercurial-mirror Commit - r17734:619068c2

1

# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

7

8

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

37

38

import bisect, collections, json, os, random, time

39

from mercurial import cmdutil, context, patch, scmutil, url, util

40

from mercurial.i18n import _

41

from mercurial.node import nullrev, nullid

42

43

testedwith = 'internal'

cmdtable = {}
command = cmdutil.command(cmdtable)

# first six characters of the git extended-header lines ('new file',
# 'rename from/to', 'copy from', 'copy to') for which parsegitdiff
# classifies the file as added
newfile = set(('new fi', 'rename', 'copy f', 'copy t'))

49

50

def zerodict():
    '''return a dict in which every missing key starts out as 0'''
    return collections.defaultdict(int)

52

53

def roundto(x, k):
    '''round x to a multiple of k, keeping small values at full resolution

    Values greater than 2 * k are rounded to the nearest multiple of k;
    anything else is only rounded to the nearest integer. The result is
    always an int.
    '''
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

57

58

def parsegitdiff(lines):
    '''parse the lines of a git-style diff into per-file statistics

    Yields one (filename, mar, lineadd, lineremove, binary) tuple per
    file section of the diff:

    - mar is 'm' by default, 'a' if a header from newfile was seen and
      'r' if a 'delete' header was seen
    - lineadd maps a rounded line length to the number of added lines
      of that length
    - lineremove is the number of removed lines
    - binary is True if a 'GIT bi' header was seen
    '''
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        # all headers are recognised by their first six characters
        start = line[:6]
        if start == 'diff -':
            # a new file section begins: flush the previous one first
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            # '--- ' and '+++ ' lines are skipped, they are not content
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                # len(line) - 1 leaves out the leading '+' marker
                lineadd[roundto(len(line) - 1, 5)] += 1
    # flush the last file section, if any
    if filename:
        yield filename, mar, lineadd, lineremove, binary

82

83

def _analyzerevs(ui, repo, revs):
    '''gather the statistical model for the given revisions

    Returns a dict with the number of revisions under 'revs' and, for
    every analyzed property, a list of (value, count) pairs sorted by
    decreasing count.
    '''
    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    progress = ui.progress
    _analyzing = _('analyzing')
    _changesets = _('changesets')
    _total = len(revs)

    for i, rev in enumerate(revs):
        progress(_analyzing, i, unit=_changesets, total=_total)
        ctx = repo[rev]
        pl = ctx.parents()
        pctx = pl[0]
        prev = pctx.rev()
        children[prev] += 1
        p1distance[rev - prev] += 1
        parents[len(pl)] += 1
        tzoffset[ctx.date()[1]] += 1
        if len(pl) > 1:
            p2distance[rev - pl[1].rev()] += 1
        # interarrival time is measured against revision rev - 1, which
        # is not necessarily the first parent
        if prev == rev - 1:
            lastctx = pctx
        else:
            lastctx = repo[rev - 1]
        if lastctx.rev() != nullrev:
            delta = ctx.date()[0] - lastctx.date()[0]
            interarrival[roundto(delta, 300)] += 1
        # flatten the diff chunks into one list of lines
        diff = []
        for chunk in ctx.diff(pctx, opts=dict(git=True)):
            diff.extend(chunk.splitlines())
        fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
        for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
            if binary:
                continue
            added = sum(lineadd.itervalues(), 0)
            if mar == 'm':
                if added and lineremove:
                    key = roundto(added, 5), roundto(lineremove, 5)
                    lineschanged[key] += 1
                    filechanges += 1
            elif mar == 'a':
                fileadds += 1
                if '/' in filename:
                    filedir = filename.rsplit('/', 1)[0]
                    if filedir not in pctx.dirs():
                        diradds += 1
                linesinfilesadded[roundto(added, 5)] += 1
            elif mar == 'r':
                fileremoves += 1
            for length, count in lineadd.iteritems():
                linelengths[length] += count
        fileschanged[filechanges] += 1
        filesadded[fileadds] += 1
        dirsadded[diradds] += 1
        filesremoved[fileremoves] += 1

    # histogram of "number of children" over the recorded parents
    invchildren = zerodict()
    for count in children.itervalues():
        invchildren[count] += 1

    def pronk(d):
        # (value, count) pairs, most frequent first
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    return dict(revs=len(revs),
                lineschanged=pronk(lineschanged),
                children=pronk(invchildren),
                fileschanged=pronk(fileschanged),
                filesadded=pronk(filesadded),
                linesinfilesadded=pronk(linesinfilesadded),
                dirsadded=pronk(dirsadded),
                filesremoved=pronk(filesremoved),
                linelengths=pronk(linelengths),
                parents=pronk(parents),
                p1distance=pronk(p1distance),
                p2distance=pronk(p2distance),
                interarrival=pronk(interarrival),
                tzoffset=pronk(tzoffset),
                )

# the default of --output is a string (not a list) so that the option
# value can be compared with '-' and passed to open()
@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'))
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    # imported here because the module-level imports do not provide sys
    import sys

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(repo.root) + '.json'

    # resolve the revisions before opening the output file so that a bad
    # revision specification does not leave an empty file behind
    revs = scmutil.revrange(repo, revs)
    revs.sort()

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    try:
        model = _analyzerevs(ui, repo, revs)
        if output != '-':
            ui.status(_('writing output to %s\n') % output)
        json.dump(model, fp)
    finally:
        # close what was opened here, but never the process-wide stdout
        if fp is not sys.stdout:
            fp.close()

209

210

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = url.open(ui, descpath)
    except Exception as err:
        # the reason is looked up as .strerror on the first exception
        # argument; fall back to the exception text when that is missing
        cause = err.args[0] if err.args else err
        reason = getattr(cause, 'strerror', None) or err
        raise util.Abort('%s: %s' % (descpath, reason))
    try:
        desc = json.load(fp)
    finally:
        fp.close()

    def cdf(l):
        # turn (value, count) pairs into a tuple of values, most frequent
        # first, and the matching list of cumulative probabilities
        # NOTE(review): raises on an empty list, e.g. 'p2distance' of a
        # model built from history without merges -- confirm the intent
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise util.Abort('%s: %s' % (dictfile, err.strerror))
    try:
        words = fp.read().splitlines()
    finally:
        fp.close()
    if not words:
        # random.choice() below cannot pick from an empty list
        raise util.Abort('%s: dictionary contains no words' % dictfile)

    def pick(cdf):
        # draw a value at random according to its cumulative probability
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def makeline(minimum=0):
        # join random words until the drawn line length (or minimum,
        # whichever is larger) is reached
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))

    progress = ui.progress
    _synthesizing = _('synthesizing')
    _changesets = _('changesets')

    count = int(opts['count'])

    wlock = lock = None
    try:
        wlock = repo.wlock()
        lock = repo.lock()

        heads = set(map(repo.changelog.rev, repo.heads()))
        for i in xrange(count):
            progress(_synthesizing, i, unit=_changesets, total=count)

            node = repo.changelog.node
            revs = len(repo)

            def pickhead(heads, distance):
                # pick the head at or after the revision found by going
                # back a random distance, or the newest head
                if heads:
                    lheads = sorted(heads)
                    rev = revs - min(pick(distance), revs)
                    if rev < lheads[-1]:
                        rev = lheads[bisect.bisect_left(lheads, rev)]
                    else:
                        rev = lheads[-1]
                    return rev, node(rev)
                return nullrev, nullid

            r1 = revs - min(pick(p1distance), revs)
            p1 = node(r1)

            # the number of heads will grow without bound if we use a pure
            # model, so artificially constrain their proliferation
            if pick(parents) == 2 or len(heads) > random.randint(1, 20):
                r2, p2 = pickhead(heads.difference([r1]), p2distance)
            else:
                r2, p2 = nullrev, nullid

            pl = [p1, p2]
            pctx = repo[r1]
            mf = pctx.manifest()
            mfk = mf.keys()
            # path -> memfilectx for new content, or None for a removal
            changes = {}
            if mfk:
                for __ in xrange(pick(fileschanged)):
                    # try up to ten times to find a file that is neither
                    # special, binary nor flagged 'l'; if none turns up
                    # the last candidate is used regardless
                    for __ in xrange(10):
                        fctx = pctx.filectx(random.choice(mfk))
                        path = fctx.path()
                        if not (path in nevertouch or fctx.isbinary() or
                                'l' in fctx.flags()):
                            break
                    lines = fctx.data().splitlines()
                    add, remove = pick(lineschanged)
                    for __ in xrange(remove):
                        if not lines:
                            break
                        del lines[random.randrange(0, len(lines))]
                    for __ in xrange(add):
                        lines.insert(random.randint(0, len(lines)),
                                     makeline())
                    changes[path] = context.memfilectx(
                        path, '\n'.join(lines) + '\n')
                for __ in xrange(pick(filesremoved)):
                    # up to ten attempts to find a file that is not
                    # already part of this changeset
                    for __ in xrange(10):
                        path = random.choice(mfk)
                        if path not in changes:
                            changes[path] = None
                            break
            # '' stands for the repository root
            dirs = list(pctx.dirs())
            dirs.append('')
            for __ in xrange(pick(filesadded)):
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                path = '/'.join(filter(None, path))
                data = '\n'.join(
                    makeline()
                    for __ in xrange(pick(linesinfilesadded))) + '\n'
                changes[path] = context.memfilectx(path, data)

            def filectxfn(repo, memctx, path):
                # IOError is raised for paths recorded as removed
                data = changes[path]
                if data is None:
                    raise IOError
                return data

            if not changes:
                continue
            if revs:
                date = repo['tip'].date()[0] + pick(interarrival)
            else:
                date = time.time() - (86400 * count)
            user = random.choice(words) + '@' + random.choice(words)
            mc = context.memctx(repo, pl, makeline(minimum=2),
                                sorted(changes.iterkeys()),
                                filectxfn, user,
                                '%d %d' % (date, pick(tzoffset)))
            newnode = mc.commit()
            heads.add(repo.changelog.rev(newnode))
            heads.discard(r1)
            heads.discard(r2)
    finally:
        # release in reverse order of acquisition, also on failure
        if lock is not None:
            lock.release()
        if wlock is not None:
            wlock.release()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

			@@ -0,0 +1,377 b''
		1	# synthrepo.py - repo synthesis
		2	#
		3	# Copyright 2012 Facebook
		4	#
		5	# This software may be used and distributed according to the terms of the
		6	# GNU General Public License version 2 or any later version.
		7
		8	'''synthesize structurally interesting change history
		9
		10	This extension is useful for creating a repository with properties
		11	that are statistically similar to an existing repository. During
		12	analysis, a simple probability table is constructed from the history
		13	of an existing repository. During synthesis, these properties are
		14	reconstructed.
		15
		16	Properties that are analyzed and synthesized include the following:
		17
		18	- Lines added or removed when an existing file is modified
		19	- Number and sizes of files added
		20	- Number of files removed
		21	- Line lengths
		22	- Topological distance to parent changeset(s)
		23	- Probability of a commit being a merge
		24	- Probability of a newly added file being added to a new directory
		25	- Interarrival time, and time zone, of commits
		26
		27	A few obvious properties that are not currently handled realistically:
		28
		29	- Merges are treated as regular commits with two parents, which is not
		30	realistic
		31	- Modifications are not treated as operations on hunks of lines, but
		32	as insertions and deletions of randomly chosen single lines
		33	- Committer ID (always random)
		34	- Executability of files
		35	- Symlinks and binary files are ignored
		36	'''
		37
		38	import bisect, collections, json, os, random, time
		39	from mercurial import cmdutil, context, patch, scmutil, url, util
		40	from mercurial.i18n import _
		41	from mercurial.node import nullrev, nullid
		42
		43	testedwith = 'internal'
		44
		45	cmdtable = {}
		46	command = cmdutil.command(cmdtable)
		47
		48	newfile = set(('new fi', 'rename', 'copy f', 'copy t'))
		49
		50	def zerodict():
		51	return collections.defaultdict(lambda: 0)
		52
		53	def roundto(x, k):
		54	if x > k * 2:
		55	return int(round(x / float(k)) * k)
		56	return int(round(x))
		57
		58	def parsegitdiff(lines):
		59	filename, mar, lineadd, lineremove = None, None, zerodict(), 0
		60	binary = False
		61	for line in lines:
		62	start = line[:6]
		63	if start == 'diff -':
		64	if filename:
		65	yield filename, mar, lineadd, lineremove, binary
		66	mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
		67	filename = patch.gitre.match(line).group(1)
		68	elif start in newfile:
		69	mar = 'a'
		70	elif start == 'GIT bi':
		71	binary = True
		72	elif start == 'delete':
		73	mar = 'r'
		74	elif start:
		75	s = start[0]
		76	if s == '-' and not line.startswith('--- '):
		77	lineremove += 1
		78	elif s == '+' and not line.startswith('+++ '):
		79	lineadd[roundto(len(line) - 1, 5)] += 1
		80	if filename:
		81	yield filename, mar, lineadd, lineremove, binary
		82
		83	@command('analyze',
		84	[('o', 'output', [], _('write output to given file'), _('FILE')),
		85	('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
		86	_('hg analyze'))
		87	def analyze(ui, repo, revs, *opts):
		88	'''create a simple model of a repository to use for later synthesis
		89
		90	This command examines every changeset in the given range (or all
		91	of history if none are specified) and creates a simple statistical
		92	model of the history of the repository.
		93
		94	The model is written out to a JSON file, and can be used by
		95	:hg:`synthesize` to create or augment a repository with synthetic
		96	commits that have a structure that is statistically similar to the
		97	analyzed repository.
		98	'''
		99
		100	revs = list(revs)
		101	revs.extend(opts['rev'])
		102	if not revs:
		103	revs = [':']
		104
		105	output = opts['output']
		106	if not output:
		107	output = os.path.basename(repo.root) + '.json'
		108
		109	if output == '-':
		110	fp = sys.stdout
		111	else:
		112	fp = open(output, 'w')
		113
		114	revs = scmutil.revrange(repo, revs)
		115	revs.sort()
		116
		117	lineschanged = zerodict()
		118	children = zerodict()
		119	p1distance = zerodict()
		120	p2distance = zerodict()
		121	linesinfilesadded = zerodict()
		122	fileschanged = zerodict()
		123	filesadded = zerodict()
		124	filesremoved = zerodict()
		125	linelengths = zerodict()
		126	interarrival = zerodict()
		127	parents = zerodict()
		128	dirsadded = zerodict()
		129	tzoffset = zerodict()
		130
		131	progress = ui.progress
		132	_analyzing = _('analyzing')
		133	_changesets = _('changesets')
		134	_total = len(revs)
		135
		136	for i, rev in enumerate(revs):
		137	progress(_analyzing, i, unit=_changesets, total=_total)
		138	ctx = repo[rev]
		139	pl = ctx.parents()
		140	pctx = pl[0]
		141	prev = pctx.rev()
		142	children[prev] += 1
		143	p1distance[rev - prev] += 1
		144	parents[len(pl)] += 1
		145	tzoffset[ctx.date()[1]] += 1
		146	if len(pl) > 1:
		147	p2distance[rev - pl[1].rev()] += 1
		148	if prev == rev - 1:
		149	lastctx = pctx
		150	else:
		151	lastctx = repo[rev - 1]
		152	if lastctx.rev() != nullrev:
		153	interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
		154	diff = sum((d.splitlines()
		155	for d in ctx.diff(pctx, opts=dict(git=True))), [])
		156	fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
		157	for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
		158	if binary:
		159	continue
		160	added = sum(lineadd.itervalues(), 0)
		161	if mar == 'm':
		162	if added and lineremove:
		163	lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
		164	filechanges += 1
		165	elif mar == 'a':
		166	fileadds += 1
		167	if '/' in filename:
		168	filedir = filename.rsplit('/', 1)[0]
		169	if filedir not in pctx.dirs():
		170	diradds += 1
		171	linesinfilesadded[roundto(added, 5)] += 1
		172	elif mar == 'r':
		173	fileremoves += 1
		174	for length, count in lineadd.iteritems():
		175	linelengths[length] += count
		176	fileschanged[filechanges] += 1
		177	filesadded[fileadds] += 1
		178	dirsadded[diradds] += 1
		179	filesremoved[fileremoves] += 1
		180
		181	invchildren = zerodict()
		182
		183	for rev, count in children.iteritems():
		184	invchildren[count] += 1
		185
		186	if output != '-':
		187	ui.status(_('writing output to %s\n') % output)
		188
		189	def pronk(d):
		190	return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
		191
		192	json.dump(dict(revs=len(revs),
		193	lineschanged=pronk(lineschanged),
		194	children=pronk(invchildren),
		195	fileschanged=pronk(fileschanged),
		196	filesadded=pronk(filesadded),
		197	linesinfilesadded=pronk(linesinfilesadded),
		198	dirsadded=pronk(dirsadded),
		199	filesremoved=pronk(filesremoved),
		200	linelengths=pronk(linelengths),
		201	parents=pronk(parents),
		202	p1distance=pronk(p1distance),
		203	p2distance=pronk(p2distance),
		204	interarrival=pronk(interarrival),
		205	tzoffset=pronk(tzoffset),
		206	),
		207	fp)
		208	fp.close()
		209
		210	@command('synthesize',
		211	[('c', 'count', 0, _('create given number of commits'), _('COUNT')),
		212	('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
		213	_('hg synthesize [OPTION].. DESCFILE'))
		214	def synthesize(ui, repo, descpath, **opts):
		215	'''synthesize commits based on a model of an existing repository
		216
		217	The model must have been generated by :hg:`analyze`. Commits will
		218	be generated randomly according to the probabilities described in
		219	the model.
		220
		221	When synthesizing new content, commit descriptions, and user
		222	names, words will be chosen randomly from a dictionary that is
		223	presumed to contain one word per line. Use --dict to specify the
		224	path to an alternate dictionary to use.
		225	'''
		226	try:
		227	fp = url.open(ui, descpath)
		228	except Exception, err:
		229	raise util.Abort('%s: %s' % (descpath, err[0].strerror))
		230	desc = json.load(fp)
		231	fp.close()
		232
		233	def cdf(l):
		234	vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
		235	t = float(sum(probs, 0))
		236	s, cdfs = 0, []
		237	for v in probs:
		238	s += v
		239	cdfs.append(s / t)
		240	return vals, cdfs
		241
		242	lineschanged = cdf(desc['lineschanged'])
		243	fileschanged = cdf(desc['fileschanged'])
		244	filesadded = cdf(desc['filesadded'])
		245	dirsadded = cdf(desc['dirsadded'])
		246	filesremoved = cdf(desc['filesremoved'])
		247	linelengths = cdf(desc['linelengths'])
		248	parents = cdf(desc['parents'])
		249	p1distance = cdf(desc['p1distance'])
		250	p2distance = cdf(desc['p2distance'])
		251	interarrival = cdf(desc['interarrival'])
		252	linesinfilesadded = cdf(desc['linesinfilesadded'])
		253	tzoffset = cdf(desc['tzoffset'])
		254
		255	dictfile = opts.get('dict') or '/usr/share/dict/words'
		256	try:
		257	fp = open(dictfile, 'rU')
		258	except IOError, err:
		259	raise util.Abort('%s: %s' % (dictfile, err.strerror))
		260	words = fp.read().splitlines()
		261	fp.close()
		262
		263	def pick(cdf):
		264	return cdf[0][bisect.bisect_left(cdf[1], random.random())]
		265
		266	def makeline(minimum=0):
		267	total = max(minimum, pick(linelengths))
		268	c, l = 0, []
		269	while c < total:
		270	w = random.choice(words)
		271	c += len(w) + 1
		272	l.append(w)
		273	return ' '.join(l)
		274
		275	wlock = repo.wlock()
		276	lock = repo.lock()
		277
		278	nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))
		279
		280	progress = ui.progress
		281	_synthesizing = _('synthesizing')
		282	_changesets = _('changesets')
		283
		284	count = int(opts['count'])
		285	heads = set(map(repo.changelog.rev, repo.heads()))
		286	for i in xrange(count):
		287	progress(_synthesizing, i, unit=_changesets, total=count)
		288
		289	node = repo.changelog.node
		290	revs = len(repo)
		291
		292	def pickhead(heads, distance):
		293	if heads:
		294	lheads = sorted(heads)
		295	rev = revs - min(pick(distance), revs)
		296	if rev < lheads[-1]:
		297	rev = lheads[bisect.bisect_left(lheads, rev)]
		298	else:
		299	rev = lheads[-1]
		300	return rev, node(rev)
		301	return nullrev, nullid
		302
		303	r1 = revs - min(pick(p1distance), revs)
		304	p1 = node(r1)
		305
		306	# the number of heads will grow without bound if we use a pure
		307	# model, so artificially constrain their proliferation
		308	if pick(parents) == 2 or len(heads) > random.randint(1, 20):
		309	r2, p2 = pickhead(heads.difference([r1]), p2distance)
		310	else:
		311	r2, p2 = nullrev, nullid
		312
		313	pl = [p1, p2]
		314	pctx = repo[r1]
		315	mf = pctx.manifest()
		316	mfk = mf.keys()
		317	changes = {}
		318	if mfk:
		319	for __ in xrange(pick(fileschanged)):
		320	for __ in xrange(10):
		321	fctx = pctx.filectx(random.choice(mfk))
		322	path = fctx.path()
		323	if not (path in nevertouch or fctx.isbinary() or
		324	'l' in fctx.flags()):
		325	break
		326	lines = fctx.data().splitlines()
		327	add, remove = pick(lineschanged)
		328	for __ in xrange(remove):
		329	if not lines:
		330	break
		331	del lines[random.randrange(0, len(lines))]
		332	for __ in xrange(add):
		333	lines.insert(random.randint(0, len(lines)), makeline())
		334	path = fctx.path()
		335	changes[path] = context.memfilectx(path,
		336	'\n'.join(lines) + '\n')
		337	for __ in xrange(pick(filesremoved)):
		338	path = random.choice(mfk)
		339	for __ in xrange(10):
		340	path = random.choice(mfk)
		341	if path not in changes:
		342	changes[path] = None
		343	break
		344	if filesadded:
		345	dirs = list(pctx.dirs())
		346	dirs.append('')
		347	for __ in xrange(pick(filesadded)):
		348	path = [random.choice(dirs)]
		349	if pick(dirsadded):
		350	path.append(random.choice(words))
		351	path.append(random.choice(words))
		352	path = '/'.join(filter(None, path))
		353	data = '\n'.join(makeline()
		354	for __ in xrange(pick(linesinfilesadded))) + '\n'
		355	changes[path] = context.memfilectx(path, data)
		356	def filectxfn(repo, memctx, path):
		357	data = changes[path]
		358	if data is None:
		359	raise IOError
		360	return data
		361	if not changes:
		362	continue
		363	if revs:
		364	date = repo['tip'].date()[0] + pick(interarrival)
		365	else:
		366	date = time.time() - (86400 * count)
		367	user = random.choice(words) + '@' + random.choice(words)
		368	mc = context.memctx(repo, pl, makeline(minimum=2),
		369	sorted(changes.iterkeys()),
		370	filectxfn, user, '%d %d' % (date, pick(tzoffset)))
		371	newnode = mc.commit()
		372	heads.add(repo.changelog.rev(newnode))
		373	heads.discard(r1)
		374	heads.discard(r2)
		375
		376	lock.release()
		377	wlock.release()