upstream/mercurial-mirror Commit - r43274:c07812bd

1

# synthrepo.py - repo synthesis

1

# synthrepo.py - repo synthesis

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

'''synthesize structurally interesting change history

8

'''synthesize structurally interesting change history

9

10

This extension is useful for creating a repository with properties

10

This extension is useful for creating a repository with properties

11

that are statistically similar to an existing repository. During

11

that are statistically similar to an existing repository. During

12

analysis, a simple probability table is constructed from the history

12

analysis, a simple probability table is constructed from the history

13

of an existing repository. During synthesis, these properties are

13

of an existing repository. During synthesis, these properties are

14

reconstructed.

14

reconstructed.

15

16

Properties that are analyzed and synthesized include the following:

16

Properties that are analyzed and synthesized include the following:

17

18

- Lines added or removed when an existing file is modified

18

- Lines added or removed when an existing file is modified

19

- Number and sizes of files added

19

- Number and sizes of files added

20

- Number of files removed

20

- Number of files removed

21

- Line lengths

21

- Line lengths

22

- Topological distance to parent changeset(s)

22

- Topological distance to parent changeset(s)

23

- Probability of a commit being a merge

23

- Probability of a commit being a merge

24

- Probability of a newly added file being added to a new directory

24

- Probability of a newly added file being added to a new directory

25

- Interarrival time, and time zone, of commits

25

- Interarrival time, and time zone, of commits

26

- Number of files in each directory

26

- Number of files in each directory

27

28

A few obvious properties that are not currently handled realistically:

28

A few obvious properties that are not currently handled realistically:

29

30

- Merges are treated as regular commits with two parents, which is not

30

- Merges are treated as regular commits with two parents, which is not

31

realistic

31

realistic

32

- Modifications are not treated as operations on hunks of lines, but

32

- Modifications are not treated as operations on hunks of lines, but

33

as insertions and deletions of randomly chosen single lines

33

as insertions and deletions of randomly chosen single lines

34

- Committer ID (always random)

34

- Committer ID (always random)

35

- Executability of files

35

- Executability of files

36

- Symlinks and binary files are ignored

36

- Symlinks and binary files are ignored

37

'''

37

'''

38

39

from __future__ import absolute_import

39

from __future__ import absolute_import

40

import bisect

40

import bisect

41

import collections

41

import collections

42

import itertools

42

import itertools

43

import json

43

import json

44

import os

44

import os

45

import random

45

import random

46

import sys

46

import sys

47

import time

47

import time

48

49

from mercurial.i18n import _

49

from mercurial.i18n import _

50

from mercurial.node import (

50

from mercurial.node import (

51

nullid,

51

nullid,

52

nullrev,

52

nullrev,

53

short,

53

short,

54

)

54

)

55

from mercurial import (

55

from mercurial import (

56

context,

56

context,

57

diffutil,

57

diffutil,

58

error,

58

error,

59

hg,

59

hg,

60

patch,

60

patch,

61

pycompat,

61

registrar,

62

registrar,

62

scmutil,

63

scmutil,

63

)

64

)

64

from mercurial.utils import (

65

from mercurial.utils import (

65

dateutil,

66

dateutil,

66

)

67

)

67

68

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# Table filled in by the @command decorator below.
cmdtable = {}
command = registrar.command(cmdtable)

# Six-character line prefixes that make parsegitdiff() classify a file as
# added (it compares them against line[:6]). Presumably these are the git
# extended headers "new file mode", "rename from/to", "copy from" and
# "copy to" -- confirm against the diff generator if this set is changed.
newfile = {'new fi', 'rename', 'copy f', 'copy t'}

78

79

def zerodict():
    '''Return a new dict whose missing keys read as (and are stored as) 0.

    Used for all the frequency tables: ``table[key] += 1`` works without
    first checking whether ``key`` is present.
    '''
    return collections.defaultdict(int)

81

82

def roundto(x, k):
    '''Round x to an int, bucketing large values to multiples of k.

    If ``x`` is greater than ``2 * k`` the result is the multiple of ``k``
    nearest to ``x``; otherwise it is ``x`` rounded to the nearest integer.
    This keeps small values exact while collapsing large ones into buckets
    of width ``k``.
    '''
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

86

87

def parsegitdiff(lines):
    '''Summarize a git-style diff, one result per file.

    ``lines`` is an iterable of diff lines. For every line starting with
    ``diff -`` a new file record is started; when the next record starts
    (or the input ends) the previous one is yielded as a tuple
    ``(filename, mar, lineadd, lineremove, binary)``:

    - ``filename``: group 1 of ``patch.gitre`` matched against the header
    - ``mar``: ``'m'`` by default, ``'a'`` if a line whose first six
      characters are in ``newfile`` was seen, ``'r'`` if a line starting
      with ``delete`` was seen
    - ``lineadd``: zerodict mapping ``roundto(len(line) - 1, 5)`` to the
      number of added (``+``) lines of that bucketed length
    - ``lineremove``: number of removed (``-``) lines
    - ``binary``: True if a line starting with ``GIT bi`` was seen

    ``--- `` and ``+++ `` lines are treated as file headers, not content.
    '''
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            # a new file begins: flush the record collected so far
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            # slice rather than index so the comparison also holds when the
            # lines are bytes (indexing bytes yields an int on Python 3)
            s = start[:1]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                # len(line) - 1 discounts the leading '+' marker
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

111

112

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    # Revisions come from both positional arguments and --rev; the model is
    # written to --output ('-' means stdout), defaulting to "<repo dir>.json"
    # in the current directory.

    # NOTE(review): the command is registered with optionalrepo=True and the
    # history pass below is guarded by "if repo:", yet repo.root is read
    # unconditionally -- confirm what root should be when there is no repo.
    root = repo.root
    # The default output name must be derived from the path *without* a
    # trailing separator: basename() of a path ending in one is ''.
    reponame = os.path.basename(root.rstrip(os.path.sep))
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = reponame + '.json'

    # Open the destination before the (potentially long) analysis so that an
    # unwritable path is reported immediately.
    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    try:
        # Always obtain file counts of each directory in the given root
        # directory.
        def onerror(e):
            ui.warn(_('error walking directory structure: %s\n') % e)

        dirs = {}
        rootprefixlen = len(root)
        for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
            dirpathfromroot = dirpath[rootprefixlen:]
            dirs[dirpathfromroot] = len(filenames)
            # pruning dirnames in place keeps os.walk out of .hg
            if '.hg' in dirnames:
                dirnames.remove('.hg')

        # Frequency tables: each maps an observed value to how often it
        # was seen.
        lineschanged = zerodict()
        children = zerodict()
        p1distance = zerodict()
        p2distance = zerodict()
        linesinfilesadded = zerodict()
        fileschanged = zerodict()
        filesadded = zerodict()
        filesremoved = zerodict()
        linelengths = zerodict()
        interarrival = zerodict()
        parents = zerodict()
        dirsadded = zerodict()
        tzoffset = zerodict()

        # If a mercurial repo is available, also model the commit history.
        if repo:
            revs = scmutil.revrange(repo, revs)
            revs.sort()

            # the diff options do not depend on the changeset
            diffopts = diffutil.diffallopts(ui, {'git': True})
            progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                       total=len(revs))
            for i, rev in enumerate(revs):
                progress.update(i)
                ctx = repo[rev]
                pl = ctx.parents()
                pctx = pl[0]
                prev = pctx.rev()
                children[prev] += 1
                p1distance[rev - prev] += 1
                parents[len(pl)] += 1
                tzoffset[ctx.date()[1]] += 1
                if len(pl) > 1:
                    p2distance[rev - pl[1].rev()] += 1
                # interarrival time is measured against the numerically
                # previous revision, which need not be the first parent
                if prev == rev - 1:
                    lastctx = pctx
                else:
                    lastctx = repo[rev - 1]
                if lastctx.rev() != nullrev:
                    timedelta = ctx.date()[0] - lastctx.date()[0]
                    interarrival[roundto(timedelta, 300)] += 1
                # chain the per-chunk line lists lazily instead of
                # concatenating them with sum(), which is quadratic
                difflines = itertools.chain.from_iterable(
                    d.splitlines() for d in ctx.diff(pctx, opts=diffopts))
                fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
                for entry in parsegitdiff(difflines):
                    filename, mar, lineadd, lineremove, isbin = entry
                    if isbin:
                        continue
                    added = sum(lineadd.values(), 0)
                    if mar == 'm':
                        # only count modifications that both add and
                        # remove lines
                        if added and lineremove:
                            lineschanged[roundto(added, 5),
                                         roundto(lineremove, 5)] += 1
                            filechanges += 1
                    elif mar == 'a':
                        fileadds += 1
                        if '/' in filename:
                            filedir = filename.rsplit('/', 1)[0]
                            if filedir not in pctx.dirs():
                                diradds += 1
                        linesinfilesadded[roundto(added, 5)] += 1
                    elif mar == 'r':
                        fileremoves += 1
                    for length, count in lineadd.items():
                        linelengths[length] += count
                fileschanged[filechanges] += 1
                filesadded[fileadds] += 1
                dirsadded[diradds] += 1
                filesremoved[fileremoves] += 1
            progress.complete()

        # turn "revision -> number of children" into
        # "number of children -> number of revisions"
        invchildren = zerodict()
        for count in children.values():
            invchildren[count] += 1

        if output != '-':
            ui.status(_('writing output to %s\n') % output)

        def pronk(d):
            # (value, frequency) pairs, most frequent first
            return sorted(d.items(), key=lambda x: x[1], reverse=True)

        json.dump({'revs': len(revs),
                   'initdirs': pronk(dirs),
                   'lineschanged': pronk(lineschanged),
                   'children': pronk(invchildren),
                   'fileschanged': pronk(fileschanged),
                   'filesadded': pronk(filesadded),
                   'linesinfilesadded': pronk(linesinfilesadded),
                   'dirsadded': pronk(dirsadded),
                   'filesremoved': pronk(filesremoved),
                   'linelengths': pronk(linelengths),
                   'parents': pronk(parents),
                   'p1distance': pronk(p1distance),
                   'p2distance': pronk(p2distance),
                   'interarrival': pronk(interarrival),
                   'tzoffset': pronk(tzoffset),
                   },
                  fp)
    finally:
        # Close only what this function opened; stdout belongs to the
        # process and must stay usable after the command returns.
        if fp is sys.stdout:
            fp.flush()
        else:
            fp.close()

258

259

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    # The module-level import only brings in nullid, nullrev and short, so a
    # bare "hex" would be the builtin, which cannot format a binary node.
    from mercurial.node import hex as nodehex

    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        # The useful message may sit on the exception itself (IOError) or on
        # its first argument (an error wrapping another error).
        reason = getattr(err, 'strerror', None)
        if reason is None and getattr(err, 'args', None):
            reason = getattr(err.args[0], 'strerror', None)
        raise error.Abort('%s: %s' % (descpath, reason or err))
    try:
        desc = json.load(fp)
    finally:
        fp.close()

    def cdf(l):
        '''Turn (value, weight) pairs into (values, cumulative fractions).

        Pairs are ordered by decreasing weight; the second sequence holds
        the running sum of weights divided by their total.
        '''
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        # splitlines() below already copes with every newline convention,
        # so the 'U' mode flag (rejected by newer Pythons) is not needed
        fp = open(dictfile, 'r')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    with fp:
        words = fp.read().splitlines()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        '''Draw one value at random from a (values, cumulative) pair.'''
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        '''Return a random word placed in a directory drawn from the model.'''
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        '''Return random words joined by spaces.

        Words are appended until their combined length (counting one
        separator per word) reaches a length drawn from the model, or
        ``minimum`` if that is larger.
        '''
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Hold both locks for the whole run; the with-statement releases them
    # (lock first, then wlock) even if synthesis fails part-way.
    with repo.wlock(), repo.lock():
        # Synthesize a single initial revision adding files to the repo
        # according to the modeled directory structure.
        initcount = int(opts['initfiles'])
        if initcount and initdirs:
            pctx = repo['.']
            dirs = set(pctx.dirs())
            files = {}

            def validpath(path):
                # Don't pick filenames which are already directory names.
                if path in dirs:
                    return False
                # Don't pick directories which were used as file names.
                while path:
                    if path in files:
                        return False
                    path = os.path.dirname(path)
                return True

            progress = ui.makeprogress(_synthesizing, unit=_files,
                                       total=initcount)
            for i in pycompat.xrange(0, initcount):
                progress.update(i)

                path = pickpath()
                while not validpath(path):
                    path = pickpath()
                data = '%s contents\n' % path
                files[path] = data
                # remember every new ancestor directory of the file
                parent = os.path.dirname(path)
                while parent and parent not in dirs:
                    dirs.add(parent)
                    parent = os.path.dirname(parent)

            def filectxfn(repo, memctx, path):
                return context.memfilectx(repo, memctx, path, files[path])

            progress.complete()
            message = 'synthesized wide repo with %d files' % (len(files),)
            mc = context.memctx(repo, [pctx.node(), nullid], message,
                                files, filectxfn, ui.username(),
                                '%d %d' % dateutil.makedate())
            initnode = mc.commit()
            if ui.debugflag:
                hexfn = nodehex
            else:
                hexfn = short
            ui.status(_('added commit %s with %d files\n')
                      % (hexfn(initnode), len(files)))

        # Synthesize incremental revisions to the repository, adding repo
        # depth.
        count = int(opts['count'])
        heads = set(map(repo.changelog.rev, repo.heads()))
        progress = ui.makeprogress(_synthesizing, unit=_changesets,
                                   total=count)
        for i in pycompat.xrange(count):
            progress.update(i)

            node = repo.changelog.node
            revs = len(repo)

            def pickhead(heads, distance):
                '''Pick a head near the modeled distance; null if none.'''
                if heads:
                    lheads = sorted(heads)
                    rev = revs - min(pick(distance), revs)
                    if rev < lheads[-1]:
                        rev = lheads[bisect.bisect_left(lheads, rev)]
                    else:
                        rev = lheads[-1]
                    return rev, node(rev)
                return nullrev, nullid

            r1 = revs - min(pick(p1distance), revs)
            p1 = node(r1)

            # the number of heads will grow without bound if we use a pure
            # model, so artificially constrain their proliferation
            toomanyheads = len(heads) > random.randint(1, 20)
            if p2distance[0] and (pick(parents) == 2 or toomanyheads):
                r2, p2 = pickhead(heads.difference([r1]), p2distance)
            else:
                r2, p2 = nullrev, nullid

            pl = [p1, p2]
            pctx = repo[r1]
            mf = pctx.manifest()
            mfk = mf.keys()
            changes = {}
            if mfk:
                for __ in pycompat.xrange(pick(fileschanged)):
                    # try up to 10 times to find a regular text file; the
                    # last candidate is used even if none qualified
                    for __ in pycompat.xrange(10):
                        fctx = pctx.filectx(random.choice(mfk))
                        path = fctx.path()
                        if not (path in nevertouch or fctx.isbinary() or
                                'l' in fctx.flags()):
                            break
                    lines = fctx.data().splitlines()
                    add, remove = pick(lineschanged)
                    for __ in pycompat.xrange(remove):
                        if not lines:
                            break
                        del lines[random.randrange(0, len(lines))]
                    for __ in pycompat.xrange(add):
                        lines.insert(random.randint(0, len(lines)),
                                     makeline())
                    path = fctx.path()
                    changes[path] = '\n'.join(lines) + '\n'
                # NOTE(review): this loop selects a path that is not being
                # modified but never records it anywhere, so no file is
                # actually removed from the synthesized commit -- confirm
                # whether removals were meant to be registered in "changes".
                for __ in pycompat.xrange(pick(filesremoved)):
                    for __ in pycompat.xrange(10):
                        path = random.choice(mfk)
                        if path not in changes:
                            break
            # candidate parent directories for new files; '' is the root
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
            for __ in pycompat.xrange(pick(filesadded)):
                pathstr = ''
                # retry until the new name does not collide with a directory
                while pathstr in dirs:
                    path = [random.choice(dirs)]
                    if pick(dirsadded):
                        path.append(random.choice(words))
                    path.append(random.choice(words))
                    pathstr = '/'.join(filter(None, path))
                data = '\n'.join(
                    makeline()
                    for __ in pycompat.xrange(pick(linesinfilesadded))) + '\n'
                changes[pathstr] = data

            def filectxfn(repo, memctx, path):
                if path not in changes:
                    return None
                return context.memfilectx(repo, memctx, path, changes[path])

            if not changes:
                continue
            if revs:
                date = repo['tip'].date()[0] + pick(interarrival)
            else:
                date = time.time() - (86400 * count)
            # dates in mercurial must be positive, fit in 32-bit signed
            # integers.
            date = min(0x7fffffff, max(0, date))
            user = random.choice(words) + '@' + random.choice(words)
            mc = context.memctx(repo, pl, makeline(minimum=2),
                                sorted(changes),
                                filectxfn, user,
                                '%d %d' % (date, pick(tzoffset)))
            newnode = mc.commit()
            # the new commit is a head; its parents no longer are
            heads.add(repo.changelog.rev(newnode))
            heads.discard(r1)
            heads.discard(r2)
        progress.complete()

495

497

496

def renamedirs(dirs, words):

498

def renamedirs(dirs, words):

497

'''Randomly rename the directory names in the per-dir file count dict.'''

499

'''Randomly rename the directory names in the per-dir file count dict.'''

498

wordgen = itertools.cycle(words)

500

wordgen = itertools.cycle(words)

499

replacements = {'': ''}

501

replacements = {'': ''}

500

def rename(dirpath):

502

def rename(dirpath):

501

'''Recursively rename the directory and all path prefixes.

503

'''Recursively rename the directory and all path prefixes.

502

504

503

The mapping from path to renamed path is stored for all path prefixes

505

The mapping from path to renamed path is stored for all path prefixes

504

as in dynamic programming, ensuring linear runtime and consistent

506

as in dynamic programming, ensuring linear runtime and consistent

505

renaming regardless of iteration order through the model.

507

renaming regardless of iteration order through the model.

506

'''

508

'''

507

if dirpath in replacements:

509

if dirpath in replacements:

508

return replacements[dirpath]

510

return replacements[dirpath]

509

head, _ = os.path.split(dirpath)

511

head, _ = os.path.split(dirpath)

510

if head:

512

if head:

511

head = rename(head)

513

head = rename(head)

512

else:

514

else:

513

head = ''

515

head = ''

514

renamed = os.path.join(head, next(wordgen))

516

renamed = os.path.join(head, next(wordgen))

515

replacements[dirpath] = renamed

517

replacements[dirpath] = renamed

516

return renamed

518

return renamed

517

result = []

519

result = []

518

for dirpath, count in dirs.iteritems():

520

for dirpath, count in dirs.iteritems():

519

result.append([rename(dirpath.lstrip(os.sep)), count])

521

result.append([rename(dirpath.lstrip(os.sep)), count])

520

return result

522

return result

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # synthrepo.py - repo synthesis
             #
             # Copyright 2012 Facebook
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             '''synthesize structurally interesting change history
             This extension is useful for creating a repository with properties
             that are statistically similar to an existing repository. During
             analysis, a simple probability table is constructed from the history
             of an existing repository.  During synthesis, these properties are
             reconstructed.
             Properties that are analyzed and synthesized include the following:
             - Lines added or removed when an existing file is modified
             - Number and sizes of files added
             - Number of files removed
             - Line lengths
             - Topological distance to parent changeset(s)
             - Probability of a commit being a merge
             - Probability of a newly added file being added to a new directory
             - Interarrival time, and time zone, of commits
             - Number of files in each directory
             A few obvious properties that are not currently handled realistically:
             - Merges are treated as regular commits with two parents, which is not
               realistic
             - Modifications are not treated as operations on hunks of lines, but
               as insertions and deletions of randomly chosen single lines
             - Committer ID (always random)
             - Executability of files
             - Symlinks and binary files are ignored
             '''
             from __future__ import absolute_import
             import bisect
             import collections
             import itertools
             import json
             import os
             import random
             import sys
             import time
             from mercurial.i18n import _
             from mercurial.node import (
                 nullid,
                 nullrev,
                 short,
             )
             from mercurial import (
                 context,
                 diffutil,
                 error,
                 hg,
                 patch,
+                pycompat,
                 registrar,
                 scmutil,
             )
             from mercurial.utils import (
                 dateutil,
             )
             # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
             # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
             # be specifying the version(s) of Mercurial they are tested with, or
             # leave the attribute unspecified.
             testedwith = 'ships-with-hg-core'
             cmdtable = {}
             command = registrar.command(cmdtable)
             newfile = {'new fi', 'rename', 'copy f', 'copy t'}
             def zerodict():
                 '''Return a new dict whose missing keys read as 0.

                 Used throughout this module as a histogram: d[key] += 1 works
                 without first initialising the key.
                 '''
                 # int() evaluates to 0, so it serves as the zero-valued factory.
                 return collections.defaultdict(int)
             def roundto(x, k):
                 '''Round x to an int, bucketing large values to multiples of k.

                 Values no greater than 2 * k are rounded to the nearest integer;
                 larger values are rounded to the nearest multiple of k.
                 '''
                 threshold = k * 2
                 if x <= threshold:
                     return int(round(x))
                 # float() forces true division regardless of the division mode.
                 buckets = round(x / float(k))
                 return int(buckets * k)
             def parsegitdiff(lines):
                 '''Summarize a git-style diff, one tuple per file section.

                 lines is an iterable of diff lines. For every section introduced by
                 a line starting with 'diff -' this generator yields

                     (filename, status, addedlengths, removedcount, isbinary)

                 where status is 'm' (the default), 'a' or 'r', addedlengths maps a
                 rounded length of each added line to how often it occurred, and
                 removedcount is the number of removed lines.
                 '''
                 filename = None
                 status = None
                 addedlengths = zerodict()
                 removedcount = 0
                 isbinary = False
                 for line in lines:
                     # Only the first six characters are needed to classify a line.
                     prefix = line[:6]
                     if prefix == 'diff -':
                         # A new file section starts: flush the previous one, if any.
                         if filename:
                             yield filename, status, addedlengths, removedcount, isbinary
                         status = 'm'
                         addedlengths = zerodict()
                         removedcount = 0
                         isbinary = False
                         filename = patch.gitre.match(line).group(1)
                         continue
                     if prefix in newfile:
                         status = 'a'
                         continue
                     if prefix == 'GIT bi':
                         isbinary = True
                         continue
                     if prefix == 'delete':
                         status = 'r'
                         continue
                     if not prefix:
                         continue
                     marker = prefix[0]
                     if marker == '-':
                         # '--- ' is the old-file header, not a removed line.
                         if not line.startswith('--- '):
                             removedcount += 1
                     elif marker == '+':
                         # '+++ ' is the new-file header, not an added line.
                         if not line.startswith('+++ '):
                             # len(line) - 1 discounts the leading '+' marker.
                             addedlengths[roundto(len(line) - 1, 5)] += 1
                 # Flush the final section.
                 if filename:
                     yield filename, status, addedlengths, removedcount, isbinary
             @command('analyze',
                      [('o', 'output', '', _('write output to given file'), _('FILE')),
                       ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
                      _('hg analyze'), optionalrepo=True)
             def analyze(ui, repo, *revs, **opts):
                 '''create a simple model of a repository to use for later synthesis
                 This command examines every changeset in the given range (or all
                 of history if none are specified) and creates a simple statistical
                 model of the history of the repository. It also measures the directory
                 structure of the repository as checked out.
                 The model is written out to a JSON file, and can be used by
                 :hg:`synthesize` to create or augment a repository with synthetic
                 commits that have a structure that is statistically similar to the
                 analyzed repository.
                 '''
                 # NOTE(review): repo.root is read unconditionally although the command
                 # is registered with optionalrepo=True and `if repo:` is tested further
                 # down - confirm repo can never be None at this point.
                 root = repo.root
                 # Make root end with a separator so that slicing by len(root) below
                 # yields paths relative to the root.
                 if not root.endswith(os.path.sep):
                     root += os.path.sep
                 # Positional revisions and -r/--rev values are merged; with none given
                 # the ':' revision range is used.
                 revs = list(revs)
                 revs.extend(opts['rev'])
                 if not revs:
                     revs = [':']
                 output = opts['output']
                 if not output:
                     # NOTE(review): root ends with a separator here, so
                     # os.path.basename(root) looks like it is always '' and the default
                     # name becomes '.json' - confirm this is intended.
                     output = os.path.basename(root) + '.json'
                 # '-' selects stdout instead of a file.
                 # NOTE(review): fp is not closed if anything below raises, and the
                 # final fp.close() also runs when fp is sys.stdout.
                 if output == '-':
                     fp = sys.stdout
                 else:
                     fp = open(output, 'w')
                 # Always obtain file counts of each directory in the given root directory.
                 def onerror(e):
                     ui.warn(_('error walking directory structure: %s\n') % e)
                 # dirs maps a directory path relative to root to its number of files.
                 dirs = {}
                 rootprefixlen = len(root)
                 for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
                     dirpathfromroot = dirpath[rootprefixlen:]
                     dirs[dirpathfromroot] = len(filenames)
                     # Removing '.hg' from dirnames in place keeps os.walk from
                     # descending into it.
                     if '.hg' in dirnames:
                         dirnames.remove('.hg')
                 # Histograms: each maps an observed (possibly rounded) value to the
                 # number of times it was seen.
                 lineschanged = zerodict()
                 children = zerodict()
                 p1distance = zerodict()
                 p2distance = zerodict()
                 linesinfilesadded = zerodict()
                 fileschanged = zerodict()
                 filesadded = zerodict()
                 filesremoved = zerodict()
                 linelengths = zerodict()
                 interarrival = zerodict()
                 parents = zerodict()
                 dirsadded = zerodict()
                 tzoffset = zerodict()
                 # If a mercurial repo is available, also model the commit history.
                 if repo:
                     revs = scmutil.revrange(repo, revs)
                     revs.sort()
                     progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                                total=len(revs))
                     for i, rev in enumerate(revs):
                         progress.update(i)
                         ctx = repo[rev]
                         pl = ctx.parents()
                         pctx = pl[0]
                         prev = pctx.rev()
                         # children counts, per parent revision, how many analyzed
                         # revisions have it as first parent.
                         children[prev] += 1
                         p1distance[rev - prev] += 1
                         parents[len(pl)] += 1
                         tzoffset[ctx.date()[1]] += 1
                         if len(pl) > 1:
                             p2distance[rev - pl[1].rev()] += 1
                         # Interarrival time is measured against revision rev - 1,
                         # which is not necessarily the first parent.
                         if prev == rev - 1:
                             lastctx = pctx
                         else:
                             lastctx = repo[rev - 1]
                         if lastctx.rev() != nullrev:
                             timedelta = ctx.date()[0] - lastctx.date()[0]
                             interarrival[roundto(timedelta, 300)] += 1
                         diffopts = diffutil.diffallopts(ui, {'git': True})
                         # Flatten the pieces produced by ctx.diff() into one list of
                         # lines.
                         diff = sum((d.splitlines()
                                    for d in ctx.diff(pctx, opts=diffopts)), [])
                         fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
                         for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                             # Binary files are left out of the model entirely.
                             if isbin:
                                 continue
                             added = sum(lineadd.itervalues(), 0)
                             if mar == 'm':
                                 # Only files with both added and removed lines count as
                                 # changed.
                                 if added and lineremove:
                                     lineschanged[roundto(added, 5),
                                                  roundto(lineremove, 5)] += 1
                                     filechanges += 1
                             elif mar == 'a':
                                 fileadds += 1
                                 # A file added below a directory the parent revision
                                 # does not have counts as a directory addition.
                                 if '/' in filename:
                                     filedir = filename.rsplit('/', 1)[0]
                                     if filedir not in pctx.dirs():
                                         diradds += 1
                                 linesinfilesadded[roundto(added, 5)] += 1
                             elif mar == 'r':
                                 fileremoves += 1
                             for length, count in lineadd.iteritems():
                                 linelengths[length] += count
                         fileschanged[filechanges] += 1
                         filesadded[fileadds] += 1
                         dirsadded[diradds] += 1
                         filesremoved[fileremoves] += 1
                     progress.complete()
                 # Invert children: how many revisions have a given number of children.
                 invchildren = zerodict()
                 for rev, count in children.iteritems():
                     invchildren[count] += 1
                 if output != '-':
                     ui.status(_('writing output to %s\n') % output)
                 # pronk turns a histogram into (key, count) pairs, highest count first.
                 def pronk(d):
                     return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
                 json.dump({'revs': len(revs),
                            'initdirs': pronk(dirs),
                            'lineschanged': pronk(lineschanged),
                            'children': pronk(invchildren),
                            'fileschanged': pronk(fileschanged),
                            'filesadded': pronk(filesadded),
                            'linesinfilesadded': pronk(linesinfilesadded),
                            'dirsadded': pronk(dirsadded),
                            'filesremoved': pronk(filesremoved),
                            'linelengths': pronk(linelengths),
                            'parents': pronk(parents),
                            'p1distance': pronk(p1distance),
                            'p2distance': pronk(p2distance),
                            'interarrival': pronk(interarrival),
                            'tzoffset': pronk(tzoffset),
                            },
                           fp)
                 fp.close()
             @command('synthesize',
                      [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
                       ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
                       ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
                      _('hg synthesize [OPTION].. DESCFILE'))
             def synthesize(ui, repo, descpath, **opts):
                 '''synthesize commits based on a model of an existing repository
                 The model must have been generated by :hg:`analyze`. Commits will
                 be generated randomly according to the probabilities described in
                 the model. If --initfiles is set, the repository will be seeded with
                 the given number files following the modeled repository's directory
                 structure.
                 When synthesizing new content, commit descriptions, and user
                 names, words will be chosen randomly from a dictionary that is
                 presumed to contain one word per line. Use --dict to specify the
                 path to an alternate dictionary to use.
                 '''
                 # Load the JSON model from descpath.
                 # NOTE(review): the handler indexes the exception (err[0]) before
                 # reading .strerror - confirm this works for what hg.openpath raises.
                 try:
                     fp = hg.openpath(ui, descpath)
                 except Exception as err:
                     raise error.Abort('%s: %s' % (descpath, err[0].strerror))
                 desc = json.load(fp)
                 fp.close()
                 # cdf turns a list of (value, count) pairs into a pair
                 # (values, cumulative probabilities), ordered by descending count.
                 # An empty list yields ([], []).
                 def cdf(l):
                     if not l:
                         return [], []
                     vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
                     t = float(sum(probs, 0))
                     s, cdfs = 0, []
                     for v in probs:
                         s += v
                         cdfs.append(s / t)
                     return vals, cdfs
                 lineschanged = cdf(desc['lineschanged'])
                 fileschanged = cdf(desc['fileschanged'])
                 filesadded = cdf(desc['filesadded'])
                 dirsadded = cdf(desc['dirsadded'])
                 filesremoved = cdf(desc['filesremoved'])
                 linelengths = cdf(desc['linelengths'])
                 parents = cdf(desc['parents'])
                 p1distance = cdf(desc['p1distance'])
                 p2distance = cdf(desc['p2distance'])
                 interarrival = cdf(desc['interarrival'])
                 linesinfilesadded = cdf(desc['linesinfilesadded'])
                 tzoffset = cdf(desc['tzoffset'])
                 # Read the word list, one entry per line of the dictionary file.
                 dictfile = opts.get('dict') or '/usr/share/dict/words'
                 try:
                     fp = open(dictfile, 'rU')
                 except IOError as err:
                     raise error.Abort('%s: %s' % (dictfile, err.strerror))
                 words = fp.read().splitlines()
                 fp.close()
                 # Rebuild the modeled directory structure with '.hg' replaced by '_hg'
                 # in every path and all directory names swapped for dictionary words.
                 initdirs = {}
                 if desc['initdirs']:
                     for k, v in desc['initdirs']:
                         initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
                     initdirs = renamedirs(initdirs, words)
                 initdirscdf = cdf(initdirs)
                 # pick draws one value from a (values, cumulative probabilities) pair by
                 # locating a uniform random number in the cumulative table.
                 def pick(cdf):
                     return cdf[0][bisect.bisect_left(cdf[1], random.random())]
                 def pickpath():
                     return os.path.join(pick(initdirscdf), random.choice(words))
                 # makeline joins random words until the running total of
                 # len(word) + 1 reaches the picked line length (at least minimum).
                 def makeline(minimum=0):
                     total = max(minimum, pick(linelengths))
                     c, l = 0, []
                     while c < total:
                         w = random.choice(words)
                         c += len(w) + 1
                         l.append(w)
                     return ' '.join(l)
                 # NOTE(review): both locks are released only at the very end of the
                 # function; there is no try/finally, so an exception leaves the
                 # release calls unexecuted.
                 wlock = repo.wlock()
                 lock = repo.lock()
                 # Files that are never picked for modification.
                 nevertouch = {'.hgsub', '.hgignore', '.hgtags'}
                 _synthesizing = _('synthesizing')
                 _files = _('initial files')
                 _changesets = _('changesets')
                 # Synthesize a single initial revision adding files to the repo according
                 # to the modeled directory structure.
                 initcount = int(opts['initfiles'])
                 if initcount and initdirs:
                     pctx = repo['.']
                     dirs = set(pctx.dirs())
                     files = {}
                     def validpath(path):
                         # Don't pick filenames which are already directory names.
                         if path in dirs:
                             return False
                         # Don't pick directories which were used as file names.
                         while path:
                             if path in files:
                                 return False
                             path = os.path.dirname(path)
                         return True
                     progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
-                    for i in xrange(0, initcount):
+                    for i in pycompat.xrange(0, initcount):
                         progress.update(i)
                         # Keep drawing until the path clashes with no known directory
                         # or file.
                         path = pickpath()
                         while not validpath(path):
                             path = pickpath()
                         data = '%s contents\n' % path
                         files[path] = data
                         # Record every new ancestor directory of the chosen path.
                         dir = os.path.dirname(path)
                         while dir and dir not in dirs:
                             dirs.add(dir)
                             dir = os.path.dirname(dir)
                     def filectxfn(repo, memctx, path):
                         return context.memfilectx(repo, memctx, path, files[path])
                     progress.complete()
                     message = 'synthesized wide repo with %d files' % (len(files),)
                     mc = context.memctx(repo, [pctx.node(), nullid], message,
                                         files, filectxfn, ui.username(),
                                         '%d %d' % dateutil.makedate())
                     initnode = mc.commit()
                     # NOTE(review): hex is not among the names imported from
                     # mercurial.node at the top of this file, so this appears to
                     # resolve to the builtin hex() - confirm.
                     if ui.debugflag:
                         hexfn = hex
                     else:
                         hexfn = short
                     ui.status(_('added commit %s with %d files\n')
                               % (hexfn(initnode), len(files)))
                 # Synthesize incremental revisions to the repository, adding repo depth.
                 count = int(opts['count'])
                 heads = set(map(repo.changelog.rev, repo.heads()))
                 progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
-                for i in xrange(count):
+                for i in pycompat.xrange(count):
                     progress.update(i)
                     node = repo.changelog.node
                     revs = len(repo)
                     # pickhead picks a second parent among heads: a target revision is
                     # derived from a sampled distance, then mapped to the first head at
                     # or above it, or to the highest head.
                     def pickhead(heads, distance):
                         if heads:
                             lheads = sorted(heads)
                             rev = revs - min(pick(distance), revs)
                             if rev < lheads[-1]:
                                 rev = lheads[bisect.bisect_left(lheads, rev)]
                             else:
                                 rev = lheads[-1]
                             return rev, node(rev)
                         return nullrev, nullid
                     # The first parent sits a sampled distance below the current
                     # revision count; the distance is capped at that count.
                     r1 = revs - min(pick(p1distance), revs)
                     p1 = node(r1)
                     # the number of heads will grow without bound if we use a pure
                     # model, so artificially constrain their proliferation
                     toomanyheads = len(heads) > random.randint(1, 20)
                     # p2distance[0] holds the modeled values; it is empty when the
                     # model has no p2distance entries, in which case no merge is made.
                     if p2distance[0] and (pick(parents) == 2 or toomanyheads):
                         r2, p2 = pickhead(heads.difference([r1]), p2distance)
                     else:
                         r2, p2 = nullrev, nullid
                     pl = [p1, p2]
                     pctx = repo[r1]
                     mf = pctx.manifest()
                     mfk = mf.keys()
                     # changes maps a path to the new file content for this commit.
                     changes = {}
                     if mfk:
-                        for __ in xrange(pick(fileschanged)):
+                        for __ in pycompat.xrange(pick(fileschanged)):
-                            for __ in xrange(10):
+                            for __ in pycompat.xrange(10):
                                 fctx = pctx.filectx(random.choice(mfk))
                                 path = fctx.path()
                                 if not (path in nevertouch or fctx.isbinary() or
                                         'l' in fctx.flags()):
                                     break
                             lines = fctx.data().splitlines()
                             add, remove = pick(lineschanged)
-                            for __ in xrange(remove):
+                            for __ in pycompat.xrange(remove):
                                 if not lines:
                                     break
                                 del lines[random.randrange(0, len(lines))]
-                            for __ in xrange(add):
+                            for __ in pycompat.xrange(add):
                                 lines.insert(random.randint(0, len(lines)), makeline())
                             path = fctx.path()
                             changes[path] = '\n'.join(lines) + '\n'
                         # NOTE(review): the path chosen below is never stored in
                         # changes, so no removal appears to take effect - confirm.
-                        for __ in xrange(pick(filesremoved)):
+                        for __ in pycompat.xrange(pick(filesremoved)):
-                            for __ in xrange(10):
+                            for __ in pycompat.xrange(10):
                                 path = random.choice(mfk)
                                 if path not in changes:
                                     break
                     # NOTE(review): filesadded is the 2-tuple returned by cdf(), so this
                     # test looks always true - confirm.
                     if filesadded:
                         dirs = list(pctx.dirs())
                         dirs.insert(0, '')
-                    for __ in xrange(pick(filesadded)):
+                    for __ in pycompat.xrange(pick(filesadded)):
                         # Draw paths until one is found that is not an existing
                         # directory name ('' is in dirs, so the loop runs at least once).
                         pathstr = ''
                         while pathstr in dirs:
                             path = [random.choice(dirs)]
                             if pick(dirsadded):
                                 path.append(random.choice(words))
                             path.append(random.choice(words))
                             pathstr = '/'.join(filter(None, path))
-                        data = '\n'.join(makeline()
+                        data = '\n'.join(
-                                         for __ in xrange(pick(linesinfilesadded))) + '\n'
+                            makeline()
+                            for __ in pycompat.xrange(pick(linesinfilesadded))) + '\n'
                         changes[pathstr] = data
                     def filectxfn(repo, memctx, path):
                         if path not in changes:
                             return None
                         return context.memfilectx(repo, memctx, path, changes[path])
                     # Nothing to commit for this iteration.
                     if not changes:
                         continue
                     if revs:
                         date = repo['tip'].date()[0] + pick(interarrival)
                     else:
                         date = time.time() - (86400 * count)
                     # dates in mercurial must be positive, fit in 32-bit signed integers.
                     date = min(0x7fffffff, max(0, date))
                     user = random.choice(words) + '@' + random.choice(words)
                     mc = context.memctx(repo, pl, makeline(minimum=2),
                                         sorted(changes),
                                         filectxfn, user, '%d %d' % (date, pick(tzoffset)))
                     newnode = mc.commit()
                     # The new commit becomes a head; the revisions used as its parents
                     # no longer are.
                     heads.add(repo.changelog.rev(newnode))
                     heads.discard(r1)
                     heads.discard(r2)
                 progress.complete()
                 lock.release()
                 wlock.release()
             def renamedirs(dirs, words):
                 '''Randomly rename the directory names in the per-dir file count dict.

                 dirs maps a directory path to a file count. words supplies the
                 replacement names and is cycled through when it runs out.

                 Returns a list of [renamed path, count] pairs, one per entry of dirs
                 and in the iteration order of dirs.
                 '''
                 wordgen = itertools.cycle(words)
                 # The empty path (the root) always maps to itself.
                 replacements = {'': ''}
                 def rename(dirpath):
                     '''Recursively rename the directory and all path prefixes.

                     The mapping from path to renamed path is stored for all path
                     prefixes as in dynamic programming, ensuring linear runtime and
                     consistent renaming regardless of iteration order through the
                     model.
                     '''
                     if dirpath in replacements:
                         return replacements[dirpath]
                     # Rename the parent first so that sibling directories share the
                     # same renamed prefix. dirname() avoids binding a throwaway '_',
                     # which would shadow the module's i18n alias inside this helper.
                     head = os.path.dirname(dirpath)
                     if head:
                         head = rename(head)
                     renamed = os.path.join(head, next(wordgen))
                     replacements[dirpath] = renamed
                     return renamed
                 # items() exists on both Python 2 and Python 3 dicts, whereas
                 # iteritems() is Python 2 only.
                 return [[rename(dirpath.lstrip(os.sep)), count]
                         for dirpath, count in dirs.items()]