upstream/mercurial-mirror Commit - r6687:f8ef3920

1

#

2

# Mercurial built-in replacement for cvsps.

3

#

4

5

#

6

# This software may be used and distributed according to the terms

7

# of the GNU General Public License, incorporated herein by reference.

8

9

import os

10

import re

11

import sys

12

import cPickle as pickle

13

from mercurial import util

14

from mercurial.i18n import _

15

16

def listsort(list, key):
    '''Sort list in place, ordering its items by key(item).

    list.sort(key=...) is tried first; if that raises TypeError (as it
    does on Python 2.3, where sort() takes no key argument) the sort is
    redone with an equivalent comparison function.
    '''
    def bykey(left, right):
        # cmp-style comparison of the two items' keys
        return cmp(key(left), key(right))

    try:
        list.sort(key=key)
    except TypeError:
        list.sort(bykey)

22

23

class logentry(object):
    '''One revision of one file, as read from the CVS (r)log output.

    Instances are plain attribute bags; the attributes used are:
        .author    - author name as CVS knows it
        .branch    - name of branch this revision is on
        .branches  - revision tuple of branches starting at this revision
        .comment   - commit message
        .date      - the commit date as a (time, tz) tuple
        .dead      - true if file revision is dead
        .file      - Name of file
        .lines     - a tuple (+lines, -lines) or None
        .parent    - Previous revision of this entry
        .rcs       - name of file as returned from CVS
        .revision  - revision number as tuple
        .tags      - list of tags on the file
    '''
    def __init__(self, **entries):
        # every keyword argument becomes an instance attribute
        attrs = vars(self)
        for name in entries:
            attrs[name] = entries[name]

40

41

class logerror(Exception):
    '''Error raised while collecting the CVS log.'''

43

44

def _readstripped(path):
    '''Return the contents of the file at path, stripped of surrounding
    whitespace. The file is closed before returning; IOError propagates.'''
    fp = open(path)
    try:
        return fp.read().strip()
    finally:
        fp.close()

def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog and return it as a list of logentry objects.

    ui        - object whose status(), note() and debug() methods receive
                progress and diagnostic messages
    directory - path handed to "cvs (r)log". If None, the current
                directory must be a CVS sandbox: the path is read from
                CVS/Repository and root is read from CVS/Root when that
                file exists.
    root      - the CVS root; if empty, $CVSROOT is used
    rlog      - true to run "cvs rlog", false to run "cvs log"
    cache     - any true value writes the resulting log to a pickle file
                under ~/.hg.cvsps; the value 'update' additionally reads
                the existing cache first and asks CVS only for revisions
                dated after the last cached entry

    Raises logerror if directory is None and CVS/Repository cannot be
    read, or if freshly read entries are not newer than the cached ones.
    Raises Exception with the CVS message when the output reports an
    aborted command or an inaccessible repository.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile('RCS file: (.+)$')
    re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
    # NOTE(review): every line has its trailing newline removed before it
    # is matched, so this pattern (which requires a newline) looks like it
    # can never match -- confirm whether that is intended.
    re_02 = re.compile('cvs (r?log|server): (.+)\n$')
    re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
    re_10 = re.compile('Working file: (.+)$')
    re_20 = re.compile('symbolic names:')
    re_30 = re.compile('\t(.+): ([\\d.]+)$')
    re_31 = re.compile('----------------------------$')
    re_32 = re.compile('=============================================================================$')
    re_50 = re.compile(r'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
    re_70 = re.compile('branches: (.+);$')

    prefix = ''   # leading path to strip off what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            prefix = _readstripped(os.path.join('CVS','Repository'))
            if prefix == ".":
                prefix = ""
            directory = prefix
        except IOError:
            raise logerror('Not a CVS sandbox')

        if prefix and not prefix.endswith('/'):
            prefix += '/'

        # Use the Root file in the sandbox, if it exists
        try:
            root = _readstripped(os.path.join('CVS','Root'))
        except IOError:
            pass

    if not root:
        root = os.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sorts of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumerics, concatenated in a way that does not mix up the
        # various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":")+[directory, "cache"]
        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir, '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        # Reading the cache is best effort: any failure is reported via
        # ui.note() and the full log is fetched instead.
        # NOTE(review): unpickling executes whatever the cache file
        # encodes; this presumes only this function writes that file.
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            fp = open(cachefile)
            try:
                oldlog = pickle.load(fp)
            finally:
                fp.close()
            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception:
            # sys.exc_info() rather than "except ..., e" so that the
            # statement parses on every Python version
            err = sys.exc_info()[1]
            ui.note(_('error reading cache: %r\n') % err)

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = root.split(':')[-1]
        if not p.endswith('/'):
            p += '/'
        prefix = p+prefix
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}     # dictionary of revisions on current file with their tags
    state = 0
    store = False # set when a new record can be appended

    cmd = [util.shellquote(arg) for arg in cmd]
    ui.note("running %s\n" % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    for line in util.popen(' '.join(cmd)):
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # rlog prints no 'Working file' line, so derive the
                    # file name from the RCS path: drop the last two
                    # characters, the prefix and any Attic component
                    filename = rcs[:-2]
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise Exception(match.group(1))
            match = re_02.match(line)
            if match:
                raise Exception(match.group(2))
            if re_03.match(line):
                raise Exception(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = match.group(1)
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn>3 and (revn%2) == 0 and rev[-2] == 0:
                    rev = rev[:-2]+rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            # NOTE(review): nothing in this function sets state to 4
            # (state 3 moves straight to 5), so this branch looks
            # unreachable.
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('Must have at least some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs), file=scache(filename),
                    revision=tuple([int(x) for x in match.group(1).split('.')]),
                    branches=[], parent=None)
            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19'+d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d+' UTC'
            e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            # groups 5 and 6 are the optional +N and -N line counts
            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None
            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision,
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                                for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = [scache(x) for x in tags.get(e.revision, [])]
            e.tags.sort()
            e.comment = scache('\n'.join(e.comment))

            # a revision with an even number (>3) of components gets the
            # first tag recorded for its branch number, if any
            revn = len(e.revision)
            if revn>3 and (revn%2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            log.append(e)

            if len(log)%100 == 0:
                ui.status(util.ellipsis('%d %s'%(len(log), e.file), 80)+'\n')

    listsort(log, key=lambda x:(x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: the parent is the
            # revision number with its last two components removed
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            listsort(log, key=lambda x:x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror('Log cache overlaps with new log entries, re-run without cache.')

            log = oldlog+log

            # write the new cachefile; close it explicitly so the data
            # is flushed before we return
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            fp = open(cachefile, 'w')
            try:
                pickle.dump(log, fp)
            finally:
                fp.close()
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    return log

346

347

348

class changeset(object):
    '''A group of logentry objects that together form one commit.

    Instances are plain attribute bags; the attributes used are:
        .author    - author name as CVS knows it
        .branch    - name of branch this changeset is on, or None
        .comment   - commit message
        .date      - the commit date as a (time,tz) tuple
        .entries   - list of logentry objects in this changeset
        .parents   - list of one or two parent changesets
        .tags      - list of tags on this changeset
    '''
    def __init__(self, **entries):
        # every keyword argument becomes an instance attribute
        attrs = vars(self)
        for name in entries:
            attrs[name] = entries[name]

360

361

def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - object whose status() method receives progress messages
    log       - list of logentry objects; it is sorted in place and each
                entry gets a .Changeset attribute
    fuzz      - largest allowed difference between the date of an entry
                and the date of the previous entry in the same changeset
                (presumably seconds, the unit of the date tuples)
    mergefrom - regular expression searched for in commit messages; its
                first group names a branch that becomes a second parent.
                None selects '{{mergefrombranch BRANCH}}'; any other
                false value disables the feature.
    mergeto   - regular expression searched for in commit messages; its
                first group names a branch on which an empty merge
                changeset is inserted. None selects
                '{{mergetobranch BRANCH}}'; any other false value
                disables the feature.

    Returns the list of changeset objects in their sorted order, each
    with .parents, .tags and a 1-based .id set.
    '''

    ui.status(_('creating changesets\n'))

    # Merge changesets

    listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))

    changesets = []
    files = {}
    c = None
    for e in log:

        # Check if log entry belongs to the current changeset or not.
        if not (c and
                  e.comment == c.comment and
                  e.author == c.author and
                  e.branch == c.branch and
                  (c.date[0]+c.date[1]) <= (e.date[0]+e.date[1]) <= (c.date[0]+c.date[1])+fuzz and
                  e.file not in files):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date, entries=[])
            changesets.append(c)
            files = {}
            if len(changesets)%100 == 0:
                ui.status(util.ellipsis('%d %s'%(len(changesets), repr(e.comment)[1:-1]), 80)+'\n')

        e.Changeset = c
        c.entries.append(e)
        files[e.file] = True
        c.date = e.date       # changeset date is date of latest commit in it

    # Sort files in each changeset

    def pathcompare(l, r):
        'Mimic cvsps sorting order'
        l = l.split('/')
        r = r.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            # a path that ends here sorts before one that goes deeper
            if i+1 == nl and nl<nr:
                return -1
            elif i+1 == nr and nl>nr:
                return +1
            elif l[i]<r[i]:
                return -1
            elif l[i]>r[i]:
                return +1
        return 0

    def entitycompare(l, r):
        'Compare two log entries by their file names'
        return pathcompare(l.file, r.file)

    for c in changesets:
        c.entries.sort(entitycompare)

    # Sort changesets by date

    def cscmp(l, r):
        'Compare two changesets by date, then by file parentage'
        d = sum(l.date)-sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        lrevs = {}
        for e in l.entries:
            lrevs[e.rcs] = e.revision
        rrevs = {}
        for e in r.entries:
            rrevs[e.rcs] = e.revision

        d = 0
        # l sorts after r if r holds the parent revision of a file in l
        for e in l.entries:
            if rrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        # ... and before r in the opposite case; both at once is an error
        for e in r.entries:
            if lrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = -1
                break

        return d

    changesets.sort(cscmp)

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = {}
        for e in c.entries:
            for tag in e.tags:
                tags[tag] = True
        # remember tags only if this is the latest changeset to have it
        tagnames = [tag for tag in tags if globaltags[tag] is c]
        tagnames.sort()
        c.tags = tagnames

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = r'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = r'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i<n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on this branch: use the latest changeset
            # that holds the parent revision of any of its files
            for f in c.entries:
                p = max(p, versions.get((f.rcs, f.parent), None))

        c.parents = []
        if p is not None:
            c.parents.append(changesets[p])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                if m in branches and c.branch != m:
                    c.parents.append(changesets[branches[m]])

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                try:
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                except IndexError:
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(author=c.author, branch=m, date=c.date,
                            comment='convert-repo: CVS merge from branch %s' % c.branch,
                            entries=[], tags=[], parents=[changesets[branches[m]], c])
                    changesets.insert(i+1, cc)
                    branches[m] = i+1

                    # adjust our loop counters now we have inserted a new entry
                    # NOTE(review): this continue skips the
                    # branches[c.branch] update below, so c is not
                    # recorded as the latest changeset on its own
                    # branch -- confirm that this is intended.
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i+1

    ui.status(_('%d changeset entries\n') % len(changesets))

    return changesets

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

This diff has been collapsed as it changes many lines, (547 lines changed) Show them Hide them
		@@ -0,0 +1,547 b''
	1	#
	2	# Mercurial built-in replacement for cvsps.
	3	#
	4	# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
	5	#
	6	# This software may be used and distributed according to the terms
	7	# of the GNU General Public License, incorporated herein by reference.
	8
	9	import os
	10	import re
	11	import sys
	12	import cPickle as pickle
	13	from mercurial import util
	14	from mercurial.i18n import _
	15
	16	def listsort(list, key):
	17	"helper to sort by key in Python 2.3"
	18	try:
	19	list.sort(key=key)
	20	except TypeError:
	21	list.sort(lambda l, r:cmp(key(l), key(r)))
	22
	23	class logentry(object):
	24	'''Class logentry has the following attributes:
	25	.author - author name as CVS knows it
	26	.branch - name of branch this revision is on
	27	.branches - revision tuple of branches starting at this revision
	28	.comment - commit message
	29	.date - the commit date as a (time, tz) tuple
	30	.dead - true if file revision is dead
	31	.file - Name of file
	32	.lines - a tuple (+lines, -lines) or None
	33	.parent - Previous revision of this entry
	34	.rcs - name of file as returned from CVS
	35	.revision - revision number as tuple
	36	.tags - list of tags on the file
	37	'''
	38	def __init__(self, **entries):
	39	self.__dict__.update(entries)
	40
	41	class logerror(Exception):
	42	pass
	43
	44	def createlog(ui, directory=None, root="", rlog=True, cache=None):
	45	'''Collect the CVS rlog'''
	46
	47	# Because we store many duplicate commit log messages, reusing strings
	48	# saves a lot of memory and pickle storage space.
	49	_scache = {}
	50	def scache(s):
	51	"return a shared version of a string"
	52	return _scache.setdefault(s, s)
	53
	54	ui.status(_('collecting CVS rlog\n'))
	55
	56	log = [] # list of logentry objects containing the CVS state
	57
	58	# patterns to match in CVS (r)log output, by state of use
	59	re_00 = re.compile('RCS file: (.+)$')
	60	re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
	61	re_02 = re.compile('cvs (r?log\|server): (.+)\n$')
	62	re_03 = re.compile("(Cannot access.+CVSROOT)\|(can't create temporary directory.+)$")
	63	re_10 = re.compile('Working file: (.+)$')
	64	re_20 = re.compile('symbolic names:')
	65	re_30 = re.compile('\t(.+): ([\\d.]+)$')
	66	re_31 = re.compile('----------------------------$')
	67	re_32 = re.compile('=============================================================================$')
	68	re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
	69	re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
	70	re_70 = re.compile('branches: (.+);$')
	71
	72	prefix = '' # leading path to strip of what we get from CVS
	73
	74	if directory is None:
	75	# Current working directory
	76
	77	# Get the real directory in the repository
	78	try:
	79	prefix = file(os.path.join('CVS','Repository')).read().strip()
	80	if prefix == ".":
	81	prefix=""
	82	directory = prefix
	83	except IOError:
	84	raise logerror('Not a CVS sandbox')
	85
	86	if prefix and not prefix.endswith('/'):
	87	prefix+='/'
	88
	89	# Use the Root file in the sandbox, if it exists
	90	try:
	91	root = file(os.path.join('CVS','Root')).read().strip()
	92	except IOError:
	93	pass
	94
	95	if not root:
	96	root = os.environ.get('CVSROOT', '')
	97
	98	# read log cache if one exists
	99	oldlog = []
	100	date = None
	101
	102	if cache:
	103	cachedir = os.path.expanduser('~/.hg.cvsps')
	104	if not os.path.exists(cachedir):
	105	os.mkdir(cachedir)
	106
	107	# The cvsps cache pickle needs a uniquified name, based on the
	108	# repository location. The address may have all sort of nasties
	109	# in it, slashes, colons and such. So here we take just the
	110	# alphanumerics, concatenated in a way that does not mix up the
	111	# various components, so that
	112	# :pserver:user@server:/path
	113	# and
	114	# /pserver/user/server/path
	115	# are mapped to different cache file names.
	116	cachefile = root.split(":")+[directory, "cache"]
	117	cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
	118	cachefile = os.path.join(cachedir, '.'.join([s for s in cachefile if s]))
	119
	120	if cache == 'update':
	121	try:
	122	ui.note(_('reading cvs log cache %s\n') % cachefile)
	123	oldlog = pickle.load(file(cachefile))
	124	ui.note(_('cache has %d log entries\n') % len(oldlog))
	125	except Exception, e:
	126	ui.note(_('error reading cache: %r\n') % e)
	127
	128	if oldlog:
	129	date = oldlog[-1].date # last commit date as a (time,tz) tuple
	130	date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
	131
	132	# build the CVS commandline
	133	cmd = ['cvs', '-q']
	134	if root:
	135	cmd.append('-d%s' % root)
	136	p = root.split(':')[-1]
	137	if not p.endswith('/'):
	138	p+='/'
	139	prefix = p+prefix
	140	cmd.append(['log', 'rlog'][rlog])
	141	if date:
	142	# no space between option and date string
	143	cmd.append('-d>%s' % date)
	144	cmd.append(directory)
	145
	146	# state machine begins here
	147	tags = {} # dictionary of revisions on current file with their tags
	148	state = 0
	149	store = False # set when a new record can be appended
	150
	151	cmd = [util.shellquote(arg) for arg in cmd]
	152	ui.note("running %s\n" % (' '.join(cmd)))
	153	ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
	154
	155	for line in util.popen(' '.join(cmd)):
	156	if line.endswith('\n'):
	157	line = line[:-1]
	158	#ui.debug('state=%d line=%r\n' % (state, line))
	159
	160	if state == 0:
	161	# initial state, consume input until we see 'RCS file'
	162	match = re_00.match(line)
	163	if match:
	164	rcs = match.group(1)
	165	tags = {}
	166	if rlog:
	167	filename = rcs[:-2]
	168	if filename.startswith(prefix):
	169	filename = filename[len(prefix):]
	170	if filename.startswith('/'):
	171	filename = filename[1:]
	172	if filename.startswith('Attic/'):
	173	filename = filename[6:]
	174	else:
	175	filename = filename.replace('/Attic/', '/')
	176	state = 2
	177	continue
	178	state = 1
	179	continue
	180	match = re_01.match(line)
	181	if match:
	182	raise Exception(match.group(1))
	183	match = re_02.match(line)
	184	if match:
	185	raise Exception(match.group(2))
	186	if re_03.match(line):
	187	raise Exception(line)
	188
	189	elif state == 1:
	190	# expect 'Working file' (only when using log instead of rlog)
	191	match = re_10.match(line)
	192	assert match, _('RCS file must be followed by working file')
	193	filename = match.group(1)
	194	state = 2
	195
	196	elif state == 2:
	197	# expect 'symbolic names'
	198	if re_20.match(line):
	199	state = 3
	200
	201	elif state == 3:
	202	# read the symbolic names and store as tags
	203	match = re_30.match(line)
	204	if match:
	205	rev = [int(x) for x in match.group(2).split('.')]
	206
	207	# Convert magic branch number to an odd-numbered one
	208	revn = len(rev)
	209	if revn>3 and (revn%2) == 0 and rev[-2] == 0:
	210	rev = rev[:-2]+rev[-1:]
	211	rev = tuple(rev)
	212
	213	if rev not in tags:
	214	tags[rev] = []
	215	tags[rev].append(match.group(1))
	216
	217	elif re_31.match(line):
	218	state = 5
	219	elif re_32.match(line):
	220	state = 0
	221
	222	elif state == 4:
	223	# expecting '------' separator before first revision
	224	if re_31.match(line):
	225	state = 5
	226	else:
	227	assert not re_32.match(line), _('Must have at least some revisions')
	228
	229	elif state == 5:
	230	# expecting revision number and possibly (ignored) lock indication
	231	# we create the logentry here from values stored in states 0 to 4,
	232	# as this state is re-entered for subsequent revisions of a file.
	233	match = re_50.match(line)
	234	assert match, _('expected revision number')
	235	e = logentry(rcs=scache(rcs), file=scache(filename),
	236	revision=tuple([int(x) for x in match.group(1).split('.')]),
	237	branches=[], parent=None)
	238	state = 6
	239
	240	elif state == 6:
	241	# expecting date, author, state, lines changed
	242	match = re_60.match(line)
	243	assert match, _('revision must be followed by date line')
	244	d = match.group(1)
	245	if d[2] == '/':
	246	# Y2K
	247	d = '19'+d
	248
	249	if len(d.split()) != 3:
	250	# cvs log dates always in GMT
	251	d = d+' UTC'
	252	e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
	253	e.author = scache(match.group(2))
	254	e.dead = match.group(3).lower() == 'dead'
	255
	256	if match.group(5):
	257	if match.group(6):
	258	e.lines = (int(match.group(5)), int(match.group(6)))
	259	else:
	260	e.lines = (int(match.group(5)), 0)
	261	elif match.group(6):
	262	e.lines = (0, int(match.group(6)))
	263	else:
	264	e.lines = None
	265	e.comment = []
	266	state = 7
	267
	268	elif state == 7:
	269	# read the revision numbers of branches that start at this revision,
	270	# or store the commit log message otherwise
	271	m = re_70.match(line)
	272	if m:
	273	e.branches = [tuple([int(y) for y in x.strip().split('.')])
	274	for x in m.group(1).split(';')]
	275	state = 8
	276	elif re_31.match(line):
	277	state = 5
	278	store = True
	279	elif re_32.match(line):
	280	state = 0
	281	store = True
	282	else:
	283	e.comment.append(line)
	284
	285	elif state == 8:
	286	# store commit log message
	287	if re_31.match(line):
	288	state = 5
	289	store = True
	290	elif re_32.match(line):
	291	state = 0
	292	store = True
	293	else:
	294	e.comment.append(line)
	295
	296	if store:
	297	# clean up the results and save in the log.
	298	store = False
	299	e.tags = [scache(x) for x in tags.get(e.revision, [])]
	300	e.tags.sort()
	301	e.comment = scache('\n'.join(e.comment))
	302
	303	revn = len(e.revision)
	304	if revn>3 and (revn%2) == 0:
	305	e.branch = tags.get(e.revision[:-1], [None])[0]
	306	else:
	307	e.branch = None
	308
	309	log.append(e)
	310
	311	if len(log)%100 == 0:
	312	ui.status(util.ellipsis('%d %s'%(len(log), e.file), 80)+'\n')
	313
	314	listsort(log, key=lambda x:(x.rcs, x.revision))
	315
	316	# find parent revisions of individual files
	317	versions = {}
	318	for e in log:
	319	branch = e.revision[:-1]
	320	p = versions.get((e.rcs, branch), None)
	321	if p is None:
	322	p = e.revision[:-2]
	323	e.parent = p
	324	versions[(e.rcs, branch)] = e.revision
	325
	326	# update the log cache
	327	if cache:
	328	if log:
	329	# join up the old and new logs
	330	listsort(log, key=lambda x:x.date)
	331
	332	if oldlog and oldlog[-1].date >= log[0].date:
	333	raise logerror('Log cache overlaps with new log entries, re-run without cache.')
	334
	335	log = oldlog+log
	336
	337	# write the new cachefile
	338	ui.note(_('writing cvs log cache %s\n') % cachefile)
	339	pickle.dump(log, file(cachefile, 'w'))
	340	else:
	341	log = oldlog
	342
	343	ui.status(_('%d log entries\n') % len(log))
	344
	345	return log
	346
	347
	348	class changeset(object):
	349	'''Class changeset has the following attributes:
	350	.author - author name as CVS knows it
	351	.branch - name of branch this changeset is on, or None
	352	.comment - commit message
	353	.date - the commit date as a (time,tz) tuple
	354	.entries - list of logentry objects in this changeset
	355	.parents - list of one or two parent changesets
	356	.tags - list of tags on this changeset
	357	'''
	358	def __init__(self, **entries):
	359	self.__dict__.update(entries)
	360
	361	def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
	362	'''Convert log into changesets.'''
	363
	364	ui.status(_('creating changesets\n'))
	365
	366	# Merge changesets
	367
	368	listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))
	369
	370	changesets = []
	371	files = {}
	372	c = None
	373	for i, e in enumerate(log):
	374
	375	# Check if log entry belongs to the current changeset or not.
	376	if not (c and
	377	e.comment == c.comment and
	378	e.author == c.author and
	379	e.branch == c.branch and
	380	(c.date[0]+c.date[1]) <= (e.date[0]+e.date[1]) <= (c.date[0]+c.date[1])+fuzz and
	381	e.file not in files):
	382	c = changeset(comment=e.comment, author=e.author,
	383	branch=e.branch, date=e.date, entries=[])
	384	changesets.append(c)
	385	files = {}
	386	if len(changesets)%100 == 0:
	387	ui.status(util.ellipsis('%d %s'%(len(changesets), repr(e.comment)[1:-1]), 80)+'\n')
	388
	389	e.Changeset = c
	390	c.entries.append(e)
	391	files[e.file] = True
	392	c.date = e.date # changeset date is date of latest commit in it
	393
	394	# Sort files in each changeset
	395
	396	for c in changesets:
	397	def pathcompare(l, r):
	398	'Mimic cvsps sorting order'
	399	l = l.split('/')
	400	r = r.split('/')
	401	nl = len(l)
	402	nr = len(r)
	403	n = min(nl, nr)
	404	for i in range(n):
	405	if i+1 == nl and nl<nr:
	406	return -1
	407	elif i+1 == nr and nl>nr:
	408	return +1
	409	elif l[i]<r[i]:
	410	return -1
	411	elif l[i]>r[i]:
	412	return +1
	413	return 0
	414	def entitycompare(l, r):
	415	return pathcompare(l.file, r.file)
	416
	417	c.entries.sort(entitycompare)
	418
	419	# Sort changesets by date
	420
	421	def cscmp(l, r):
	422	d = sum(l.date)-sum(r.date)
	423	if d:
	424	return d
	425
	426	# detect vendor branches and initial commits on a branch
	427	le = {}
	428	for e in l.entries:
	429	le[e.rcs] = e.revision
	430	re = {}
	431	for e in r.entries:
	432	re[e.rcs] = e.revision
	433
	434	d = 0
	435	for e in l.entries:
	436	if re.get(e.rcs, None) == e.parent:
	437	assert not d
	438	d = 1
	439	break
	440
	441	for e in r.entries:
	442	if le.get(e.rcs, None) == e.parent:
	443	assert not d
	444	d = -1
	445	break
	446
	447	return d
	448
	449	changesets.sort(cscmp)
	450
	451	# Collect tags
	452
	453	globaltags = {}
	454	for c in changesets:
	455	tags = {}
	456	for e in c.entries:
	457	for tag in e.tags:
	458	# remember which is the latest changeset to have this tag
	459	globaltags[tag] = c
	460
	461	for c in changesets:
	462	tags = {}
	463	for e in c.entries:
	464	for tag in e.tags:
	465	tags[tag] = True
	466	# remember tags only if this is the latest changeset to have it
	467	tagnames = [tag for tag in tags if globaltags[tag] is c]
	468	tagnames.sort()
	469	c.tags = tagnames
	470
	471	# Find parent changesets, handle {{mergetobranch BRANCHNAME}}
	472	# by inserting dummy changesets with two parents, and handle
	473	# {{mergefrombranch BRANCHNAME}} by setting two parents.
	474
	475	if mergeto is None:
	476	mergeto = r'{{mergetobranch ([-\w]+)}}'
	477	if mergeto:
	478	mergeto = re.compile(mergeto)
	479
	480	if mergefrom is None:
	481	mergefrom = r'{{mergefrombranch ([-\w]+)}}'
	482	if mergefrom:
	483	mergefrom = re.compile(mergefrom)
	484
	485	versions = {} # changeset index where we saw any particular file version
	486	branches = {} # changeset index where we saw a branch
	487	n = len(changesets)
	488	i = 0
	489	while i<n:
	490	c = changesets[i]
	491
	492	for f in c.entries:
	493	versions[(f.rcs, f.revision)] = i
	494
	495	p = None
	496	if c.branch in branches:
	497	p = branches[c.branch]
	498	else:
	499	for f in c.entries:
	500	p = max(p, versions.get((f.rcs, f.parent), None))
	501
	502	c.parents = []
	503	if p is not None:
	504	c.parents.append(changesets[p])
	505
	506	if mergefrom:
	507	m = mergefrom.search(c.comment)
	508	if m:
	509	m = m.group(1)
	510	if m == 'HEAD':
	511	m = None
	512	if m in branches and c.branch != m:
	513	c.parents.append(changesets[branches[m]])
	514
	515	if mergeto:
	516	m = mergeto.search(c.comment)
	517	if m:
	518	try:
	519	m = m.group(1)
	520	if m == 'HEAD':
	521	m = None
	522	except:
	523	m = None # if no group found then merge to HEAD
	524	if m in branches and c.branch != m:
	525	# insert empty changeset for merge
	526	cc = changeset(author=c.author, branch=m, date=c.date,
	527	comment='convert-repo: CVS merge from branch %s' % c.branch,
	528	entries=[], tags=[], parents=[changesets[branches[m]], c])
	529	changesets.insert(i+1, cc)
	530	branches[m] = i+1
	531
	532	# adjust our loop counters now we have inserted a new entry
	533	n += 1
	534	i += 2
	535	continue
	536
	537	branches[c.branch] = i
	538	i += 1
	539
	540	# Number changesets
	541
	542	for i, c in enumerate(changesets):
	543	c.id = i+1
	544
	545	ui.status(_('%d changeset entries\n') % len(changesets))
	546
	547	return changesets