##// END OF EJS Templates
cvsps: fix a final whitespace nit
Dirkjan Ochtman -
r6696:49c0be9e default
parent child Browse files
Show More
@@ -1,552 +1,552 b''
1 1 #
2 2 # Mercurial built-in replacement for cvsps.
3 3 #
4 4 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
5 5 #
6 6 # This software may be used and distributed according to the terms
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 import os
10 10 import re
11 11 import sys
12 12 import cPickle as pickle
13 13 from mercurial import util
14 14 from mercurial.i18n import _
15 15
def listsort(list, key):
    "helper to sort by key in Python 2.3"
    try:
        list.sort(key=key)
    except TypeError:
        # Python 2.3's sort() has no key= argument; emulate it by
        # deriving a comparison function from the key extractor.
        def compare(a, b):
            return cmp(key(a), key(b))
        list.sort(compare)
22 22
class logentry(object):
    '''A single file revision as read from the CVS (r)log.

    Instances carry the following attributes:
    .author   - author name as CVS knows it
    .branch   - name of branch this revision is on
    .branches - revision tuple of branches starting at this revision
    .comment  - commit message
    .date     - the commit date as a (time, tz) tuple
    .dead     - true if file revision is dead
    .file     - name of file
    .lines    - a tuple (+lines, -lines) or None
    .parent   - previous revision of this entry
    .rcs      - name of file as returned from CVS
    .revision - revision number as tuple
    .tags     - list of tags on the file
    '''
    def __init__(self, **entries):
        # Expose every keyword argument directly as an attribute.
        for name, value in entries.items():
            setattr(self, name, value)
40 40
class logerror(Exception):
    '''Raised for fatal problems while collecting or caching the CVS log.'''
43 43
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog.

    ui        - mercurial ui object used for status/note/debug output
    directory - module or path to log; None means the current CVS sandbox
                (read from CVS/Repository)
    root      - CVSROOT to use; if empty, falls back to CVS/Root and then
                the CVSROOT environment variable
    rlog      - if true run 'cvs rlog', otherwise 'cvs log'
    cache     - if true, keep a pickled log cache under ~/.hg.cvsps;
                the value 'update' additionally reads the existing cache
                and only fetches newer entries

    Returns a list of logentry objects sorted by (rcs, revision), with
    each entry's .parent filled in.  Raises logerror when not run inside
    a CVS sandbox or when the cache overlaps new log entries.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile('RCS file: (.+)$')
    re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile('cvs (r?log|server): (.+)\n$')
    re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
    re_10 = re.compile('Working file: (.+)$')
    re_20 = re.compile('symbolic names:')
    re_30 = re.compile('\t(.+): ([\\d.]+)$')
    re_31 = re.compile('----------------------------$')
    re_32 = re.compile('=============================================================================$')
    re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
    re_70 = re.compile('branches: (.+);$')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            prefix = file(os.path.join('CVS','Repository')).read().strip()
            if prefix == ".":
                prefix = ""
            directory = prefix
        except IOError:
            raise logerror('Not a CVS sandbox')

        if prefix and not prefix.endswith('/'):
            prefix += '/'

        # Use the Root file in the sandbox, if it exists
        try:
            root = file(os.path.join('CVS','Root')).read().strip()
        except IOError:
            pass

    if not root:
        root = os.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumerics, concatenated in a way that does not mix up the
        # various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

        if cache == 'update':
            try:
                ui.note(_('reading cvs log cache %s\n') % cachefile)
                oldlog = pickle.load(file(cachefile))
                ui.note(_('cache has %d log entries\n') % len(oldlog))
            except Exception, e:
                # best-effort: a missing/corrupt cache just means a full fetch
                ui.note(_('error reading cache: %r\n') % e)

            if oldlog:
                date = oldlog[-1].date    # last commit date as a (time,tz) tuple
                date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = root.split(':')[-1]
        if not p.endswith('/'):
            p += '/'
        prefix = p + prefix
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}     # dictionary of revisions on current file with their tags
    state = 0
    store = False # set when a new record can be appended

    cmd = [util.shellquote(arg) for arg in cmd]
    ui.note("running %s\n" % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    for line in util.popen(' '.join(cmd)):
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # rlog prints the repository path; strip ',v' suffix,
                    # the repository prefix, and any Attic/ component.
                    filename = rcs[:-2]
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise Exception(match.group(1))
            match = re_02.match(line)
            if match:
                raise Exception(match.group(2))
            if re_03.match(line):
                raise Exception(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = match.group(1)
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('Must have at least some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs), file=scache(filename),
                    revision=tuple([int(x) for x in match.group(1).split('.')]),
                    branches=[], parent=None)
            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            # lines changed may have +, -, both or neither
            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None
            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = [scache(x) for x in tags.get(e.revision, [])]
            e.tags.sort()
            e.comment = scache('\n'.join(e.comment))

            # even-length revision tuples longer than 3 denote branch
            # revisions; look up the branch name from the tags map
            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            log.append(e)

            if len(log) % 100 == 0:
                ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')

    listsort(log, key=lambda x:(x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            listsort(log, key=lambda x:x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror('Log cache overlaps with new log entries,'
                               ' re-run without cache.')

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            pickle.dump(log, file(cachefile, 'w'))
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    return log
348 348
349 349
class changeset(object):
    '''A group of logentry objects folded into one commit.

    Instances carry the following attributes:
    .author  - author name as CVS knows it
    .branch  - name of branch this changeset is on, or None
    .comment - commit message
    .date    - the commit date as a (time,tz) tuple
    .entries - list of logentry objects in this changeset
    .parents - list of one or two parent changesets
    .tags    - list of tags on this changeset
    '''
    def __init__(self, **entries):
        # Expose every keyword argument directly as an attribute.
        for name, value in entries.items():
            setattr(self, name, value)
362 362
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - mercurial ui object used for progress output
    log       - list of logentry objects, as produced by createlog()
    fuzz      - maximum spread (in tz-adjusted seconds, date[0]+date[1])
                between entries folded into the same changeset
    mergefrom - regex string matched against comments to add a second
                parent from a named branch; None selects the default
                '{{mergefrombranch BRANCH}}' pattern, a false value
                disables the feature
    mergeto   - regex string matched against comments to insert a dummy
                merge changeset to a named branch; None selects the
                default '{{mergetobranch BRANCH}}' pattern, a false
                value disables the feature

    Returns the list of changeset objects, sorted by date, with
    .parents filled in and .id numbered from 1.
    '''

    ui.status(_('creating changesets\n'))

    # Merge changesets

    listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))

    changesets = []
    files = {}
    c = None
    for i, e in enumerate(log):

        # Check if log entry belongs to the current changeset or not.
        # Entries with the same comment/author/branch within the fuzz
        # window join the current changeset, unless the file already
        # appears in it.
        if not (c and
                  e.comment == c.comment and
                  e.author == c.author and
                  e.branch == c.branch and
                  ((c.date[0] + c.date[1]) <=
                   (e.date[0] + e.date[1]) <=
                   (c.date[0] + c.date[1]) + fuzz) and
                  e.file not in files):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date, entries=[])
            changesets.append(c)
            files = {}
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(util.ellipsis(t, 80) + '\n')

        # back-reference from the log entry to its changeset
        e.Changeset = c
        c.entries.append(e)
        files[e.file] = True
        c.date = e.date       # changeset date is date of latest commit in it

    # Sort files in each changeset

    for c in changesets:
        def pathcompare(l, r):
            'Mimic cvsps sorting order'
            l = l.split('/')
            r = r.split('/')
            nl = len(l)
            nr = len(r)
            n = min(nl, nr)
            for i in range(n):
                # a path sorts before any path it is a prefix of
                if i + 1 == nl and nl < nr:
                    return -1
                elif i + 1 == nr and nl > nr:
                    return +1
                elif l[i] < r[i]:
                    return -1
                elif l[i] > r[i]:
                    return +1
            return 0
        def entitycompare(l, r):
            return pathcompare(l.file, r.file)

        c.entries.sort(entitycompare)

    # Sort changesets by date

    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        # NOTE: the local 're' below shadows the re module inside cscmp
        le = {}
        for e in l.entries:
            le[e.rcs] = e.revision
        re = {}
        for e in r.entries:
            re[e.rcs] = e.revision

        d = 0
        # if r contains the parent revision of an entry in l, then l
        # must come after r (and vice versa below)
        for e in l.entries:
            if re.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if le.get(e.rcs, None) == e.parent:
                assert not d
                d = -1
                break

        return d

    changesets.sort(cscmp)

    # Collect tags

    globaltags = {}
    for c in changesets:
        tags = {}
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = {}
        for e in c.entries:
            for tag in e.tags:
                tags[tag] = True
        # remember tags only if this is the latest changeset to have it
        tagnames = [tag for tag in tags if globaltags[tag] is c]
        tagnames.sort()
        c.tags = tagnames

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = r'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = r'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i<n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        # the parent is the last changeset seen on this branch, or the
        # latest changeset holding a parent revision of one of our files
        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            for f in c.entries:
                p = max(p, versions.get((f.rcs, f.parent), None))

        c.parents = []
        if p is not None:
            c.parents.append(changesets[p])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                if m in branches and c.branch != m:
                    c.parents.append(changesets[branches[m]])

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                try:
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                except:
                    m = None # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(author=c.author, branch=m, date=c.date,
                            comment='convert-repo: CVS merge from branch %s' % c.branch,
                            entries=[], tags=[], parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    ui.status(_('%d changeset entries\n') % len(changesets))

    return changesets
General Comments 0
You need to be logged in to leave comments. Login now