##// END OF EJS Templates
changelog: lazily parse files...
Gregory Szorc -
r28493:7796473c default
parent child Browse files
Show More
@@ -1,518 +1,529 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
26 26 _defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 35 >>> s == res.decode('string_escape')
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
def decodeextra(text):
    """Decode the NUL-separated "extra" field ``text`` into a dict.

    The result starts as a copy of ``_defaultextra``; every non-empty
    ``key:value`` item found in ``text`` is unescaped and stored on top
    of it.

    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    decoded = _defaultextra.copy()
    for item in text.split('\0'):
        if not item:
            continue
        if '\\0' in item:
            # fix up \0 without getting into trouble with \\0: a newline
            # is used as a temporary marker behind every escaped
            # backslash and removed again once \0 has been replaced
            item = item.replace('\\\\', '\\\\\n')
            item = item.replace('\\0', '\0')
            item = item.replace('\n', '')
        key, value = item.decode('string_escape').split(':', 1)
        decoded[key] = value
    return decoded
63 63
def encodeextra(d):
    """Encode the mapping ``d`` as NUL-separated, escaped ``key:value`` items."""
    # keys must be sorted to produce a deterministic changelog entry
    return "\0".join(
        _string_escape('%s:%s' % (key, d[key])) for key in sorted(d))
68 68
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    trimmed = (line.rstrip() for line in desc.splitlines())
    return '\n'.join(trimmed).strip('\n')
72 72
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it

    The object behaves like a file whose content is the real file opened
    through ``vfs`` followed by the chunks collected in ``buf``. Writes
    never touch the real file; they are only appended to ``buf``.
    '''
    def __init__(self, vfs, name, mode, buf):
        # buf is kept by reference, so whoever supplied it sees every
        # chunk appended by write()
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size

    def end(self):
        '''return the virtual size: real file size plus buffered data'''
        return self.size + sum(len(chunk) for chunk in self.data)

    def tell(self):
        '''return the current virtual offset'''
        return self.offset

    def flush(self):
        '''nothing to flush: writes only ever go to the buffer'''
        pass

    def close(self):
        '''close the underlying real file'''
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # the real file only needs repositioning while the virtual
        # offset still points inside of it
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        A negative ``count`` reads everything up to the virtual end.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the buffered chunks into a single string so that
            # it can be sliced
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            if count < 0:
                # "read everything": slicing with doff + count would
                # cut off the tail of the buffered data
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        '''buffer ``s`` and advance the virtual offset by its length'''
        self.data.append(str(s))
        self.offset += len(s)
124 124
125 125 def _divertopener(opener, target):
126 126 """build an opener that writes in 'target.a' instead of 'target'"""
127 127 def _divert(name, mode='r'):
128 128 if name != target:
129 129 return opener(name, mode)
130 130 return opener(name + ".a", mode)
131 131 return _divert
132 132
133 133 def _delayopener(opener, target, buf):
134 134 """build an opener that stores chunks in 'buf' instead of 'target'"""
135 135 def _delay(name, mode='r'):
136 136 if name != target:
137 137 return opener(name, mode)
138 138 return appender(opener, name, mode, buf)
139 139 return _delay
140 140
141 141 _changelogrevision = collections.namedtuple('changelogrevision',
142 142 ('manifest', 'user', 'date',
143 143 'files', 'description', 'extra'))
144 144
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.

    Only raw slices of the revision text are stored; every public
    attribute decodes its slice when it is accessed.
    """

    __slots__ = (
        '_rawdateextra',
        '_rawdesc',
        '_rawfiles',
        '_rawmanifest',
        '_rawuser',
    )

    def __new__(cls, text):
        if not text:
            # Nothing to parse: hand back a plain tuple of defaults.
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        # Locate the newlines that end the three header lines.
        manifestend = text.index('\n')
        userend = text.index('\n', manifestend + 1)
        dateend = text.index('\n', userend + 1)
        # The first blank line separates the header from the description.
        headerend = text.index('\n\n')

        self._rawmanifest = text[0:manifestend]
        self._rawuser = text[manifestend + 1:userend]
        self._rawdateextra = text[userend + 1:dateend]
        self._rawdesc = text[headerend + 2:]

        # The list of files may be empty. Which means dateend is the first
        # of the double newline that precedes the description.
        if dateend != headerend:
            self._rawfiles = text[dateend + 1:headerend]
        else:
            self._rawfiles = None

        return self

    @property
    def manifest(self):
        return bin(self._rawmanifest)

    @property
    def user(self):
        return encoding.tolocal(self._rawuser)

    @property
    def _rawdate(self):
        # first two space-separated fields: time and timezone
        return self._rawdateextra.split(' ', 2)[:2]

    @property
    def _rawextra(self):
        # an optional third space-separated field holds the encoded extra
        parts = self._rawdateextra.split(' ', 2)
        if len(parts) == 3:
            return parts[2]

        return None

    @property
    def date(self):
        rawtime, rawtz = self._rawdate[0], self._rawdate[1]
        time = float(rawtime)
        # Various tools did silly things with the timezone.
        try:
            timezone = int(rawtz)
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        encoded = self._rawextra
        if encoded is not None:
            return decodeextra(encoded)

        return _defaultextra

    @property
    def files(self):
        rawfiles = self._rawfiles
        if rawfiles is not None:
            return rawfiles.split('\n')

        return []

    @property
    def description(self):
        return encoding.tolocal(self._rawdesc)
247 258
class changelog(revlog.revlog):
    """revlog stored in ``00changelog.i`` with support for filtered revisions.

    Accessors raise the ``Filtered*`` errors for revisions listed in
    ``self.filteredrevs``. Index writes can be delayed (buffered in memory)
    or diverted to a ``.a`` file until the transaction is finalized.
    """
    def __init__(self, opener):
        revlog.revlog.__init__(self, opener, "00changelog.i")
        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False
        # opener to go back to once delayed/diverted writes are finalized
        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        # the range ends at revision -1
        for i in xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if not self.filteredrevs:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """forward to the index's ``reachableroots2`` implementation"""
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        """filtered version of revlog.headrevs"""
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: write the new index to a '.a' file
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: buffer the index writes in memory
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a')
            fp.write("".join(self._delaybuf))
            fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def readpending(self, file):
        """read index data from a "pending" file

        During a transaction, the actual changeset data is already stored in the
        main file, but not yet finalized in the on-disk index. Instead, a
        "pending" index is written by the transaction logic. If this function
        is running, we are likely in a subprocess invoked in a hook. The
        subprocess is informed that it is within a transaction and needs to
        access its content.

        This function will read all the index data out of the pending file and
        overwrite the main index."""

        if not self.opener.exists(file):
            return # no pending data for changelog
        r = revlog.revlog(self.opener, file)
        self.index = r.index
        self.nodemap = r.nodemap
        self._nodecache = r._nodecache
        self._chunkcache = r._chunkcache

    def _writepending(self, tr):
        """create a file containing the unfinalized state for pretxnchangegroup

        Returns True when the pending index lives in the diverted '.a' file,
        False otherwise.
        """
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            try:
                pendingfilename = self.indexfile + ".a"
                # register as a temp file to ensure cleanup on failure
                tr.registertmp(pendingfilename)
                fp2 = self._realopener(pendingfilename, "w")
                try:
                    # write existing data
                    fp2.write(fp1.read())
                    # add pending data
                    fp2.write("".join(self._delaybuf))
                finally:
                    fp2.close()
            finally:
                # the handle on the real index is not needed any longer;
                # close it instead of leaving that to garbage collection
                fp1.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def checkinlinesize(self, tr, fp=None):
        """run revlog.checkinlinesize unless index writes are delayed"""
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        # the first three lines are manifest, user and date/extra
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
                  user, date=None, extra=None):
        """Build the text of a new changelog entry and add it as a revision.

        Raises ``error.RevlogError`` for an empty username, a username
        containing a newline, or a reserved branch name in ``extra``.
        Returns the result of ``addrevision``.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            # Work on a copy: the "branch" key may be dropped below, and
            # that must not alter the caller's dict (which could even be
            # the shared _defaultextra mapping).
            extra = dict(extra)
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra
General Comments 0
You need to be logged in to leave comments. Login now