##// END OF EJS Templates
changelog: lazily parse manifest node...
Gregory Szorc -
r28490:959eadae default
parent child Browse files
Show More
@@ -1,487 +1,493 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
# Extra metadata used when a changeset records none.  decodeextra() starts
# from a copy of this dict, while changelogrevision hands out this very
# object, so it must be treated as read-only.
_defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 35 >>> s == res.decode('string_escape')
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
def decodeextra(text):
    """Decode NUL-separated, escaped ``key:value`` entries into a dict.

    The result starts from a copy of ``_defaultextra``; decoded entries
    override the defaults.

    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    decoded = _defaultextra.copy()
    for entry in text.split('\0'):
        if not entry:
            continue
        if '\\0' in entry:
            # fix up \0 without getting into trouble with \\0: a newline is
            # put after every escaped backslash so that a following '0' is
            # not mistaken for an escaped NUL, then the markers are removed
            entry = entry.replace('\\\\', '\\\\\n')
            entry = entry.replace('\\0', '\0')
            entry = entry.replace('\n', '')
        key, value = entry.decode('string_escape').split(':', 1)
        decoded[key] = value
    return decoded
63 63
def encodeextra(d):
    """Serialize the mapping ``d`` as escaped ``key:value`` entries joined
    by NUL bytes."""
    # keys must be sorted to produce a deterministic changelog entry
    encoded = []
    for key in sorted(d):
        encoded.append(_string_escape('%s:%s' % (key, d[key])))
    return "\0".join(encoded)
68 68
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    trimmed = (line.rstrip() for line in desc.splitlines())
    return '\n'.join(trimmed).strip('\n')
72 72
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it

    The object behaves like a file whose content is the real file opened
    through ``vfs`` followed by the chunks accumulated in ``buf``.  Writes
    never touch the real file; they are only appended to ``buf``.
    '''
    def __init__(self, vfs, name, mode, buf):
        # 'buf' is kept by reference so the caller sees every chunk that
        # gets written through this object
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size

    def end(self):
        """return the virtual size: real file size plus buffered data"""
        return self.size + sum(len(chunk) for chunk in self.data)

    def tell(self):
        """return the current virtual offset"""
        return self.offset

    def flush(self):
        # nothing to do: writes are only buffered in memory
        pass

    def close(self):
        """close the underlying real file; buffered data is left alone"""
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # the real file only needs repositioning while the virtual offset
        # still points into it
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        A negative ``count`` reads everything up to the virtual end.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the buffered chunks into one string, in place, so
            # the list shared with the caller stays the same object
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            if count < 0:
                # unbounded read: 'doff + count' would yield a bogus end
                # index, so slice to the end of the buffer explicitly
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        """buffer ``s`` and advance the virtual offset by its length"""
        self.data.append(str(s))
        self.offset += len(s)
124 124
125 125 def _divertopener(opener, target):
126 126 """build an opener that writes in 'target.a' instead of 'target'"""
127 127 def _divert(name, mode='r'):
128 128 if name != target:
129 129 return opener(name, mode)
130 130 return opener(name + ".a", mode)
131 131 return _divert
132 132
133 133 def _delayopener(opener, target, buf):
134 134 """build an opener that stores chunks in 'buf' instead of 'target'"""
135 135 def _delay(name, mode='r'):
136 136 if name != target:
137 137 return opener(name, mode)
138 138 return appender(opener, name, mode, buf)
139 139 return _delay
140 140
# Plain tuple type that changelogrevision() returns for an empty revision
# text; its field names mirror the attributes of a parsed revision.
_changelogrevision = collections.namedtuple('changelogrevision',
                                            ('manifest', 'user', 'date',
                                             'files', 'description', 'extra'))
144 144
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.

    The manifest node and the description are kept in their raw form and
    only converted when the ``manifest`` / ``description`` properties are
    read.
    """

    __slots__ = (
        'date',
        '_rawdesc',
        'extra',
        'files',
        '_rawmanifest',
        'user',
    )

    def __new__(cls, text):
        """Parse the revision ``text``.

        An empty ``text`` yields a ``_changelogrevision`` namedtuple filled
        with default values instead of an instance of this class.
        """
        if not text:
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        doublenl = text.index('\n\n')
        self._rawdesc = text[doublenl + 2:]

        # keep the hex manifest node as-is; it is converted to binary only
        # when the 'manifest' property is read
        nl1 = text.index('\n')
        self._rawmanifest = text[0:nl1]

        l = text[:doublenl].split('\n')
        self.user = encoding.tolocal(l[1])

        tdata = l[2].split(' ', 2)
        if len(tdata) != 3:
            # no extra field on the date line
            time = float(tdata[0])
            try:
                # various tools did silly things with the time zone field.
                timezone = int(tdata[1])
            except ValueError:
                timezone = 0
            # shared module-level dict, not a copy
            self.extra = _defaultextra
        else:
            time, timezone = float(tdata[0]), int(tdata[1])
            self.extra = decodeextra(tdata[2])

        self.date = (time, timezone)
        self.files = l[3:]

        return self

    @property
    def manifest(self):
        """binary manifest node, converted from the stored hex on access"""
        return bin(self._rawmanifest)

    @property
    def description(self):
        """commit message, passed through encoding.tolocal on access"""
        return encoding.tolocal(self._rawdesc)
216 222
class changelog(revlog.revlog):
    """revlog stored in "00changelog.i" whose accessors honour
    ``filteredrevs`` and whose index writes can be delayed during a
    transaction (see delayupdate)."""

    def __init__(self, opener):
        revlog.revlog.__init__(self, opener, "00changelog.i")
        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False
        # opener to restore once delayed/diverted writes are finalized
        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        # walk down from the last revision; the range ends at -1 so that
        # self.node(-1) is returned when every revision is filtered
        for i in xrange(len(self) -1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if not self.filteredrevs:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        # NOTE(review): the lookup below presumably forces the node cache
        # to be populated before it is returned -- confirm against revlog
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """delegate to the index's reachableroots2 implementation"""
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        """return head revisions, taking filteredrevs into account"""
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: write the whole index to "<index>.a",
                # removing any stale copy first
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: buffer new index data in memory
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a')
            try:
                fp.write("".join(self._delaybuf))
            finally:
                # do not leak the handle if the write fails
                fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def readpending(self, file):
        """read index data from a "pending" file

        During a transaction, the actual changeset data is already stored in the
        main file, but not yet finalized in the on-disk index. Instead, a
        "pending" index is written by the transaction logic. If this function
        is running, we are likely in a subprocess invoked in a hook. The
        subprocess is informed that it is within a transaction and needs to
        access its content.

        This function will read all the index data out of the pending file and
        overwrite the main index."""

        if not self.opener.exists(file):
            return # no pending data for changelog
        r = revlog.revlog(self.opener, file)
        self.index = r.index
        self.nodemap = r.nodemap
        self._nodecache = r._nodecache
        self._chunkcache = r._chunkcache

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            try:
                pendingfilename = self.indexfile + ".a"
                # register as a temp file to ensure cleanup on failure
                tr.registertmp(pendingfilename)
                # write existing data
                fp2 = self._realopener(pendingfilename, "w")
                try:
                    fp2.write(fp1.read())
                    # add pending data
                    fp2.write("".join(self._delaybuf))
                finally:
                    fp2.close()
            finally:
                # the source index handle was previously never closed
                fp1.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        # True tells the caller that a pending "<index>.a" file exists
        if self._divert:
            return True

        return False

    def checkinlinesize(self, tr, fp=None):
        # skipped while index writes are delayed; _finalize() calls this
        # again once the delay is over
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        # the header ends at the first blank line; files start on its
        # fourth line
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
                  user, date=None, extra=None):
        """Build the revision text for a new changeset and add it.

        Raises error.RevlogError for an empty username, a username
        containing a newline, or a reserved branch name in ``extra``.
        Returns the result of ``addrevision``.

        NOTE: a "branch" entry of "default" or "" is deleted from the
        ``extra`` dict passed in by the caller.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        # checked again: dropping the branch entry may have emptied the dict
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra
General Comments 0
You need to be logged in to leave comments. Login now