##// END OF EJS Templates
changelog: avoid slicing raw data until needed...
Gregory Szorc -
r28495:70c2f8a9 default
parent child Browse files
Show More
@@ -1,529 +1,527 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
26 26 _defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 35 >>> s == res.decode('string_escape')
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
def decodeextra(text):
    """Decode NUL-separated, escaped ``key:value`` items into a dict.

    The result starts as a copy of ``_defaultextra``; decoded items are
    added on top of it.

    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    decoded = _defaultextra.copy()
    for item in text.split('\0'):
        if not item:
            continue
        if '\\0' in item:
            # fix up \0 without getting into trouble with \\0: a newline is
            # temporarily inserted after every escaped backslash so that the
            # '0' following it is not taken for an escaped NUL, then the
            # newlines are removed again
            item = (item.replace('\\\\', '\\\\\n')
                        .replace('\\0', '\0')
                        .replace('\n', ''))
        key, value = item.decode('string_escape').split(':', 1)
        decoded[key] = value
    return decoded
63 63
def encodeextra(d):
    """Encode dict ``d`` as escaped ``key:value`` items joined by NUL."""
    # keys must be sorted to produce a deterministic changelog entry
    return "\0".join(_string_escape('%s:%s' % (key, d[key]))
                     for key in sorted(d))
68 68
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    trimmed = (line.rstrip() for line in desc.splitlines())
    joined = '\n'.join(trimmed)
    return joined.strip('\n')
72 72
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it

    The object presents one virtual file: the first ``size`` bytes come from
    the real file opened through ``vfs``; everything written afterwards is
    kept in the caller-supplied list ``buf`` and never reaches the real file
    through this object.
    '''
    def __init__(self, vfs, name, mode, buf):
        # ``buf`` is kept by reference and only ever modified in place, so
        # the caller sees every chunk appended by write().
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size

    def end(self):
        '''return the virtual file size (real file plus buffered data)'''
        return self.size + sum(len(chunk) for chunk in self.data)

    def tell(self):
        '''return the current virtual offset'''
        return self.offset

    def flush(self):
        '''no-op: buffered data is intentionally not written here'''
        pass

    def close(self):
        '''close the underlying real file'''
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # the real file only needs repositioning when the new offset falls
        # inside it; offsets past it are served from the buffer
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        A negative ``count`` (the default) reads through the end of the
        buffered data.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the buffered chunks into one string, in place, so
            # that the list shared with the caller stays the same object
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            if count < 0:
                # "doff + count" would be an end index relative to the end
                # of the buffer and drop trailing data
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        '''buffer ``s`` instead of writing it to the real file'''
        self.data.append(str(s))
        self.offset += len(s)
124 124
125 125 def _divertopener(opener, target):
126 126 """build an opener that writes in 'target.a' instead of 'target'"""
127 127 def _divert(name, mode='r'):
128 128 if name != target:
129 129 return opener(name, mode)
130 130 return opener(name + ".a", mode)
131 131 return _divert
132 132
133 133 def _delayopener(opener, target, buf):
134 134 """build an opener that stores chunks in 'buf' instead of 'target'"""
135 135 def _delay(name, mode='r'):
136 136 if name != target:
137 137 return opener(name, mode)
138 138 return appender(opener, name, mode, buf)
139 139 return _delay
140 140
141 141 _changelogrevision = collections.namedtuple('changelogrevision',
142 142 ('manifest', 'user', 'date',
143 143 'files', 'description', 'extra'))
144 144
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.

    Only the raw text and the offsets of its field separators are stored;
    each property slices and decodes its field when it is accessed.
    """

    __slots__ = (
        '_offsets',
        '_text',
    )

    def __new__(cls, text):
        if not text:
            # An empty text yields a plain namedtuple of defaults rather than
            # an instance of this class. Its ``extra`` is the module-level
            # ``_defaultextra`` dict itself, not a copy.
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        # (end of manifest, end of user, end of date/extra, start of the
        # double newline before the description)
        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

    @property
    def manifest(self):
        """Manifest node: first line of the text passed through ``bin``."""
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        """Second line of the text, converted with ``encoding.tolocal``."""
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        """First two space-separated fields of the date/extra line."""
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        """Encoded extra from the date/extra line, or None when absent."""
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        """``(time, timezone)`` with time as float and timezone as int."""
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        """Dict of extra metadata.

        When the revision carries no extra field, the module-level
        ``_defaultextra`` dict itself is returned (not a copy).
        """
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        """List of file names recorded in the revision (may be empty)."""
        off = self._offsets
        # equal offsets mean the date line is directly followed by the blank
        # line that precedes the description, i.e. there is no file list
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def description(self):
        """Text after the double newline, via ``encoding.tolocal``."""
        return encoding.tolocal(self._text[self._offsets[3] + 2:])
258 256
class changelog(revlog.revlog):
    """Revlog stored in "00changelog.i" holding the changeset entries.

    On top of ``revlog.revlog`` this class:

    - hides the revisions listed in ``filteredrevs`` from lookups and
      iteration;
    - can delay index writes during a transaction (see ``delayupdate``),
      either by diverting them to "<indexfile>.a" or by buffering them in
      memory.
    """

    def __init__(self, opener):
        revlog.revlog.__init__(self, opener, "00changelog.i")
        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False
        # opener to restore once delayed writes are finalized
        self._realopener = opener
        # True between delayupdate() and _finalize()
        self._delayed = False
        # list of pending index chunks while buffering in memory, else None
        self._delaybuf = None
        # True while index writes are redirected to "<indexfile>.a"
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        # walks down to and including -1, so node(-1) is returned when every
        # real revision is filtered
        for i in xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if not self.filteredrevs:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        # NOTE(review): the lookup below presumably forces _nodecache to be
        # populated before it is returned -- confirm against revlog.rev
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """Delegate to the index's ``reachableroots2``."""
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        """filtered version of revlog.headrevs"""
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        """revlog.strip, refused (by assertion) while revisions are filtered"""
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: write everything to "<indexfile>.a",
                # removing any such file that already exists
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: collect index writes in memory
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            # NOTE(review): the file is opened and closed immediately before
            # the rename; the reason is not visible here -- confirm before
            # changing
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a')
            fp.write("".join(self._delaybuf))
            fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def readpending(self, file):
        """read index data from a "pending" file

        During a transaction, the actual changeset data is already stored in the
        main file, but not yet finalized in the on-disk index. Instead, a
        "pending" index is written by the transaction logic. If this function
        is running, we are likely in a subprocess invoked in a hook. The
        subprocess is informed that it is within a transaction and needs to
        access its content.

        This function will read all the index data out of the pending file and
        overwrite the main index."""

        if not self.opener.exists(file):
            return # no pending data for changelog
        r = revlog.revlog(self.opener, file)
        self.index = r.index
        self.nodemap = r.nodemap
        self._nodecache = r._nodecache
        self._chunkcache = r._chunkcache

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup

        Returns True when index data is (now) diverted to the ".a" file,
        False otherwise.
        """
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            try:
                pendingfilename = self.indexfile + ".a"
                # register as a temp file to ensure cleanup on failure
                tr.registertmp(pendingfilename)
                fp2 = self._realopener(pendingfilename, "w")
                try:
                    # write existing data
                    fp2.write(fp1.read())
                    # add pending data
                    fp2.write("".join(self._delaybuf))
                finally:
                    fp2.close()
            finally:
                # the read handle was previously never closed
                fp1.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def checkinlinesize(self, tr, fp=None):
        """revlog.checkinlinesize, skipped while index writes are delayed"""
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        # the first three lines are the manifest node, user and date/extra
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
                  user, date=None, extra=None):
        """Build the text of a new changelog entry and add it as a revision.

        The result of ``addrevision`` is returned. ``error.RevlogError`` is
        raised for an empty username, a username containing a newline, or a
        reserved branch name (".", "null", "tip") in ``extra``.

        Note that a "branch" key equal to "default" or "" is deleted from
        the ``extra`` dict passed in by the caller.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        # checked again: removing the branch may have left extra empty
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra
General Comments 0
You need to be logged in to leave comments. Login now