##// END OF EJS Templates
changelog: lazily parse user...
Gregory Szorc -
r28491:f57f7500 default
parent child Browse files
Show More
@@ -1,493 +1,499 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
26 26 _defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 35 >>> s == res.decode('string_escape')
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
def decodeextra(text):
    """Decode a NUL-separated, escaped extra field into a dict.

    The result starts from a copy of ``_defaultextra``; every non-empty
    ``key:value`` item found in ``text`` is unescaped and stored on top of it.

    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    result = _defaultextra.copy()
    for item in text.split('\0'):
        if not item:
            continue
        if '\\0' in item:
            # fix up \0 without getting into trouble with \\0: tag every
            # escaped backslash with a newline so the backslash it ends with
            # cannot pair up with a following "0", turn the remaining \0
            # sequences into real NULs, then drop the tags again.
            item = item.replace('\\\\', '\\\\\n')
            item = item.replace('\\0', '\0')
            item = item.replace('\n', '')
        key, value = item.decode('string_escape').split(':', 1)
        result[key] = value
    return result
63 63
def encodeextra(d):
    """Serialize the mapping ``d`` as escaped ``key:value`` items joined by NUL."""
    encoded = []
    # keys must be sorted to produce a deterministic changelog entry
    for key in sorted(d):
        encoded.append(_string_escape('%s:%s' % (key, d[key])))
    return "\0".join(encoded)
68 68
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    cleaned = []
    for line in desc.splitlines():
        cleaned.append(line.rstrip())
    return '\n'.join(cleaned).strip('\n')
72 72
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it

    The object behaves like a file whose content is the real file followed
    by the strings collected in ``buf``. Writes never reach the real file:
    they are appended to ``buf``, which the creator keeps a reference to.
    '''
    def __init__(self, vfs, name, mode, buf):
        # 'buf' is shared with the caller and is only ever mutated in place
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        # virtual position, spanning the real file and the buffered data
        self.offset = fp.tell()
        # size of the real file when it was opened
        self.size = vfs.fstat(fp).st_size

    def end(self):
        '''return the virtual size: real file plus buffered data'''
        # add up the chunk lengths rather than building a throw-away string
        return self.size + sum(map(len, self.data))

    def tell(self):
        return self.offset

    def flush(self):
        pass

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # only positions inside the real file need a real seek
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        A negative ``count`` reads everything up to the end of the buffered
        data, as it does for regular file objects.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the buffer into a single chunk, in place, so that the
            # list shared with the caller stays the same object
            self.data[:] = ["".join(self.data)]
            if count < 0:
                # "read to the end": slicing with doff + count would cut the
                # buffered data short (or return nothing at all)
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        '''buffer ``s`` instead of writing it to the real file'''
        self.data.append(str(s))
        self.offset += len(s)
124 124
125 125 def _divertopener(opener, target):
126 126 """build an opener that writes in 'target.a' instead of 'target'"""
127 127 def _divert(name, mode='r'):
128 128 if name != target:
129 129 return opener(name, mode)
130 130 return opener(name + ".a", mode)
131 131 return _divert
132 132
133 133 def _delayopener(opener, target, buf):
134 134 """build an opener that stores chunks in 'buf' instead of 'target'"""
135 135 def _delay(name, mode='r'):
136 136 if name != target:
137 137 return opener(name, mode)
138 138 return appender(opener, name, mode, buf)
139 139 return _delay
140 140
141 141 _changelogrevision = collections.namedtuple('changelogrevision',
142 142 ('manifest', 'user', 'date',
143 143 'files', 'description', 'extra'))
144 144
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.

    The date, extra and files fields are parsed when the object is created.
    The manifest, user and description are kept in their raw form and only
    converted when the corresponding property is read.
    """

    # 'user' must not be listed here: it is provided by the property below,
    # and a slot sharing its name with a class attribute is rejected when the
    # class is created.
    __slots__ = (
        'date',
        '_rawdesc',
        'extra',
        'files',
        '_rawmanifest',
        '_rawuser',
    )

    def __new__(cls, text):
        """Parse the raw revision ``text``.

        An empty ``text`` yields a ``_changelogrevision`` namedtuple holding
        default values instead of an instance of this class.
        """
        if not text:
            # NOTE(review): the shared _defaultextra dict is handed out
            # as-is here and below; callers presumably must not mutate it.
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        doublenl = text.index('\n\n')
        self._rawdesc = text[doublenl + 2:]

        nl1 = text.index('\n')
        self._rawmanifest = text[0:nl1]

        nl2 = text.index('\n', nl1 + 1)
        self._rawuser = text[nl1 + 1:nl2]

        l = text[:doublenl].split('\n')

        tdata = l[2].split(' ', 2)
        if len(tdata) != 3:
            time = float(tdata[0])
            try:
                # various tools did silly things with the time zone field.
                timezone = int(tdata[1])
            except ValueError:
                timezone = 0
            self.extra = _defaultextra
        else:
            time, timezone = float(tdata[0]), int(tdata[1])
            self.extra = decodeextra(tdata[2])

        self.date = (time, timezone)
        self.files = l[3:]

        return self

    @property
    def manifest(self):
        """manifest node, converted with bin() on each access"""
        return bin(self._rawmanifest)

    @property
    def user(self):
        """user, converted with encoding.tolocal() on each access"""
        return encoding.tolocal(self._rawuser)

    @property
    def description(self):
        """description, converted with encoding.tolocal() on each access"""
        return encoding.tolocal(self._rawdesc)
222 228
class changelog(revlog.revlog):
    """revlog stored in "00changelog.i" that holds the changesets

    On top of the base revlog this class hides the revisions listed in
    ``filteredrevs`` and can delay index writes for the duration of a
    transaction (see ``delayupdate``).
    """

    def __init__(self, opener):
        revlog.revlog.__init__(self, opener, "00changelog.i")
        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False
        # the untouched opener; self.opener may be swapped for a diverting
        # or delaying wrapper while a transaction is running
        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        for i in xrange(len(self) -1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if not self.filteredrevs:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """forward to the index's reachableroots2() with the same arguments"""
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        """filtered version of revlog.headrevs"""
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: write everything to a fresh ".a" file
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: collect index writes in memory
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a')
            fp.write("".join(self._delaybuf))
            fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def readpending(self, file):
        """read index data from a "pending" file

        During a transaction, the actual changeset data is already stored in the
        main file, but not yet finalized in the on-disk index. Instead, a
        "pending" index is written by the transaction logic. If this function
        is running, we are likely in a subprocess invoked in a hook. The
        subprocess is informed that it is within a transaction and needs to
        access its content.

        This function will read all the index data out of the pending file and
        overwrite the main index."""

        if not self.opener.exists(file):
            return # no pending data for changelog
        r = revlog.revlog(self.opener, file)
        self.index = r.index
        self.nodemap = r.nodemap
        self._nodecache = r._nodecache
        self._chunkcache = r._chunkcache

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            try:
                pendingfilename = self.indexfile + ".a"
                # register as a temp file to ensure cleanup on failure
                tr.registertmp(pendingfilename)
                fp2 = self._realopener(pendingfilename, "w")
                try:
                    # write existing data
                    fp2.write(fp1.read())
                    # add pending data
                    fp2.write("".join(self._delaybuf))
                finally:
                    fp2.close()
            finally:
                # do not leave the handle on the real index open
                fp1.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def checkinlinesize(self, tr, fp=None):
        """run revlog.checkinlinesize unless index writes are being delayed"""
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
                  user, date=None, extra=None):
        """Build the text of a new changeset and add it as a revision.

        Raises error.RevlogError for an empty username, a username that
        contains a newline, or a reserved branch name in ``extra``. The
        ``extra`` mapping passed in is left untouched. Returns whatever
        ``addrevision`` returns.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            # work on a private copy so that dropping the implicit branch
            # below does not modify the caller's mapping
            extra = dict(extra)
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        # 'extra' may have become empty once the default branch was removed
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        # only 'extra' is needed, so skip read(), which would also convert
        # the manifest, user and description
        extra = self.changelogrevision(rev).extra
        return encoding.tolocal(extra.get("branch")), 'close' in extra
General Comments 0
You need to be logged in to leave comments. Login now