##// END OF EJS Templates
changelog: change the implementation of `_divertopener`...
marmoute -
r44985:897f0ce4 default
parent child Browse files
Show More
@@ -1,629 +1,632 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 copies,
20 20 encoding,
21 21 error,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 from .revlogutils import sidedata as sidedatamod
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
def _string_unescape(text):
    """Reverse the transformation done by ``_string_escape()``."""
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0: tag every
        # literal backslash pair with a newline marker, substitute the
        # NUL escapes (which can no longer match inside a \\ pair), then
        # strip the markers again
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)
62 62
63 63
def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    # start from the defaults so a missing 'branch' entry still resolves
    extra = _defaultextra.copy()
    for chunk in text.split(b'\0'):
        if not chunk:
            continue
        key, value = _string_unescape(chunk).split(b':', 1)
        extra[key] = value
    return extra
81 81
82 82
def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    return b"\0".join(
        _string_escape(b'%s:%s' % (key, pycompat.bytestr(d[key])))
        for key in sorted(d)
    )
90 90
91 91
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    lines = [line.rstrip() for line in desc.splitlines()]
    return b'\n'.join(lines).strip(b'\n')
95 95
96 96
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        # buf is a caller-supplied list of byte chunks; appended data
        # lives here until the transaction flushes it to disk
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        # reads below self.size come from the real file; everything past
        # it comes from self.data
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        # virtual end: on-disk size plus everything buffered so far
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        # intentionally a no-op: data must not hit the index file yet
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # only reposition the real file while we are still inside it;
        # offsets past self.size are resolved against self.data in read()
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            # serve the on-disk part first
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            # remainder comes from the in-memory buffer; coalesce the
            # chunk list into a single bytes object so it can be sliced
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
165 165
166 166
167 def _divertopener(opener, target):
168 """build an opener that writes in 'target.a' instead of 'target'"""
167 class _divertopener(object):
168 def __init__(self, opener, target):
169 self._opener = opener
170 self._target = target
169 171
170 def _divert(name, mode=b'r', checkambig=False, **kwargs):
171 if name != target:
172 return opener(name, mode, **kwargs)
173 return opener(name + b".a", mode, **kwargs)
172 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
173 if name != self._target:
174 return self._opener(name, mode, **kwargs)
175 return self._opener(name + b".a", mode, **kwargs)
174 176
175 return _divert
177 def __getattr__(self, attr):
178 return getattr(self._opener, attr)
176 179
177 180
178 181 def _delayopener(opener, target, buf):
179 182 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 183
181 184 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 185 if name != target:
183 186 return opener(name, mode, **kwargs)
184 187 assert not kwargs
185 188 return appender(opener, name, mode, buf)
186 189
187 190 return _delay
188 191
189 192
@attr.s
class _changelogrevision(object):
    # Plain data holder mirroring changelogrevision's attributes; used for
    # the empty-revision case. Field order matters to attrs' generated
    # __init__, so do not reorder.
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
204 207
205 208
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
    )

    def __new__(cls, text, sidedata, cpsd):
        # empty text means an empty (null) revision: hand back the plain
        # data holder with just the default extra
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        # the raw text plus these four offsets is all the state we keep;
        # every property below slices the text lazily
        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd

        return self

    @property
    def manifest(self):
        # first line is the manifest node in hex
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        # "time tz" portion of the third line
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        # optional encoded-extra portion of the third line, or None when
        # the revision predates the extra field (changelog v0)
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        # off[2] == off[3] means the files section is empty
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        # prefer sidedata storage when copies-in-sidedata is enabled;
        # otherwise fall back to the 'filesadded' extra field
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesadded')
        if rawindices is None:
            return None
        return copies.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
        if rawindices is None:
            return None
        return copies.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
        if rawcopies is None:
            return None
        return copies.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
        if rawcopies is None:
            return None
        return copies.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        # the description starts after the double newline (hence + 2)
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])
362 365
363 366
class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(
                b'exp-persistent-nodemap', False
            ),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        # state used by delayupdate()/_writepending()/_finalize() to keep
        # index writes invisible to other readers until the transaction
        # closes
        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()
        self._copiesstorage = opener.options.get(b'copies-storage')

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                # brand new changelog: divert writes to indexfile + '.a'
                # and rename into place at finalization
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # existing changelog: buffer appended index data in memory
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            # flush the in-memory buffer onto the real index
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        # never split the index while writes are delayed; _finalize will
        # call this again once the data is back in place
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        # skip the manifest node, user and date lines
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
        p1copies=None,
        p2copies=None,
        filesadded=None,
        filesremoved=None,
    ):
        """Add a new revision to the changelog; returns the new node.

        Copy/added/removed metadata is stored in 'extra' or in sidedata
        depending on the configured copies storage.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                # the default branch is implicit; don't store it
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files)
        sidedata = None
        if extra is not None:
            # copy metadata keys are managed below; never trust
            # caller-provided values for them
            for name in (
                b'p1copies',
                b'p2copies',
                b'filesadded',
                b'filesremoved',
            ):
                extra.pop(name, None)
        if p1copies is not None:
            p1copies = copies.encodecopies(sortedfiles, p1copies)
        if p2copies is not None:
            p2copies = copies.encodecopies(sortedfiles, p2copies)
        if filesadded is not None:
            filesadded = copies.encodefileindices(sortedfiles, filesadded)
        if filesremoved is not None:
            filesremoved = copies.encodefileindices(sortedfiles, filesremoved)
        if self._copiesstorage == b'extra':
            extrasentries = p1copies, p2copies, filesadded, filesremoved
            if extra is None and any(x is not None for x in extrasentries):
                extra = {}
            if p1copies is not None:
                extra[b'p1copies'] = p1copies
            if p2copies is not None:
                extra[b'p2copies'] = p2copies
            if filesadded is not None:
                extra[b'filesadded'] = filesadded
            if filesremoved is not None:
                extra[b'filesremoved'] = filesremoved
        elif self._copiesstorage == b'changeset-sidedata':
            sidedata = {}
            if p1copies:
                sidedata[sidedatamod.SD_P1COPIES] = p1copies
            if p2copies:
                sidedata[sidedatamod.SD_P2COPIES] = p2copies
            if filesadded:
                sidedata[sidedatamod.SD_FILESADDED] = filesadded
            if filesremoved:
                sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
            if not sidedata:
                sidedata = None

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", eg: unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
General Comments 0
You need to be logged in to leave comments. Login now