sidedata: simply read p2copies files from the `ChangingFiles` object
marmoute
r46149:147fb889 default
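In short: when copy data is stored in changeset sidedata (`self._cpsd`), the `p2copies` property now returns `self.changes.copied_from_p2` instead of decoding the `SD_P2COPIES` sidedata field itself, which also lets the now-unused `sidedatamod` import be dropped. A minimal usage sketch of how a caller reads second-parent copy information after this change; `repo` and `node` are assumed placeholders, not part of the diff:

def p2_copy_map(repo, node):
    """Return the {dest: source} copies recorded against p2 for ``node``.

    Sketch only: ``repo`` and ``node`` come from the caller (e.g.
    ``repo[b'.'].node()``); error handling is omitted.
    """
    entry = repo.changelog.changelogrevision(node)
    # With copies-storage=changeset-sidedata this now goes through
    # entry.changes (a metadata.ChangingFiles) -> copied_from_p2;
    # otherwise it falls back to the b'p2copies' entry in extras.
    return entry.p2copies or {}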
@@ -1,610 +1,606 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 from .revlogutils import sidedata as sidedatamod
31
32 30 _defaultextra = {b'branch': b'default'}
33 31
34 32
35 33 def _string_escape(text):
36 34 """
37 35 >>> from .pycompat import bytechr as chr
38 36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 38 >>> s
41 39 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 40 >>> res = _string_escape(s)
43 41 >>> s == _string_unescape(res)
44 42 True
45 43 """
46 44 # subset of the string_escape codec
47 45 text = (
48 46 text.replace(b'\\', b'\\\\')
49 47 .replace(b'\n', b'\\n')
50 48 .replace(b'\r', b'\\r')
51 49 )
52 50 return text.replace(b'\0', b'\\0')
53 51
54 52
55 53 def _string_unescape(text):
56 54 if b'\\0' in text:
57 55 # fix up \0 without getting into trouble with \\0
58 56 text = text.replace(b'\\\\', b'\\\\\n')
59 57 text = text.replace(b'\\0', b'\0')
60 58 text = text.replace(b'\n', b'')
61 59 return stringutil.unescapestr(text)
62 60
63 61
64 62 def decodeextra(text):
65 63 """
66 64 >>> from .pycompat import bytechr as chr
67 65 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 66 ... ).items())
69 67 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 69 ... b'baz': chr(92) + chr(0) + b'2'})
72 70 ... ).items())
73 71 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 72 """
75 73 extra = _defaultextra.copy()
76 74 for l in text.split(b'\0'):
77 75 if l:
78 76 k, v = _string_unescape(l).split(b':', 1)
79 77 extra[k] = v
80 78 return extra
81 79
82 80
83 81 def encodeextra(d):
84 82 # keys must be sorted to produce a deterministic changelog entry
85 83 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 84 return b"\0".join(items)
87 85
88 86
89 87 def stripdesc(desc):
90 88 """strip trailing whitespace and leading and trailing empty lines"""
91 89 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 90
93 91
94 92 class appender(object):
95 93 '''the changelog index must be updated last on disk, so we use this class
96 94 to delay writes to it'''
97 95
98 96 def __init__(self, vfs, name, mode, buf):
99 97 self.data = buf
100 98 fp = vfs(name, mode)
101 99 self.fp = fp
102 100 self.offset = fp.tell()
103 101 self.size = vfs.fstat(fp).st_size
104 102 self._end = self.size
105 103
106 104 def end(self):
107 105 return self._end
108 106
109 107 def tell(self):
110 108 return self.offset
111 109
112 110 def flush(self):
113 111 pass
114 112
115 113 @property
116 114 def closed(self):
117 115 return self.fp.closed
118 116
119 117 def close(self):
120 118 self.fp.close()
121 119
122 120 def seek(self, offset, whence=0):
123 121 '''virtual file offset spans real file and data'''
124 122 if whence == 0:
125 123 self.offset = offset
126 124 elif whence == 1:
127 125 self.offset += offset
128 126 elif whence == 2:
129 127 self.offset = self.end() + offset
130 128 if self.offset < self.size:
131 129 self.fp.seek(self.offset)
132 130
133 131 def read(self, count=-1):
134 132 '''the only trick here is reads that span the real file and the buffered data'''
135 133 ret = b""
136 134 if self.offset < self.size:
137 135 s = self.fp.read(count)
138 136 ret = s
139 137 self.offset += len(s)
140 138 if count > 0:
141 139 count -= len(s)
142 140 if count != 0:
143 141 doff = self.offset - self.size
144 142 self.data.insert(0, b"".join(self.data))
145 143 del self.data[1:]
146 144 s = self.data[0][doff : doff + count]
147 145 self.offset += len(s)
148 146 ret += s
149 147 return ret
150 148
151 149 def write(self, s):
152 150 self.data.append(bytes(s))
153 151 self.offset += len(s)
154 152 self._end += len(s)
155 153
156 154 def __enter__(self):
157 155 self.fp.__enter__()
158 156 return self
159 157
160 158 def __exit__(self, *args):
161 159 return self.fp.__exit__(*args)
162 160
163 161
164 162 class _divertopener(object):
165 163 def __init__(self, opener, target):
166 164 self._opener = opener
167 165 self._target = target
168 166
169 167 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 168 if name != self._target:
171 169 return self._opener(name, mode, **kwargs)
172 170 return self._opener(name + b".a", mode, **kwargs)
173 171
174 172 def __getattr__(self, attr):
175 173 return getattr(self._opener, attr)
176 174
177 175
178 176 def _delayopener(opener, target, buf):
179 177 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 178
181 179 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 180 if name != target:
183 181 return opener(name, mode, **kwargs)
184 182 assert not kwargs
185 183 return appender(opener, name, mode, buf)
186 184
187 185 return _delay
188 186
189 187
190 188 @attr.s
191 189 class _changelogrevision(object):
192 190 # Extensions might modify _defaultextra, so let the constructor below pass
193 191 # it in
194 192 extra = attr.ib()
195 193 manifest = attr.ib(default=nullid)
196 194 user = attr.ib(default=b'')
197 195 date = attr.ib(default=(0, 0))
198 196 files = attr.ib(default=attr.Factory(list))
199 197 filesadded = attr.ib(default=None)
200 198 filesremoved = attr.ib(default=None)
201 199 p1copies = attr.ib(default=None)
202 200 p2copies = attr.ib(default=None)
203 201 description = attr.ib(default=b'')
204 202
205 203
206 204 class changelogrevision(object):
207 205 """Holds results of a parsed changelog revision.
208 206
209 207 Changelog revisions consist of multiple pieces of data, including
210 208 the manifest node, user, and date. This object exposes a view into
211 209 the parsed object.
212 210 """
213 211
214 212 __slots__ = (
215 213 '_offsets',
216 214 '_text',
217 215 '_sidedata',
218 216 '_cpsd',
219 217 '_changes',
220 218 )
221 219
222 220 def __new__(cls, text, sidedata, cpsd):
223 221 if not text:
224 222 return _changelogrevision(extra=_defaultextra)
225 223
226 224 self = super(changelogrevision, cls).__new__(cls)
227 225 # We could return here and implement the following as an __init__.
228 226 # But doing it here is equivalent and saves an extra function call.
229 227
230 228 # format used:
231 229 # nodeid\n : manifest node in ascii
232 230 # user\n : user, no \n or \r allowed
233 231 # time tz extra\n : date (time is int or float, timezone is int)
234 232 # : extra is metadata, encoded and separated by '\0'
235 233 # : older versions ignore it
236 234 # files\n\n : files modified by the cset, no \n or \r allowed
237 235 # (.*) : comment (free text, ideally utf-8)
238 236 #
239 237 # changelog v0 doesn't use extra
240 238
241 239 nl1 = text.index(b'\n')
242 240 nl2 = text.index(b'\n', nl1 + 1)
243 241 nl3 = text.index(b'\n', nl2 + 1)
244 242
245 243 # The list of files may be empty, in which case nl3 is the first newline
246 244 # of the double newline that precedes the description.
247 245 if text[nl3 + 1 : nl3 + 2] == b'\n':
248 246 doublenl = nl3
249 247 else:
250 248 doublenl = text.index(b'\n\n', nl3 + 1)
251 249
252 250 self._offsets = (nl1, nl2, nl3, doublenl)
253 251 self._text = text
254 252 self._sidedata = sidedata
255 253 self._cpsd = cpsd
256 254 self._changes = None
257 255
258 256 return self
259 257
260 258 @property
261 259 def manifest(self):
262 260 return bin(self._text[0 : self._offsets[0]])
263 261
264 262 @property
265 263 def user(self):
266 264 off = self._offsets
267 265 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
268 266
269 267 @property
270 268 def _rawdate(self):
271 269 off = self._offsets
272 270 dateextra = self._text[off[1] + 1 : off[2]]
273 271 return dateextra.split(b' ', 2)[0:2]
274 272
275 273 @property
276 274 def _rawextra(self):
277 275 off = self._offsets
278 276 dateextra = self._text[off[1] + 1 : off[2]]
279 277 fields = dateextra.split(b' ', 2)
280 278 if len(fields) != 3:
281 279 return None
282 280
283 281 return fields[2]
284 282
285 283 @property
286 284 def date(self):
287 285 raw = self._rawdate
288 286 time = float(raw[0])
289 287 # Various tools did silly things with the timezone.
290 288 try:
291 289 timezone = int(raw[1])
292 290 except ValueError:
293 291 timezone = 0
294 292
295 293 return time, timezone
296 294
297 295 @property
298 296 def extra(self):
299 297 raw = self._rawextra
300 298 if raw is None:
301 299 return _defaultextra
302 300
303 301 return decodeextra(raw)
304 302
305 303 @property
306 304 def changes(self):
307 305 if self._changes is not None:
308 306 return self._changes
309 307 if self._cpsd:
310 308 changes = metadata.decode_files_sidedata(self, self._sidedata)
311 309 else:
312 310 changes = metadata.ChangingFiles(
313 311 touched=self.files or (),
314 312 added=self.filesadded or (),
315 313 removed=self.filesremoved or (),
316 314 p1_copies=self.p1copies or {},
317 315 p2_copies=self.p2copies or {},
318 316 )
319 317 self._changes = changes
320 318 return changes
321 319
322 320 @property
323 321 def files(self):
324 322 off = self._offsets
325 323 if off[2] == off[3]:
326 324 return []
327 325
328 326 return self._text[off[2] + 1 : off[3]].split(b'\n')
329 327
330 328 @property
331 329 def filesadded(self):
332 330 if self._cpsd:
333 331 return self.changes.added
334 332 else:
335 333 rawindices = self.extra.get(b'filesadded')
336 334 if rawindices is None:
337 335 return None
338 336 return metadata.decodefileindices(self.files, rawindices)
339 337
340 338 @property
341 339 def filesremoved(self):
342 340 if self._cpsd:
343 341 return self.changes.removed
344 342 else:
345 343 rawindices = self.extra.get(b'filesremoved')
346 344 if rawindices is None:
347 345 return None
348 346 return metadata.decodefileindices(self.files, rawindices)
349 347
350 348 @property
351 349 def p1copies(self):
352 350 if self._cpsd:
353 351 return self.changes.copied_from_p1
354 352 else:
355 353 rawcopies = self.extra.get(b'p1copies')
356 354 if rawcopies is None:
357 355 return None
358 356 return metadata.decodecopies(self.files, rawcopies)
359 357
360 358 @property
361 359 def p2copies(self):
362 360 if self._cpsd:
363 rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
364 if not rawcopies:
365 return {}
361 return self.changes.copied_from_p2
366 362 else:
367 363 rawcopies = self.extra.get(b'p2copies')
368 364 if rawcopies is None:
369 365 return None
370 366 return metadata.decodecopies(self.files, rawcopies)
371 367
372 368 @property
373 369 def description(self):
374 370 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
375 371
376 372
377 373 class changelog(revlog.revlog):
378 374 def __init__(self, opener, trypending=False):
379 375 """Load a changelog revlog using an opener.
380 376
381 377 If ``trypending`` is true, we attempt to load the index from a
382 378 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
383 379 The ``00changelog.i.a`` file contains index (and possibly inline
384 380 revision) data for a transaction that hasn't been finalized yet.
385 381 It exists in a separate file to facilitate readers (such as
386 382 hook processes) accessing data before a transaction is finalized.
387 383 """
388 384 if trypending and opener.exists(b'00changelog.i.a'):
389 385 indexfile = b'00changelog.i.a'
390 386 else:
391 387 indexfile = b'00changelog.i'
392 388
393 389 datafile = b'00changelog.d'
394 390 revlog.revlog.__init__(
395 391 self,
396 392 opener,
397 393 indexfile,
398 394 datafile=datafile,
399 395 checkambig=True,
400 396 mmaplargeindex=True,
401 397 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
402 398 )
403 399
404 400 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
405 401 # changelogs don't benefit from generaldelta.
406 402
407 403 self.version &= ~revlog.FLAG_GENERALDELTA
408 404 self._generaldelta = False
409 405
410 406 # Delta chains for changelogs tend to be very small because entries
411 407 # tend to be small and don't delta well with each other. So disable delta
412 408 # chains.
413 409 self._storedeltachains = False
414 410
415 411 self._realopener = opener
416 412 self._delayed = False
417 413 self._delaybuf = None
418 414 self._divert = False
419 415 self._filteredrevs = frozenset()
420 416 self._filteredrevs_hashcache = {}
421 417 self._copiesstorage = opener.options.get(b'copies-storage')
422 418
423 419 @property
424 420 def filteredrevs(self):
425 421 return self._filteredrevs
426 422
427 423 @filteredrevs.setter
428 424 def filteredrevs(self, val):
429 425 # Ensure all updates go through this function
430 426 assert isinstance(val, frozenset)
431 427 self._filteredrevs = val
432 428 self._filteredrevs_hashcache = {}
433 429
434 430 def delayupdate(self, tr):
435 431 """delay visibility of index updates to other readers"""
436 432
437 433 if not self._delayed:
438 434 if len(self) == 0:
439 435 self._divert = True
440 436 if self._realopener.exists(self.indexfile + b'.a'):
441 437 self._realopener.unlink(self.indexfile + b'.a')
442 438 self.opener = _divertopener(self._realopener, self.indexfile)
443 439 else:
444 440 self._delaybuf = []
445 441 self.opener = _delayopener(
446 442 self._realopener, self.indexfile, self._delaybuf
447 443 )
448 444 self._delayed = True
449 445 tr.addpending(b'cl-%i' % id(self), self._writepending)
450 446 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
451 447
452 448 def _finalize(self, tr):
453 449 """finalize index updates"""
454 450 self._delayed = False
455 451 self.opener = self._realopener
456 452 # move redirected index data back into place
457 453 if self._divert:
458 454 assert not self._delaybuf
459 455 tmpname = self.indexfile + b".a"
460 456 nfile = self.opener.open(tmpname)
461 457 nfile.close()
462 458 self.opener.rename(tmpname, self.indexfile, checkambig=True)
463 459 elif self._delaybuf:
464 460 fp = self.opener(self.indexfile, b'a', checkambig=True)
465 461 fp.write(b"".join(self._delaybuf))
466 462 fp.close()
467 463 self._delaybuf = None
468 464 self._divert = False
469 465 # split when we're done
470 466 self._enforceinlinesize(tr)
471 467
472 468 def _writepending(self, tr):
473 469 """create a file containing the unfinalized state for
474 470 pretxnchangegroup"""
475 471 if self._delaybuf:
476 472 # make a temporary copy of the index
477 473 fp1 = self._realopener(self.indexfile)
478 474 pendingfilename = self.indexfile + b".a"
479 475 # register as a temp file to ensure cleanup on failure
480 476 tr.registertmp(pendingfilename)
481 477 # write existing data
482 478 fp2 = self._realopener(pendingfilename, b"w")
483 479 fp2.write(fp1.read())
484 480 # add pending data
485 481 fp2.write(b"".join(self._delaybuf))
486 482 fp2.close()
487 483 # switch modes so finalize can simply rename
488 484 self._delaybuf = None
489 485 self._divert = True
490 486 self.opener = _divertopener(self._realopener, self.indexfile)
491 487
492 488 if self._divert:
493 489 return True
494 490
495 491 return False
496 492
497 493 def _enforceinlinesize(self, tr, fp=None):
498 494 if not self._delayed:
499 495 revlog.revlog._enforceinlinesize(self, tr, fp)
500 496
501 497 def read(self, node):
502 498 """Obtain data from a parsed changelog revision.
503 499
504 500 Returns a 6-tuple of:
505 501
506 502 - manifest node in binary
507 503 - author/user as a localstr
508 504 - date as a 2-tuple of (time, timezone)
509 505 - list of files
510 506 - commit message as a localstr
511 507 - dict of extra metadata
512 508
513 509 Unless you need to access all fields, consider calling
514 510 ``changelogrevision`` instead, as it is faster for partial object
515 511 access.
516 512 """
517 513 d, s = self._revisiondata(node)
518 514 c = changelogrevision(
519 515 d, s, self._copiesstorage == b'changeset-sidedata'
520 516 )
521 517 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
522 518
523 519 def changelogrevision(self, nodeorrev):
524 520 """Obtain a ``changelogrevision`` for a node or revision."""
525 521 text, sidedata = self._revisiondata(nodeorrev)
526 522 return changelogrevision(
527 523 text, sidedata, self._copiesstorage == b'changeset-sidedata'
528 524 )
529 525
530 526 def readfiles(self, node):
531 527 """
532 528 short version of read that only returns the files modified by the cset
533 529 """
534 530 text = self.revision(node)
535 531 if not text:
536 532 return []
537 533 last = text.index(b"\n\n")
538 534 l = text[:last].split(b'\n')
539 535 return l[3:]
540 536
541 537 def add(
542 538 self,
543 539 manifest,
544 540 files,
545 541 desc,
546 542 transaction,
547 543 p1,
548 544 p2,
549 545 user,
550 546 date=None,
551 547 extra=None,
552 548 ):
553 549 # Convert to UTF-8 encoded bytestrings as the very first
554 550 # thing: calling any method on a localstr object will turn it
555 551 # into a str object and the cached UTF-8 string is thus lost.
556 552 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
557 553
558 554 user = user.strip()
559 555 # An empty username or a username with a "\n" will make the
560 556 # revision text contain two "\n\n" sequences -> corrupt
561 557 # repository since read cannot unpack the revision.
562 558 if not user:
563 559 raise error.StorageError(_(b"empty username"))
564 560 if b"\n" in user:
565 561 raise error.StorageError(
566 562 _(b"username %r contains a newline") % pycompat.bytestr(user)
567 563 )
568 564
569 565 desc = stripdesc(desc)
570 566
571 567 if date:
572 568 parseddate = b"%d %d" % dateutil.parsedate(date)
573 569 else:
574 570 parseddate = b"%d %d" % dateutil.makedate()
575 571 if extra:
576 572 branch = extra.get(b"branch")
577 573 if branch in (b"default", b""):
578 574 del extra[b"branch"]
579 575 elif branch in (b".", b"null", b"tip"):
580 576 raise error.StorageError(
581 577 _(b'the name \'%s\' is reserved') % branch
582 578 )
583 579 sortedfiles = sorted(files.touched)
584 580 sidedata = None
585 581 if self._copiesstorage == b'changeset-sidedata':
586 582 sidedata = metadata.encode_files_sidedata(files)
587 583
588 584 if extra:
589 585 extra = encodeextra(extra)
590 586 parseddate = b"%s %s" % (parseddate, extra)
591 587 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
592 588 text = b"\n".join(l)
593 589 return self.addrevision(
594 590 text, transaction, len(self), p1, p2, sidedata=sidedata
595 591 )
596 592
597 593 def branchinfo(self, rev):
598 594 """return the branch name and open/close state of a revision
599 595
600 596 This function exists because creating a changectx object
601 597 just to access this is costly."""
602 598 extra = self.read(rev)[5]
603 599 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
604 600
605 601 def _nodeduplicatecallback(self, transaction, node):
606 602 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
607 603 #
608 604 # We track them in a list to preserve their order from the source bundle
609 605 duplicates = transaction.changes.setdefault(b'revduplicates', [])
610 606 duplicates.append(self.rev(node))
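For reference, the "format used" comment in changelogrevision.__new__ and the add() method above describe the raw changelog entry text that gets joined with b"\n" and later split back apart. A minimal sketch of that layout with made-up placeholder values (none of these values come from this changeset):

# Sketch of a changelog revision text, per the format comment above:
manifest_hex = b"a" * 40                  # hex(manifest node), placeholder
entry = b"\n".join(
    [
        manifest_hex,
        b"Jane Doe <jane@example.com>",   # user, no \n or \r allowed
        b"1600000000 0",                  # "time tz"; encoded extras would follow
        b"dir/file.txt",                  # sorted touched files, one per line
        b"",                              # blank line before the description
        b"the commit message",
    ]
)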