sidedata: simply read p1copies files from the `ChangingFiles` object

Author: marmoute
Changeset: r46148:4e2238ba (branch: default)
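For orientation before the diff: the change below makes `changelogrevision.p1copies` delegate to the lazily decoded `ChangingFiles` object (exposed as the `changes` property) instead of decoding the raw `SD_P1COPIES` sidedata entry itself. A minimal usage sketch follows, assuming an existing `repo` handle and a repository configured with `copies-storage = changeset-sidedata`; the revision number and file names are illustrative only:

```python
cl = repo.changelog
entry = cl.changelogrevision(42)      # parse one changelog revision (number is illustrative)

# With changeset sidedata enabled (_cpsd is True), p1copies no longer decodes
# the raw SD_P1COPIES blob itself; it is served from the lazily built
# ChangingFiles object exposed as `entry.changes`.
copies = entry.p1copies               # e.g. {b'new/name.py': b'old/name.py'}
same = entry.changes.copied_from_p1   # the same mapping, read directly
```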
@@ -1,612 +1,610 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 from .revlogutils import sidedata as sidedatamod
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
55 55 def _string_unescape(text):
56 56 if b'\\0' in text:
57 57 # fix up \0 without getting into trouble with \\0
58 58 text = text.replace(b'\\\\', b'\\\\\n')
59 59 text = text.replace(b'\\0', b'\0')
60 60 text = text.replace(b'\n', b'')
61 61 return stringutil.unescapestr(text)
62 62
63 63
64 64 def decodeextra(text):
65 65 """
66 66 >>> from .pycompat import bytechr as chr
67 67 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 68 ... ).items())
69 69 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 70 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 71 ... b'baz': chr(92) + chr(0) + b'2'})
72 72 ... ).items())
73 73 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 74 """
75 75 extra = _defaultextra.copy()
76 76 for l in text.split(b'\0'):
77 77 if l:
78 78 k, v = _string_unescape(l).split(b':', 1)
79 79 extra[k] = v
80 80 return extra
81 81
82 82
83 83 def encodeextra(d):
84 84 # keys must be sorted to produce a deterministic changelog entry
85 85 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 86 return b"\0".join(items)
87 87
88 88
89 89 def stripdesc(desc):
90 90 """strip trailing whitespace and leading and trailing empty lines"""
91 91 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 92
93 93
94 94 class appender(object):
95 95 '''the changelog index must be updated last on disk, so we use this class
96 96 to delay writes to it'''
97 97
98 98 def __init__(self, vfs, name, mode, buf):
99 99 self.data = buf
100 100 fp = vfs(name, mode)
101 101 self.fp = fp
102 102 self.offset = fp.tell()
103 103 self.size = vfs.fstat(fp).st_size
104 104 self._end = self.size
105 105
106 106 def end(self):
107 107 return self._end
108 108
109 109 def tell(self):
110 110 return self.offset
111 111
112 112 def flush(self):
113 113 pass
114 114
115 115 @property
116 116 def closed(self):
117 117 return self.fp.closed
118 118
119 119 def close(self):
120 120 self.fp.close()
121 121
122 122 def seek(self, offset, whence=0):
123 123 '''virtual file offset spans real file and data'''
124 124 if whence == 0:
125 125 self.offset = offset
126 126 elif whence == 1:
127 127 self.offset += offset
128 128 elif whence == 2:
129 129 self.offset = self.end() + offset
130 130 if self.offset < self.size:
131 131 self.fp.seek(self.offset)
132 132
133 133 def read(self, count=-1):
134 134 '''only trick here is reads that span real file and data'''
135 135 ret = b""
136 136 if self.offset < self.size:
137 137 s = self.fp.read(count)
138 138 ret = s
139 139 self.offset += len(s)
140 140 if count > 0:
141 141 count -= len(s)
142 142 if count != 0:
143 143 doff = self.offset - self.size
144 144 self.data.insert(0, b"".join(self.data))
145 145 del self.data[1:]
146 146 s = self.data[0][doff : doff + count]
147 147 self.offset += len(s)
148 148 ret += s
149 149 return ret
150 150
151 151 def write(self, s):
152 152 self.data.append(bytes(s))
153 153 self.offset += len(s)
154 154 self._end += len(s)
155 155
156 156 def __enter__(self):
157 157 self.fp.__enter__()
158 158 return self
159 159
160 160 def __exit__(self, *args):
161 161 return self.fp.__exit__(*args)
162 162
163 163
164 164 class _divertopener(object):
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 def _delayopener(opener, target, buf):
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 182 if name != target:
183 183 return opener(name, mode, **kwargs)
184 184 assert not kwargs
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
190 190 @attr.s
191 191 class _changelogrevision(object):
192 192 # Extensions might modify _defaultextra, so let the constructor below pass
193 193 # it in
194 194 extra = attr.ib()
195 195 manifest = attr.ib(default=nullid)
196 196 user = attr.ib(default=b'')
197 197 date = attr.ib(default=(0, 0))
198 198 files = attr.ib(default=attr.Factory(list))
199 199 filesadded = attr.ib(default=None)
200 200 filesremoved = attr.ib(default=None)
201 201 p1copies = attr.ib(default=None)
202 202 p2copies = attr.ib(default=None)
203 203 description = attr.ib(default=b'')
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 '_offsets',
216 216 '_text',
217 217 '_sidedata',
218 218 '_cpsd',
219 219 '_changes',
220 220 )
221 221
222 222 def __new__(cls, text, sidedata, cpsd):
223 223 if not text:
224 224 return _changelogrevision(extra=_defaultextra)
225 225
226 226 self = super(changelogrevision, cls).__new__(cls)
227 227 # We could return here and implement the following as an __init__.
228 228 # But doing it here is equivalent and saves an extra function call.
229 229
230 230 # format used:
231 231 # nodeid\n : manifest node in ascii
232 232 # user\n : user, no \n or \r allowed
233 233 # time tz extra\n : date (time is int or float, timezone is int)
234 234 # : extra is metadata, encoded and separated by '\0'
235 235 # : older versions ignore it
236 236 # files\n\n : files modified by the cset, no \n or \r allowed
237 237 # (.*) : comment (free text, ideally utf-8)
238 238 #
239 239 # changelog v0 doesn't use extra
240 240
241 241 nl1 = text.index(b'\n')
242 242 nl2 = text.index(b'\n', nl1 + 1)
243 243 nl3 = text.index(b'\n', nl2 + 1)
244 244
245 245 # The list of files may be empty, in which case nl3 is the first of the
246 246 # double newline that precedes the description.
247 247 if text[nl3 + 1 : nl3 + 2] == b'\n':
248 248 doublenl = nl3
249 249 else:
250 250 doublenl = text.index(b'\n\n', nl3 + 1)
251 251
252 252 self._offsets = (nl1, nl2, nl3, doublenl)
253 253 self._text = text
254 254 self._sidedata = sidedata
255 255 self._cpsd = cpsd
256 256 self._changes = None
257 257
258 258 return self
259 259
260 260 @property
261 261 def manifest(self):
262 262 return bin(self._text[0 : self._offsets[0]])
263 263
264 264 @property
265 265 def user(self):
266 266 off = self._offsets
267 267 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
268 268
269 269 @property
270 270 def _rawdate(self):
271 271 off = self._offsets
272 272 dateextra = self._text[off[1] + 1 : off[2]]
273 273 return dateextra.split(b' ', 2)[0:2]
274 274
275 275 @property
276 276 def _rawextra(self):
277 277 off = self._offsets
278 278 dateextra = self._text[off[1] + 1 : off[2]]
279 279 fields = dateextra.split(b' ', 2)
280 280 if len(fields) != 3:
281 281 return None
282 282
283 283 return fields[2]
284 284
285 285 @property
286 286 def date(self):
287 287 raw = self._rawdate
288 288 time = float(raw[0])
289 289 # Various tools did silly things with the timezone.
290 290 try:
291 291 timezone = int(raw[1])
292 292 except ValueError:
293 293 timezone = 0
294 294
295 295 return time, timezone
296 296
297 297 @property
298 298 def extra(self):
299 299 raw = self._rawextra
300 300 if raw is None:
301 301 return _defaultextra
302 302
303 303 return decodeextra(raw)
304 304
305 305 @property
306 306 def changes(self):
307 307 if self._changes is not None:
308 308 return self._changes
309 309 if self._cpsd:
310 310 changes = metadata.decode_files_sidedata(self, self._sidedata)
311 311 else:
312 312 changes = metadata.ChangingFiles(
313 313 touched=self.files or (),
314 314 added=self.filesadded or (),
315 315 removed=self.filesremoved or (),
316 316 p1_copies=self.p1copies or {},
317 317 p2_copies=self.p2copies or {},
318 318 )
319 319 self._changes = changes
320 320 return changes
321 321
322 322 @property
323 323 def files(self):
324 324 off = self._offsets
325 325 if off[2] == off[3]:
326 326 return []
327 327
328 328 return self._text[off[2] + 1 : off[3]].split(b'\n')
329 329
330 330 @property
331 331 def filesadded(self):
332 332 if self._cpsd:
333 333 return self.changes.added
334 334 else:
335 335 rawindices = self.extra.get(b'filesadded')
336 336 if rawindices is None:
337 337 return None
338 338 return metadata.decodefileindices(self.files, rawindices)
339 339
340 340 @property
341 341 def filesremoved(self):
342 342 if self._cpsd:
343 343 return self.changes.removed
344 344 else:
345 345 rawindices = self.extra.get(b'filesremoved')
346 346 if rawindices is None:
347 347 return None
348 348 return metadata.decodefileindices(self.files, rawindices)
349 349
350 350 @property
351 351 def p1copies(self):
352 352 if self._cpsd:
353 rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
354 if not rawcopies:
355 return {}
353 return self.changes.copied_from_p1
356 354 else:
357 355 rawcopies = self.extra.get(b'p1copies')
358 356 if rawcopies is None:
359 357 return None
360 358 return metadata.decodecopies(self.files, rawcopies)
361 359
362 360 @property
363 361 def p2copies(self):
364 362 if self._cpsd:
365 363 rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
366 364 if not rawcopies:
367 365 return {}
368 366 else:
369 367 rawcopies = self.extra.get(b'p2copies')
370 368 if rawcopies is None:
371 369 return None
372 370 return metadata.decodecopies(self.files, rawcopies)
373 371
374 372 @property
375 373 def description(self):
376 374 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 375
378 376
379 377 class changelog(revlog.revlog):
380 378 def __init__(self, opener, trypending=False):
381 379 """Load a changelog revlog using an opener.
382 380
383 381 If ``trypending`` is true, we attempt to load the index from a
384 382 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
385 383 The ``00changelog.i.a`` file contains index (and possibly inline
386 384 revision) data for a transaction that hasn't been finalized yet.
387 385 It exists in a separate file to facilitate readers (such as
388 386 hook processes) accessing data before a transaction is finalized.
389 387 """
390 388 if trypending and opener.exists(b'00changelog.i.a'):
391 389 indexfile = b'00changelog.i.a'
392 390 else:
393 391 indexfile = b'00changelog.i'
394 392
395 393 datafile = b'00changelog.d'
396 394 revlog.revlog.__init__(
397 395 self,
398 396 opener,
399 397 indexfile,
400 398 datafile=datafile,
401 399 checkambig=True,
402 400 mmaplargeindex=True,
403 401 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
404 402 )
405 403
406 404 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
407 405 # changelogs don't benefit from generaldelta.
408 406
409 407 self.version &= ~revlog.FLAG_GENERALDELTA
410 408 self._generaldelta = False
411 409
412 410 # Delta chains for changelogs tend to be very small because entries
413 411 # tend to be small and don't delta well with each other. So disable delta
414 412 # chains.
415 413 self._storedeltachains = False
416 414
417 415 self._realopener = opener
418 416 self._delayed = False
419 417 self._delaybuf = None
420 418 self._divert = False
421 419 self._filteredrevs = frozenset()
422 420 self._filteredrevs_hashcache = {}
423 421 self._copiesstorage = opener.options.get(b'copies-storage')
424 422
425 423 @property
426 424 def filteredrevs(self):
427 425 return self._filteredrevs
428 426
429 427 @filteredrevs.setter
430 428 def filteredrevs(self, val):
431 429 # Ensure all updates go through this function
432 430 assert isinstance(val, frozenset)
433 431 self._filteredrevs = val
434 432 self._filteredrevs_hashcache = {}
435 433
436 434 def delayupdate(self, tr):
437 435 """delay visibility of index updates to other readers"""
438 436
439 437 if not self._delayed:
440 438 if len(self) == 0:
441 439 self._divert = True
442 440 if self._realopener.exists(self.indexfile + b'.a'):
443 441 self._realopener.unlink(self.indexfile + b'.a')
444 442 self.opener = _divertopener(self._realopener, self.indexfile)
445 443 else:
446 444 self._delaybuf = []
447 445 self.opener = _delayopener(
448 446 self._realopener, self.indexfile, self._delaybuf
449 447 )
450 448 self._delayed = True
451 449 tr.addpending(b'cl-%i' % id(self), self._writepending)
452 450 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
453 451
454 452 def _finalize(self, tr):
455 453 """finalize index updates"""
456 454 self._delayed = False
457 455 self.opener = self._realopener
458 456 # move redirected index data back into place
459 457 if self._divert:
460 458 assert not self._delaybuf
461 459 tmpname = self.indexfile + b".a"
462 460 nfile = self.opener.open(tmpname)
463 461 nfile.close()
464 462 self.opener.rename(tmpname, self.indexfile, checkambig=True)
465 463 elif self._delaybuf:
466 464 fp = self.opener(self.indexfile, b'a', checkambig=True)
467 465 fp.write(b"".join(self._delaybuf))
468 466 fp.close()
469 467 self._delaybuf = None
470 468 self._divert = False
471 469 # split when we're done
472 470 self._enforceinlinesize(tr)
473 471
474 472 def _writepending(self, tr):
475 473 """create a file containing the unfinalized state for
476 474 pretxnchangegroup"""
477 475 if self._delaybuf:
478 476 # make a temporary copy of the index
479 477 fp1 = self._realopener(self.indexfile)
480 478 pendingfilename = self.indexfile + b".a"
481 479 # register as a temp file to ensure cleanup on failure
482 480 tr.registertmp(pendingfilename)
483 481 # write existing data
484 482 fp2 = self._realopener(pendingfilename, b"w")
485 483 fp2.write(fp1.read())
486 484 # add pending data
487 485 fp2.write(b"".join(self._delaybuf))
488 486 fp2.close()
489 487 # switch modes so finalize can simply rename
490 488 self._delaybuf = None
491 489 self._divert = True
492 490 self.opener = _divertopener(self._realopener, self.indexfile)
493 491
494 492 if self._divert:
495 493 return True
496 494
497 495 return False
498 496
499 497 def _enforceinlinesize(self, tr, fp=None):
500 498 if not self._delayed:
501 499 revlog.revlog._enforceinlinesize(self, tr, fp)
502 500
503 501 def read(self, node):
504 502 """Obtain data from a parsed changelog revision.
505 503
506 504 Returns a 6-tuple of:
507 505
508 506 - manifest node in binary
509 507 - author/user as a localstr
510 508 - date as a 2-tuple of (time, timezone)
511 509 - list of files
512 510 - commit message as a localstr
513 511 - dict of extra metadata
514 512
515 513 Unless you need to access all fields, consider calling
516 514 ``changelogrevision`` instead, as it is faster for partial object
517 515 access.
518 516 """
519 517 d, s = self._revisiondata(node)
520 518 c = changelogrevision(
521 519 d, s, self._copiesstorage == b'changeset-sidedata'
522 520 )
523 521 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
524 522
525 523 def changelogrevision(self, nodeorrev):
526 524 """Obtain a ``changelogrevision`` for a node or revision."""
527 525 text, sidedata = self._revisiondata(nodeorrev)
528 526 return changelogrevision(
529 527 text, sidedata, self._copiesstorage == b'changeset-sidedata'
530 528 )
531 529
532 530 def readfiles(self, node):
533 531 """
534 532 short version of read that only returns the files modified by the cset
535 533 """
536 534 text = self.revision(node)
537 535 if not text:
538 536 return []
539 537 last = text.index(b"\n\n")
540 538 l = text[:last].split(b'\n')
541 539 return l[3:]
542 540
543 541 def add(
544 542 self,
545 543 manifest,
546 544 files,
547 545 desc,
548 546 transaction,
549 547 p1,
550 548 p2,
551 549 user,
552 550 date=None,
553 551 extra=None,
554 552 ):
555 553 # Convert to UTF-8 encoded bytestrings as the very first
556 554 # thing: calling any method on a localstr object will turn it
557 555 # into a str object and the cached UTF-8 string is thus lost.
558 556 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
559 557
560 558 user = user.strip()
561 559 # An empty username or a username with a "\n" will make the
562 560 # revision text contain two "\n\n" sequences -> corrupt
563 561 # repository since read cannot unpack the revision.
564 562 if not user:
565 563 raise error.StorageError(_(b"empty username"))
566 564 if b"\n" in user:
567 565 raise error.StorageError(
568 566 _(b"username %r contains a newline") % pycompat.bytestr(user)
569 567 )
570 568
571 569 desc = stripdesc(desc)
572 570
573 571 if date:
574 572 parseddate = b"%d %d" % dateutil.parsedate(date)
575 573 else:
576 574 parseddate = b"%d %d" % dateutil.makedate()
577 575 if extra:
578 576 branch = extra.get(b"branch")
579 577 if branch in (b"default", b""):
580 578 del extra[b"branch"]
581 579 elif branch in (b".", b"null", b"tip"):
582 580 raise error.StorageError(
583 581 _(b'the name \'%s\' is reserved') % branch
584 582 )
585 583 sortedfiles = sorted(files.touched)
586 584 sidedata = None
587 585 if self._copiesstorage == b'changeset-sidedata':
588 586 sidedata = metadata.encode_files_sidedata(files)
589 587
590 588 if extra:
591 589 extra = encodeextra(extra)
592 590 parseddate = b"%s %s" % (parseddate, extra)
593 591 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
594 592 text = b"\n".join(l)
595 593 return self.addrevision(
596 594 text, transaction, len(self), p1, p2, sidedata=sidedata
597 595 )
598 596
599 597 def branchinfo(self, rev):
600 598 """return the branch name and open/close state of a revision
601 599
602 600 This function exists because creating a changectx object
603 601 just to access this is costly."""
604 602 extra = self.read(rev)[5]
605 603 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
606 604
607 605 def _nodeduplicatecallback(self, transaction, node):
608 606 # keep track of revisions that got "re-added", eg: unbunde of know rev.
609 607 #
610 608 # We track them in a list to preserve their order from the source bundle
611 609 duplicates = transaction.changes.setdefault(b'revduplicates', [])
612 610 duplicates.append(self.rev(node))
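For completeness, a rough sketch of the write side that produces this sidedata: a `ChangingFiles` object carries the copy information into `changelog.add()`, which encodes it with `metadata.encode_files_sidedata()` when copies are stored in changeset sidedata. Names such as `repo`, `tr`, `mf_node`, `p1`, and `p2` are placeholders, not part of this change:

```python
from mercurial import metadata

# Illustrative only: the file names and copy mapping are made up.
files = metadata.ChangingFiles(
    touched=[b'new/name.py'],
    added=[b'new/name.py'],
    removed=[],
    p1_copies={b'new/name.py': b'old/name.py'},  # copy traced against p1
    p2_copies={},
)

# With copies-storage=changeset-sidedata, add() encodes `files` into sidedata;
# readers later recover it through changelogrevision(...).changes.
node = repo.changelog.add(mf_node, files, b'commit message', tr, p1, p2, b'user')
```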