##// END OF EJS Templates
sidedata: simply read added files from the `ChangingFiles` object
marmoute -
r46146:48c93a0b default
parent child Browse files
Show More
@@ -1,616 +1,614
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 from .revlogutils import sidedata as sidedatamod
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
def _string_unescape(text):
    """Reverse the transformation applied by ``_string_escape``.

    ``\\0`` needs special care: an escaped NUL (``\\0``) must not be
    confused with an escaped backslash followed by a literal zero
    (``\\\\0``).  The temporary newline marker makes the two cases
    distinguishable; raw newlines cannot appear in escaped text since
    ``_string_escape`` always encodes them as ``\\n``.
    """
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)
62 62
63 63
def decodeextra(text):
    """Decode a ``\\0``-separated blob of escaped ``key:value`` pairs.

    The defaults from ``_defaultextra`` are always present in the result
    unless overridden by the encoded data.

    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for chunk in text.split(b'\0'):
        if not chunk:
            # empty chunks carry no key/value pair
            continue
        key, value = _string_unescape(chunk).split(b':', 1)
        extra[key] = value
    return extra
81 81
82 82
def encodeextra(d):
    """Encode an extra dict as escaped ``key:value`` pairs joined by ``\\0``.

    Keys are processed in sorted order so the resulting changelog entry
    is deterministic.
    """
    return b"\0".join(
        _string_escape(b'%s:%s' % (key, d[key])) for key in sorted(d)
    )
87 87
88 88
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    # trailing whitespace is removed per-line; leading whitespace is kept
    trimmed = [line.rstrip() for line in desc.splitlines()]
    return b'\n'.join(trimmed).strip(b'\n')
92 92
93 93
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        # buf collects written chunks that have not hit the real file yet
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        # virtual offset: may point past the on-disk size, into the buffer
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        # virtual end of file: on-disk size plus buffered writes
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        # writes are intentionally held in memory; nothing to flush
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            # only reposition the real file when the virtual offset lands
            # inside it; offsets >= self.size live in the write buffer
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            # serve the on-disk part first
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            # remainder comes from the write buffer; coalesce it into a
            # single chunk so a plain slice can serve the request
            # NOTE(review): with count == -1 the slice below is
            # [doff : doff - 1], which drops buffered data; callers appear
            # to always pass a positive count here -- confirm
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        # append-only: the buffer is never written to mid-stream
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
162 162
163 163
164 164 class _divertopener(object):
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 def _delayopener(opener, target, buf):
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 182 if name != target:
183 183 return opener(name, mode, **kwargs)
184 184 assert not kwargs
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
@attr.s
class _changelogrevision(object):
    """Default-valued stand-in returned for an empty revision text.

    ``changelogrevision.__new__`` returns one of these instead of parsing
    when there is no text (the null revision); it exposes the same
    attribute names with neutral defaults.
    """

    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    # nullid: the manifest node of the null revision
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    # (time, timezone) pair, matching changelogrevision.date
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    # None (not []) mirrors the "no recorded data" case of the real class
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
204 204
205 205
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    # __slots__ keeps per-instance cost low; one of these is created for
    # every changeset that is inspected
    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, text, sidedata, cpsd):
        # empty text means the null revision: return a default-valued
        # stub instead of parsing
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n : manifest node in ascii
        # user\n : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        # : extra is metadata, encoded and separated by '\0'
        # : older versions ignore it
        # files\n\n : files modified by the cset, no \n or \r allowed
        # (.*) : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        # offsets of the first three newlines delimit manifest/user/date
        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        # cpsd: copies are stored in changeset sidedata
        self._cpsd = cpsd
        # lazily-built metadata.ChangingFiles cache (see ``changes``)
        self._changes = None

        return self

    @property
    def manifest(self):
        """Binary manifest node (first line of the entry, hex-decoded)."""
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        """Committer as a localstr (second line of the entry)."""
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        # first two space-separated fields of the third line: time, tz
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        # optional third field of the date line; None when absent (v0)
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        """(time, timezone) tuple; time is a float, timezone an int."""
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        """Decoded extra dict; the shared default when none is encoded."""
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        """A ``metadata.ChangingFiles`` for this revision, cached.

        Built from sidedata when copies live there (``_cpsd``), otherwise
        reassembled from the files list and the extra-encoded fields.
        """
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self, self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        """List of touched files; empty when the files section is empty."""
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        """Files added by this cset, or None when nothing was recorded."""
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        """Files removed by this cset, or None when nothing was recorded."""
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        """Copies against p1 as a dict, or None when nothing was recorded."""
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        """Copies against p2 as a dict, or None when nothing was recorded."""
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        """Commit message as a localstr (everything after the blank line)."""
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])
381 379
382 380
class changelog(revlog.revlog):
    """Revlog subclass for the changelog (``00changelog.i``/``.d``).

    On top of plain revlog storage this adds changeset entry parsing
    (``read``/``changelogrevision``/``add``) and machinery to delay the
    visibility of index writes until a transaction finalizes, so other
    readers never see a partially-written changelog index.
    """

    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each. So disable delta
        # chains.
        self._storedeltachains = False

        self._realopener = opener
        # delay/divert state; see delayupdate/_writepending/_finalize
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        """Frozenset of revisions hidden from this changelog view."""
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        # any cached hash of the filtered set is now stale
        self._filteredrevs_hashcache = {}

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: divert all writes to indexfile + '.a'
                # and rename it into place at finalization
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # non-empty changelog: buffer index writes in memory
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            # append the buffered chunks to the real index
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        # True tells the transaction a '.a' pending file now exists
        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        # never split index/data while writes are delayed; _finalize will
        # re-run this once the real index is back in place
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        # files start on the fourth line and run until the blank line
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        """Add a changeset entry; ``files`` is a metadata.ChangingFiles.

        Returns the node of the new revision.  Raises StorageError for an
        empty or newline-containing username or a reserved branch name.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                # the default branch is implicit; don't store it
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", eg: unbunde of know rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
General Comments 0
You need to be logged in to leave comments. Login now