##// END OF EJS Templates
emitrevision: consider ancestors revision to emit as available base...
marmoute -
r50567:e92de86c default
parent child Browse files
Show More
@@ -1,644 +1,643 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 193 * A 20 or 32 byte binary node.
194 194 * An integer revision number
195 195 * A 40 or 64 byte hex node.
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 211 if len(fileid) in (20, 32):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 218 if len(fileid) in (40, 64):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 debug_info=None,
309 309 ):
310 310 """Generic implementation of ifiledata.emitrevisions().
311 311
312 312 Emitting revision data is subtly complex. This function attempts to
313 313 encapsulate all the logic for doing so in a backend-agnostic way.
314 314
315 315 ``store``
316 316 Object conforming to ``ifilestorage`` interface.
317 317
318 318 ``nodes``
319 319 List of revision nodes whose data to emit.
320 320
321 321 ``resultcls``
322 322 A type implementing the ``irevisiondelta`` interface that will be
323 323 constructed and returned.
324 324
325 325 ``deltaparentfn`` (optional)
326 326 Callable receiving a revision number and returning the revision number
327 327 of a revision that the internal delta is stored against. This delta
328 328 will be preferred over computing a new arbitrary delta.
329 329
330 330 If not defined, a delta will always be computed from raw revision
331 331 data.
332 332
333 333 ``candeltafn`` (optional)
334 334 Callable receiving a pair of revision numbers that returns a bool
335 335 indicating whether a delta between them can be produced.
336 336
337 337 If not defined, it is assumed that any two revisions can delta with
338 338 each other.
339 339
340 340 ``rawsizefn`` (optional)
341 341 Callable receiving a revision number and returning the length of the
342 342 ``store.rawdata(rev)``.
343 343
344 344 If not defined, ``len(store.rawdata(rev))`` will be called.
345 345
346 346 ``revdifffn`` (optional)
347 347 Callable receiving a pair of revision numbers that returns a delta
348 348 between them.
349 349
350 350 If not defined, a delta will be computed by invoking mdiff code
351 351 on ``store.revision()`` results.
352 352
353 353 Defining this function allows a precomputed or stored delta to be
354 354 used without having to compute on.
355 355
356 356 ``flagsfn`` (optional)
357 357 Callable receiving a revision number and returns the integer flags
358 358 value for it. If not defined, flags value will be 0.
359 359
360 360 ``deltamode``
361 361 constaint on delta to be sent:
362 362 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
363 363 * CG_DELTAMODE_PREV - only delta against "prev",
364 364 * CG_DELTAMODE_FULL - only issue full snapshot.
365 365
366 366 Whether to send fulltext revisions instead of deltas, if allowed.
367 367
368 368 ``nodesorder``
369 369 ``revisiondata``
370 370 ``assumehaveparentrevisions``
371 371 ``sidedata_helpers`` (optional)
372 372 If not None, means that sidedata should be included.
373 373 See `revlogutil.sidedata.get_sidedata_helpers`.
374 374
375 375 ``debug_info`
376 376 An optionnal dictionnary to gather information about the bundling
377 377 process (if present, see config: debug.bundling.stats.
378 378 """
379 379
380 380 fnode = store.node
381 381 frev = store.rev
382 parents = store.parentrevs
382 383
383 384 if nodesorder == b'nodes':
384 385 revs = [frev(n) for n in nodes]
385 386 elif nodesorder == b'linear':
386 387 revs = {frev(n) for n in nodes}
387 388 revs = dagop.linearize(revs, store.parentrevs)
388 389 else: # storage and default
389 390 revs = sorted(frev(n) for n in nodes)
390 391
391 392 prevrev = None
392 393
393 394 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
394 prevrev = store.parentrevs(revs[0])[0]
395 prevrev = parents(revs[0])[0]
395 396
396 # Set of revs available to delta against.
397 # Sets of revs available to delta against.
398 emitted = set()
397 399 available = set()
398 parents = []
400 if assumehaveparentrevisions:
401 common_heads = set(p for r in revs for p in parents(r))
402 common_heads.difference_update(revs)
403 available = store.ancestors(common_heads, inclusive=True)
399 404
400 405 def is_usable_base(rev):
401 406 """Is a delta against this revision usable over the wire"""
402 407 if rev == nullrev:
403 408 return False
404 # Base revision was already emitted in this group.
405 if rev in available:
406 return True
407 # Base revision is a parent that hasn't been emitted already.
408 if assumehaveparentrevisions and rev in parents:
409 return True
410 return False
409 return rev in emitted or rev in available
411 410
412 411 for rev in revs:
413 412 if rev == nullrev:
414 413 continue
415 414
416 415 debug_delta_source = None
417 416 if debug_info is not None:
418 417 debug_info['revision-total'] += 1
419 418
420 419 node = fnode(rev)
421 parents[:] = p1rev, p2rev = store.parentrevs(rev)
420 p1rev, p2rev = parents(rev)
422 421
423 422 if debug_info is not None:
424 423 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
425 424 debug_info['merge-total'] += 1
426 425
427 426 if deltaparentfn:
428 427 deltaparentrev = deltaparentfn(rev)
429 428 if debug_info is not None:
430 429 if deltaparentrev == nullrev:
431 430 debug_info['available-full'] += 1
432 431 else:
433 432 debug_info['available-delta'] += 1
434 433
435 434 else:
436 435 deltaparentrev = nullrev
437 436
438 437 # Forced delta against previous mode.
439 438 if deltamode == repository.CG_DELTAMODE_PREV:
440 439 if debug_info is not None:
441 440 debug_delta_source = "prev"
442 441 baserev = prevrev
443 442
444 443 # We're instructed to send fulltext. Honor that.
445 444 elif deltamode == repository.CG_DELTAMODE_FULL:
446 445 if debug_info is not None:
447 446 debug_delta_source = "full"
448 447 baserev = nullrev
449 448 # We're instructed to use p1. Honor that
450 449 elif deltamode == repository.CG_DELTAMODE_P1:
451 450 if debug_info is not None:
452 451 debug_delta_source = "p1"
453 452 baserev = p1rev
454 453
455 454 # There is a delta in storage. We try to use that because it
456 455 # amounts to effectively copying data from storage and is
457 456 # therefore the fastest.
458 457 elif is_usable_base(deltaparentrev):
459 458 if debug_info is not None:
460 459 debug_delta_source = "storage"
461 460 baserev = deltaparentrev
462 461 else:
463 462 if deltaparentrev != nullrev and debug_info is not None:
464 463 debug_info['denied-base-not-available'] += 1
465 464 # No guarantee the receiver has the delta parent, or Storage has a
466 465 # fulltext revision.
467 466 #
468 467 # We compute a delta on the fly to send over the wire.
469 468 #
470 469 # We start with a try against p1, which in the common case should
471 470 # be close to this revision content.
472 471 #
473 472 # note: we could optimize between p1 and p2 in merges cases.
474 473 elif is_usable_base(p1rev):
475 474 if debug_info is not None:
476 475 debug_delta_source = "p1"
477 476 baserev = p1rev
478 477 # if p1 was not an option, try p2
479 478 elif is_usable_base(p2rev):
480 479 if debug_info is not None:
481 480 debug_delta_source = "p2"
482 481 baserev = p2rev
483 482 # Send delta against prev in despair
484 483 #
485 484 # using the closest available ancestors first might be better?
486 485 elif prevrev is not None:
487 486 if debug_info is not None:
488 487 debug_delta_source = "prev"
489 488 baserev = prevrev
490 489 else:
491 490 if debug_info is not None:
492 491 debug_delta_source = "full"
493 492 baserev = nullrev
494 493
495 494 # But we can't actually use our chosen delta base for whatever
496 495 # reason. Reset to fulltext.
497 496 if (
498 497 baserev != nullrev
499 498 and candeltafn is not None
500 499 and not candeltafn(baserev, rev)
501 500 ):
502 501 if debug_info is not None:
503 502 debug_delta_source = "full"
504 503 debug_info['denied-delta-candeltafn'] += 1
505 504 baserev = nullrev
506 505
507 506 revision = None
508 507 delta = None
509 508 baserevisionsize = None
510 509
511 510 if revisiondata:
512 511 if store.iscensored(baserev) or store.iscensored(rev):
513 512 try:
514 513 revision = store.rawdata(node)
515 514 except error.CensoredNodeError as e:
516 515 if debug_info is not None:
517 516 debug_delta_source = "full"
518 517 debug_info['denied-delta-not-available'] += 1
519 518 revision = e.tombstone
520 519
521 520 if baserev != nullrev:
522 521 if rawsizefn:
523 522 baserevisionsize = rawsizefn(baserev)
524 523 else:
525 524 baserevisionsize = len(store.rawdata(baserev))
526 525
527 526 elif (
528 527 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
529 528 ):
530 529 if debug_info is not None:
531 530 debug_info['computed-delta'] += 1 # close enough
532 531 debug_info['delta-full'] += 1
533 532 revision = store.rawdata(node)
534 available.add(rev)
533 emitted.add(rev)
535 534 else:
536 535 if revdifffn:
537 536 if debug_info is not None:
538 537 if debug_delta_source == "full":
539 538 debug_info['computed-delta'] += 1
540 539 debug_info['delta-full'] += 1
541 540 elif debug_delta_source == "prev":
542 541 debug_info['computed-delta'] += 1
543 542 debug_info['delta-against-prev'] += 1
544 543 elif debug_delta_source == "p1":
545 544 debug_info['computed-delta'] += 1
546 545 debug_info['delta-against-p1'] += 1
547 546 elif debug_delta_source == "storage":
548 547 debug_info['reused-storage-delta'] += 1
549 548 else:
550 549 assert False, 'unreachable'
551 550
552 551 delta = revdifffn(baserev, rev)
553 552 else:
554 553 if debug_info is not None:
555 554 if debug_delta_source == "full":
556 555 debug_info['computed-delta'] += 1
557 556 debug_info['delta-full'] += 1
558 557 elif debug_delta_source == "prev":
559 558 debug_info['computed-delta'] += 1
560 559 debug_info['delta-against-prev'] += 1
561 560 elif debug_delta_source == "p1":
562 561 debug_info['computed-delta'] += 1
563 562 debug_info['delta-against-p1'] += 1
564 563 elif debug_delta_source == "storage":
565 564 # seem quite unlikelry to happens
566 565 debug_info['computed-delta'] += 1
567 566 debug_info['reused-storage-delta'] += 1
568 567 else:
569 568 assert False, 'unreachable'
570 569 delta = mdiff.textdiff(
571 570 store.rawdata(baserev), store.rawdata(rev)
572 571 )
573 572
574 available.add(rev)
573 emitted.add(rev)
575 574
576 575 serialized_sidedata = None
577 576 sidedata_flags = (0, 0)
578 577 if sidedata_helpers:
579 578 try:
580 579 old_sidedata = store.sidedata(rev)
581 580 except error.CensoredNodeError:
582 581 # skip any potential sidedata of the censored revision
583 582 sidedata = {}
584 583 else:
585 584 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
586 585 store=store,
587 586 sidedata_helpers=sidedata_helpers,
588 587 sidedata=old_sidedata,
589 588 rev=rev,
590 589 )
591 590 if sidedata:
592 591 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
593 592
594 593 flags = flagsfn(rev) if flagsfn else 0
595 594 protocol_flags = 0
596 595 if serialized_sidedata:
597 596 # Advertise that sidedata exists to the other side
598 597 protocol_flags |= CG_FLAG_SIDEDATA
599 598 # Computers and removers can return flags to add and/or remove
600 599 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
601 600
602 601 yield resultcls(
603 602 node=node,
604 603 p1node=fnode(p1rev),
605 604 p2node=fnode(p2rev),
606 605 basenode=fnode(baserev),
607 606 flags=flags,
608 607 baserevisionsize=baserevisionsize,
609 608 revision=revision,
610 609 delta=delta,
611 610 sidedata=serialized_sidedata,
612 611 protocol_flags=protocol_flags,
613 612 )
614 613
615 614 prevrev = rev
616 615
617 616
618 617 def deltaiscensored(delta, baserev, baselenfn):
619 618 """Determine if a delta represents censored revision data.
620 619
621 620 ``baserev`` is the base revision this delta is encoded against.
622 621 ``baselenfn`` is a callable receiving a revision number that resolves the
623 622 length of the revision fulltext.
624 623
625 624 Returns a bool indicating if the result of the delta represents a censored
626 625 revision.
627 626 """
628 627 # Fragile heuristic: unless new file meta keys are added alphabetically
629 628 # preceding "censored", all censored revisions are prefixed by
630 629 # "\1\ncensored:". A delta producing such a censored revision must be a
631 630 # full-replacement delta, so we inspect the first and only patch in the
632 631 # delta for this prefix.
633 632 hlen = struct.calcsize(b">lll")
634 633 if len(delta) <= hlen:
635 634 return False
636 635
637 636 oldlen = baselenfn(baserev)
638 637 newlen = len(delta) - hlen
639 638 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
640 639 return False
641 640
642 641 add = b"\1\ncensored:"
643 642 addlen = len(add)
644 643 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now