##// END OF EJS Templates
emitrevision: simplify the fallback to computed delta...
marmoute -
r50564:f064b03d default
parent child Browse files
Show More
@@ -1,641 +1,629 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 193 * A 20 or 32 byte binary node.
194 194 * An integer revision number
195 195 * A 40 or 64 byte hex node.
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 211 if len(fileid) in (20, 32):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 218 if len(fileid) in (40, 64):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 debug_info=None,
309 309 ):
310 310 """Generic implementation of ifiledata.emitrevisions().
311 311
312 312 Emitting revision data is subtly complex. This function attempts to
313 313 encapsulate all the logic for doing so in a backend-agnostic way.
314 314
315 315 ``store``
316 316 Object conforming to ``ifilestorage`` interface.
317 317
318 318 ``nodes``
319 319 List of revision nodes whose data to emit.
320 320
321 321 ``resultcls``
322 322 A type implementing the ``irevisiondelta`` interface that will be
323 323 constructed and returned.
324 324
325 325 ``deltaparentfn`` (optional)
326 326 Callable receiving a revision number and returning the revision number
327 327 of a revision that the internal delta is stored against. This delta
328 328 will be preferred over computing a new arbitrary delta.
329 329
330 330 If not defined, a delta will always be computed from raw revision
331 331 data.
332 332
333 333 ``candeltafn`` (optional)
334 334 Callable receiving a pair of revision numbers that returns a bool
335 335 indicating whether a delta between them can be produced.
336 336
337 337 If not defined, it is assumed that any two revisions can delta with
338 338 each other.
339 339
340 340 ``rawsizefn`` (optional)
341 341 Callable receiving a revision number and returning the length of the
342 342 ``store.rawdata(rev)``.
343 343
344 344 If not defined, ``len(store.rawdata(rev))`` will be called.
345 345
346 346 ``revdifffn`` (optional)
347 347 Callable receiving a pair of revision numbers that returns a delta
348 348 between them.
349 349
350 350 If not defined, a delta will be computed by invoking mdiff code
351 351 on ``store.revision()`` results.
352 352
353 353 Defining this function allows a precomputed or stored delta to be
354 354 used without having to compute on.
355 355
356 356 ``flagsfn`` (optional)
357 357 Callable receiving a revision number and returns the integer flags
358 358 value for it. If not defined, flags value will be 0.
359 359
360 360 ``deltamode``
361 361 constaint on delta to be sent:
362 362 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
363 363 * CG_DELTAMODE_PREV - only delta against "prev",
364 364 * CG_DELTAMODE_FULL - only issue full snapshot.
365 365
366 366 Whether to send fulltext revisions instead of deltas, if allowed.
367 367
368 368 ``nodesorder``
369 369 ``revisiondata``
370 370 ``assumehaveparentrevisions``
371 371 ``sidedata_helpers`` (optional)
372 372 If not None, means that sidedata should be included.
373 373 See `revlogutil.sidedata.get_sidedata_helpers`.
374 374
375 375 ``debug_info`
376 376 An optionnal dictionnary to gather information about the bundling
377 377 process (if present, see config: debug.bundling.stats.
378 378 """
379 379
380 380 fnode = store.node
381 381 frev = store.rev
382 382
383 383 if nodesorder == b'nodes':
384 384 revs = [frev(n) for n in nodes]
385 385 elif nodesorder == b'linear':
386 386 revs = {frev(n) for n in nodes}
387 387 revs = dagop.linearize(revs, store.parentrevs)
388 388 else: # storage and default
389 389 revs = sorted(frev(n) for n in nodes)
390 390
391 391 prevrev = None
392 392
393 393 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
394 394 prevrev = store.parentrevs(revs[0])[0]
395 395
396 396 # Set of revs available to delta against.
397 397 available = set()
398 398 parents = []
399 399
400 400 def is_usable_base(rev):
401 401 """Is a delta against this revision usable over the wire"""
402 402 if rev == nullrev:
403 403 return False
404 404 # Base revision was already emitted in this group.
405 405 if rev in available:
406 406 return True
407 407 # Base revision is a parent that hasn't been emitted already.
408 408 if assumehaveparentrevisions and rev in parents:
409 409 return True
410 410 return False
411 411
412 412 for rev in revs:
413 413 if rev == nullrev:
414 414 continue
415 415
416 416 debug_delta_source = None
417 417 if debug_info is not None:
418 418 debug_info['revision-total'] += 1
419 419
420 420 node = fnode(rev)
421 421 parents[:] = p1rev, p2rev = store.parentrevs(rev)
422 422
423 423 if debug_info is not None:
424 424 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
425 425 debug_info['merge-total'] += 1
426 426
427 427 if deltaparentfn:
428 428 deltaparentrev = deltaparentfn(rev)
429 429 if debug_info is not None:
430 430 if deltaparentrev == nullrev:
431 431 debug_info['available-full'] += 1
432 432 else:
433 433 debug_info['available-delta'] += 1
434 434
435 435 else:
436 436 deltaparentrev = nullrev
437 437
438 438 # Forced delta against previous mode.
439 439 if deltamode == repository.CG_DELTAMODE_PREV:
440 440 if debug_info is not None:
441 441 debug_delta_source = "prev"
442 442 baserev = prevrev
443 443
444 444 # We're instructed to send fulltext. Honor that.
445 445 elif deltamode == repository.CG_DELTAMODE_FULL:
446 446 if debug_info is not None:
447 447 debug_delta_source = "full"
448 448 baserev = nullrev
449 449 # We're instructed to use p1. Honor that
450 450 elif deltamode == repository.CG_DELTAMODE_P1:
451 451 if debug_info is not None:
452 452 debug_delta_source = "p1"
453 453 baserev = p1rev
454 454
455 455 # There is a delta in storage. We try to use that because it
456 456 # amounts to effectively copying data from storage and is
457 457 # therefore the fastest.
458 elif deltaparentrev != nullrev:
459 # If the stored delta works, let us use it !
460 if is_usable_base(deltaparentrev):
461 if debug_info is not None:
462 debug_delta_source = "storage"
463 baserev = deltaparentrev
464 # No guarantee the receiver has the delta parent. Send delta
465 # against last revision (if possible), which in the common case
466 # should be similar enough to this revision that the delta is
467 # reasonable.
458 elif is_usable_base(deltaparentrev):
459 if debug_info is not None:
460 debug_delta_source = "storage"
461 baserev = deltaparentrev
462 else:
463 # No guarantee the receiver has the delta parent, or Storage has a
464 # fulltext revision.
465 #
466 # Send delta against last revision (if possible), which in the
467 # common case should be similar enough to this revision that the
468 # delta is reasonable.
469 if deltaparentrev != nullrev and debug_info is not None:
470 debug_info['denied-base-not-available'] += 1
468 471 elif prevrev is not None:
469 472 if debug_info is not None:
470 debug_info['denied-base-not-available'] += 1
471 473 debug_delta_source = "prev"
472 474 baserev = prevrev
473 475 else:
474 476 if debug_info is not None:
475 debug_info['denied-base-not-available'] += 1
476 477 debug_delta_source = "full"
477 478 baserev = nullrev
478 479
479 # Storage has a fulltext revision.
480
481 # Let's use the previous revision, which is as good a guess as any.
482 # There is definitely room to improve this logic.
483 elif prevrev is not None:
484 if debug_info is not None:
485 debug_delta_source = "prev"
486 baserev = prevrev
487 else:
488 if debug_info is not None:
489 debug_delta_source = "full"
490 baserev = nullrev
491
492 480 # But we can't actually use our chosen delta base for whatever
493 481 # reason. Reset to fulltext.
494 482 if (
495 483 baserev != nullrev
496 484 and candeltafn is not None
497 485 and not candeltafn(baserev, rev)
498 486 ):
499 487 if debug_info is not None:
500 488 debug_delta_source = "full"
501 489 debug_info['denied-delta-candeltafn'] += 1
502 490 baserev = nullrev
503 491
504 492 revision = None
505 493 delta = None
506 494 baserevisionsize = None
507 495
508 496 if revisiondata:
509 497 if store.iscensored(baserev) or store.iscensored(rev):
510 498 try:
511 499 revision = store.rawdata(node)
512 500 except error.CensoredNodeError as e:
513 501 if debug_info is not None:
514 502 debug_delta_source = "full"
515 503 debug_info['denied-delta-not-available'] += 1
516 504 revision = e.tombstone
517 505
518 506 if baserev != nullrev:
519 507 if rawsizefn:
520 508 baserevisionsize = rawsizefn(baserev)
521 509 else:
522 510 baserevisionsize = len(store.rawdata(baserev))
523 511
524 512 elif (
525 513 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
526 514 ):
527 515 if debug_info is not None:
528 516 debug_info['computed-delta'] += 1 # close enough
529 517 debug_info['delta-full'] += 1
530 518 revision = store.rawdata(node)
531 519 available.add(rev)
532 520 else:
533 521 if revdifffn:
534 522 if debug_info is not None:
535 523 if debug_delta_source == "full":
536 524 debug_info['computed-delta'] += 1
537 525 debug_info['delta-full'] += 1
538 526 elif debug_delta_source == "prev":
539 527 debug_info['computed-delta'] += 1
540 528 debug_info['delta-against-prev'] += 1
541 529 elif debug_delta_source == "p1":
542 530 debug_info['computed-delta'] += 1
543 531 debug_info['delta-against-p1'] += 1
544 532 elif debug_delta_source == "storage":
545 533 debug_info['reused-storage-delta'] += 1
546 534 else:
547 535 assert False, 'unreachable'
548 536
549 537 delta = revdifffn(baserev, rev)
550 538 else:
551 539 if debug_info is not None:
552 540 if debug_delta_source == "full":
553 541 debug_info['computed-delta'] += 1
554 542 debug_info['delta-full'] += 1
555 543 elif debug_delta_source == "prev":
556 544 debug_info['computed-delta'] += 1
557 545 debug_info['delta-against-prev'] += 1
558 546 elif debug_delta_source == "p1":
559 547 debug_info['computed-delta'] += 1
560 548 debug_info['delta-against-p1'] += 1
561 549 elif debug_delta_source == "storage":
562 550 # seem quite unlikelry to happens
563 551 debug_info['computed-delta'] += 1
564 552 debug_info['reused-storage-delta'] += 1
565 553 else:
566 554 assert False, 'unreachable'
567 555 delta = mdiff.textdiff(
568 556 store.rawdata(baserev), store.rawdata(rev)
569 557 )
570 558
571 559 available.add(rev)
572 560
573 561 serialized_sidedata = None
574 562 sidedata_flags = (0, 0)
575 563 if sidedata_helpers:
576 564 try:
577 565 old_sidedata = store.sidedata(rev)
578 566 except error.CensoredNodeError:
579 567 # skip any potential sidedata of the censored revision
580 568 sidedata = {}
581 569 else:
582 570 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
583 571 store=store,
584 572 sidedata_helpers=sidedata_helpers,
585 573 sidedata=old_sidedata,
586 574 rev=rev,
587 575 )
588 576 if sidedata:
589 577 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
590 578
591 579 flags = flagsfn(rev) if flagsfn else 0
592 580 protocol_flags = 0
593 581 if serialized_sidedata:
594 582 # Advertise that sidedata exists to the other side
595 583 protocol_flags |= CG_FLAG_SIDEDATA
596 584 # Computers and removers can return flags to add and/or remove
597 585 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
598 586
599 587 yield resultcls(
600 588 node=node,
601 589 p1node=fnode(p1rev),
602 590 p2node=fnode(p2rev),
603 591 basenode=fnode(baserev),
604 592 flags=flags,
605 593 baserevisionsize=baserevisionsize,
606 594 revision=revision,
607 595 delta=delta,
608 596 sidedata=serialized_sidedata,
609 597 protocol_flags=protocol_flags,
610 598 )
611 599
612 600 prevrev = rev
613 601
614 602
615 603 def deltaiscensored(delta, baserev, baselenfn):
616 604 """Determine if a delta represents censored revision data.
617 605
618 606 ``baserev`` is the base revision this delta is encoded against.
619 607 ``baselenfn`` is a callable receiving a revision number that resolves the
620 608 length of the revision fulltext.
621 609
622 610 Returns a bool indicating if the result of the delta represents a censored
623 611 revision.
624 612 """
625 613 # Fragile heuristic: unless new file meta keys are added alphabetically
626 614 # preceding "censored", all censored revisions are prefixed by
627 615 # "\1\ncensored:". A delta producing such a censored revision must be a
628 616 # full-replacement delta, so we inspect the first and only patch in the
629 617 # delta for this prefix.
630 618 hlen = struct.calcsize(b">lll")
631 619 if len(delta) <= hlen:
632 620 return False
633 621
634 622 oldlen = baselenfn(baserev)
635 623 newlen = len(delta) - hlen
636 624 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
637 625 return False
638 626
639 627 add = b"\1\ncensored:"
640 628 addlen = len(add)
641 629 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now