##// END OF EJS Templates
emitrevision: add a small closure to check if a base is usable...
marmoute -
r50562:191f5057 default
parent child Browse files
Show More
@@ -1,636 +1,639 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 193 * A 20 or 32 byte binary node.
194 194 * An integer revision number
195 195 * A 40 or 64 byte hex node.
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 211 if len(fileid) in (20, 32):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 218 if len(fileid) in (40, 64):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 debug_info=None,
309 309 ):
310 310 """Generic implementation of ifiledata.emitrevisions().
311 311
312 312 Emitting revision data is subtly complex. This function attempts to
313 313 encapsulate all the logic for doing so in a backend-agnostic way.
314 314
315 315 ``store``
316 316 Object conforming to ``ifilestorage`` interface.
317 317
318 318 ``nodes``
319 319 List of revision nodes whose data to emit.
320 320
321 321 ``resultcls``
322 322 A type implementing the ``irevisiondelta`` interface that will be
323 323 constructed and returned.
324 324
325 325 ``deltaparentfn`` (optional)
326 326 Callable receiving a revision number and returning the revision number
327 327 of a revision that the internal delta is stored against. This delta
328 328 will be preferred over computing a new arbitrary delta.
329 329
330 330 If not defined, a delta will always be computed from raw revision
331 331 data.
332 332
333 333 ``candeltafn`` (optional)
334 334 Callable receiving a pair of revision numbers that returns a bool
335 335 indicating whether a delta between them can be produced.
336 336
337 337 If not defined, it is assumed that any two revisions can delta with
338 338 each other.
339 339
340 340 ``rawsizefn`` (optional)
341 341 Callable receiving a revision number and returning the length of the
342 342 ``store.rawdata(rev)``.
343 343
344 344 If not defined, ``len(store.rawdata(rev))`` will be called.
345 345
346 346 ``revdifffn`` (optional)
347 347 Callable receiving a pair of revision numbers that returns a delta
348 348 between them.
349 349
350 350 If not defined, a delta will be computed by invoking mdiff code
351 351 on ``store.revision()`` results.
352 352
353 353 Defining this function allows a precomputed or stored delta to be
354 354 used without having to compute on.
355 355
356 356 ``flagsfn`` (optional)
357 357 Callable receiving a revision number and returns the integer flags
358 358 value for it. If not defined, flags value will be 0.
359 359
360 360 ``deltamode``
361 361 constaint on delta to be sent:
362 362 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
363 363 * CG_DELTAMODE_PREV - only delta against "prev",
364 364 * CG_DELTAMODE_FULL - only issue full snapshot.
365 365
366 366 Whether to send fulltext revisions instead of deltas, if allowed.
367 367
368 368 ``nodesorder``
369 369 ``revisiondata``
370 370 ``assumehaveparentrevisions``
371 371 ``sidedata_helpers`` (optional)
372 372 If not None, means that sidedata should be included.
373 373 See `revlogutil.sidedata.get_sidedata_helpers`.
374 374
375 375 ``debug_info`
376 376 An optionnal dictionnary to gather information about the bundling
377 377 process (if present, see config: debug.bundling.stats.
378 378 """
379 379
380 380 fnode = store.node
381 381 frev = store.rev
382 382
383 383 if nodesorder == b'nodes':
384 384 revs = [frev(n) for n in nodes]
385 385 elif nodesorder == b'linear':
386 386 revs = {frev(n) for n in nodes}
387 387 revs = dagop.linearize(revs, store.parentrevs)
388 388 else: # storage and default
389 389 revs = sorted(frev(n) for n in nodes)
390 390
391 391 prevrev = None
392 392
393 393 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
394 394 prevrev = store.parentrevs(revs[0])[0]
395 395
396 396 # Set of revs available to delta against.
397 397 available = set()
398 398
399 def is_usable_base(rev):
400 return rev != nullrev and rev in available
401
399 402 for rev in revs:
400 403 if rev == nullrev:
401 404 continue
402 405
403 406 debug_delta_source = None
404 407 if debug_info is not None:
405 408 debug_info['revision-total'] += 1
406 409
407 410 node = fnode(rev)
408 411 p1rev, p2rev = store.parentrevs(rev)
409 412
410 413 if debug_info is not None:
411 414 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
412 415 debug_info['merge-total'] += 1
413 416
414 417 if deltaparentfn:
415 418 deltaparentrev = deltaparentfn(rev)
416 419 if debug_info is not None:
417 420 if deltaparentrev == nullrev:
418 421 debug_info['available-full'] += 1
419 422 else:
420 423 debug_info['available-delta'] += 1
421 424
422 425 else:
423 426 deltaparentrev = nullrev
424 427
425 428 # Forced delta against previous mode.
426 429 if deltamode == repository.CG_DELTAMODE_PREV:
427 430 if debug_info is not None:
428 431 debug_delta_source = "prev"
429 432 baserev = prevrev
430 433
431 434 # We're instructed to send fulltext. Honor that.
432 435 elif deltamode == repository.CG_DELTAMODE_FULL:
433 436 if debug_info is not None:
434 437 debug_delta_source = "full"
435 438 baserev = nullrev
436 439 # We're instructed to use p1. Honor that
437 440 elif deltamode == repository.CG_DELTAMODE_P1:
438 441 if debug_info is not None:
439 442 debug_delta_source = "p1"
440 443 baserev = p1rev
441 444
442 445 # There is a delta in storage. We try to use that because it
443 446 # amounts to effectively copying data from storage and is
444 447 # therefore the fastest.
445 448 elif deltaparentrev != nullrev:
446 449 # Base revision was already emitted in this group. We can
447 450 # always safely use the delta.
448 if deltaparentrev in available:
451 if is_usable_base(deltaparentrev):
449 452 if debug_info is not None:
450 453 debug_delta_source = "storage"
451 454 baserev = deltaparentrev
452 455
453 456 # Base revision is a parent that hasn't been emitted already.
454 457 # Use it if we can assume the receiver has the parent revision.
455 458 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
456 459 if debug_info is not None:
457 460 debug_delta_source = "storage"
458 461 baserev = deltaparentrev
459 462 # No guarantee the receiver has the delta parent. Send delta
460 463 # against last revision (if possible), which in the common case
461 464 # should be similar enough to this revision that the delta is
462 465 # reasonable.
463 466 elif prevrev is not None:
464 467 if debug_info is not None:
465 468 debug_info['denied-base-not-available'] += 1
466 469 debug_delta_source = "prev"
467 470 baserev = prevrev
468 471 else:
469 472 if debug_info is not None:
470 473 debug_info['denied-base-not-available'] += 1
471 474 debug_delta_source = "full"
472 475 baserev = nullrev
473 476
474 477 # Storage has a fulltext revision.
475 478
476 479 # Let's use the previous revision, which is as good a guess as any.
477 480 # There is definitely room to improve this logic.
478 481 elif prevrev is not None:
479 482 if debug_info is not None:
480 483 debug_delta_source = "prev"
481 484 baserev = prevrev
482 485 else:
483 486 if debug_info is not None:
484 487 debug_delta_source = "full"
485 488 baserev = nullrev
486 489
487 490 # But we can't actually use our chosen delta base for whatever
488 491 # reason. Reset to fulltext.
489 492 if (
490 493 baserev != nullrev
491 494 and candeltafn is not None
492 495 and not candeltafn(baserev, rev)
493 496 ):
494 497 if debug_info is not None:
495 498 debug_delta_source = "full"
496 499 debug_info['denied-delta-candeltafn'] += 1
497 500 baserev = nullrev
498 501
499 502 revision = None
500 503 delta = None
501 504 baserevisionsize = None
502 505
503 506 if revisiondata:
504 507 if store.iscensored(baserev) or store.iscensored(rev):
505 508 try:
506 509 revision = store.rawdata(node)
507 510 except error.CensoredNodeError as e:
508 511 if debug_info is not None:
509 512 debug_delta_source = "full"
510 513 debug_info['denied-delta-not-available'] += 1
511 514 revision = e.tombstone
512 515
513 516 if baserev != nullrev:
514 517 if rawsizefn:
515 518 baserevisionsize = rawsizefn(baserev)
516 519 else:
517 520 baserevisionsize = len(store.rawdata(baserev))
518 521
519 522 elif (
520 523 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
521 524 ):
522 525 if debug_info is not None:
523 526 debug_info['computed-delta'] += 1 # close enough
524 527 debug_info['delta-full'] += 1
525 528 revision = store.rawdata(node)
526 529 available.add(rev)
527 530 else:
528 531 if revdifffn:
529 532 if debug_info is not None:
530 533 if debug_delta_source == "full":
531 534 debug_info['computed-delta'] += 1
532 535 debug_info['delta-full'] += 1
533 536 elif debug_delta_source == "prev":
534 537 debug_info['computed-delta'] += 1
535 538 debug_info['delta-against-prev'] += 1
536 539 elif debug_delta_source == "p1":
537 540 debug_info['computed-delta'] += 1
538 541 debug_info['delta-against-p1'] += 1
539 542 elif debug_delta_source == "storage":
540 543 debug_info['reused-storage-delta'] += 1
541 544 else:
542 545 assert False, 'unreachable'
543 546
544 547 delta = revdifffn(baserev, rev)
545 548 else:
546 549 if debug_info is not None:
547 550 if debug_delta_source == "full":
548 551 debug_info['computed-delta'] += 1
549 552 debug_info['delta-full'] += 1
550 553 elif debug_delta_source == "prev":
551 554 debug_info['computed-delta'] += 1
552 555 debug_info['delta-against-prev'] += 1
553 556 elif debug_delta_source == "p1":
554 557 debug_info['computed-delta'] += 1
555 558 debug_info['delta-against-p1'] += 1
556 559 elif debug_delta_source == "storage":
557 560 # seem quite unlikelry to happens
558 561 debug_info['computed-delta'] += 1
559 562 debug_info['reused-storage-delta'] += 1
560 563 else:
561 564 assert False, 'unreachable'
562 565 delta = mdiff.textdiff(
563 566 store.rawdata(baserev), store.rawdata(rev)
564 567 )
565 568
566 569 available.add(rev)
567 570
568 571 serialized_sidedata = None
569 572 sidedata_flags = (0, 0)
570 573 if sidedata_helpers:
571 574 try:
572 575 old_sidedata = store.sidedata(rev)
573 576 except error.CensoredNodeError:
574 577 # skip any potential sidedata of the censored revision
575 578 sidedata = {}
576 579 else:
577 580 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
578 581 store=store,
579 582 sidedata_helpers=sidedata_helpers,
580 583 sidedata=old_sidedata,
581 584 rev=rev,
582 585 )
583 586 if sidedata:
584 587 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
585 588
586 589 flags = flagsfn(rev) if flagsfn else 0
587 590 protocol_flags = 0
588 591 if serialized_sidedata:
589 592 # Advertise that sidedata exists to the other side
590 593 protocol_flags |= CG_FLAG_SIDEDATA
591 594 # Computers and removers can return flags to add and/or remove
592 595 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
593 596
594 597 yield resultcls(
595 598 node=node,
596 599 p1node=fnode(p1rev),
597 600 p2node=fnode(p2rev),
598 601 basenode=fnode(baserev),
599 602 flags=flags,
600 603 baserevisionsize=baserevisionsize,
601 604 revision=revision,
602 605 delta=delta,
603 606 sidedata=serialized_sidedata,
604 607 protocol_flags=protocol_flags,
605 608 )
606 609
607 610 prevrev = rev
608 611
609 612
610 613 def deltaiscensored(delta, baserev, baselenfn):
611 614 """Determine if a delta represents censored revision data.
612 615
613 616 ``baserev`` is the base revision this delta is encoded against.
614 617 ``baselenfn`` is a callable receiving a revision number that resolves the
615 618 length of the revision fulltext.
616 619
617 620 Returns a bool indicating if the result of the delta represents a censored
618 621 revision.
619 622 """
620 623 # Fragile heuristic: unless new file meta keys are added alphabetically
621 624 # preceding "censored", all censored revisions are prefixed by
622 625 # "\1\ncensored:". A delta producing such a censored revision must be a
623 626 # full-replacement delta, so we inspect the first and only patch in the
624 627 # delta for this prefix.
625 628 hlen = struct.calcsize(b">lll")
626 629 if len(delta) <= hlen:
627 630 return False
628 631
629 632 oldlen = baselenfn(baserev)
630 633 newlen = len(delta) - hlen
631 634 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
632 635 return False
633 636
634 637 add = b"\1\ncensored:"
635 638 addlen = len(add)
636 639 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now