##// END OF EJS Templates
storageutil: match node length with repository
Joerg Sonnenberger -
r50862:5698c5ee default
parent child Browse files
Show More
@@ -1,647 +1,647 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 * A 20 or 32 byte binary node.
193 * A binary node of appropiate size (e.g. 20/32 Bytes).
194 194 * An integer revision number
195 * A 40 or 64 byte hex node.
195 * A hex node of appropiate size (e.g. 40/64 Bytes).
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 if len(fileid) in (20, 32):
211 if len(fileid) == len(store.nullid):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 if len(fileid) in (40, 64):
218 if len(fileid) == 2 * len(store.nullid):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 debug_info=None,
309 309 ):
310 310 """Generic implementation of ifiledata.emitrevisions().
311 311
312 312 Emitting revision data is subtly complex. This function attempts to
313 313 encapsulate all the logic for doing so in a backend-agnostic way.
314 314
315 315 ``store``
316 316 Object conforming to ``ifilestorage`` interface.
317 317
318 318 ``nodes``
319 319 List of revision nodes whose data to emit.
320 320
321 321 ``resultcls``
322 322 A type implementing the ``irevisiondelta`` interface that will be
323 323 constructed and returned.
324 324
325 325 ``deltaparentfn`` (optional)
326 326 Callable receiving a revision number and returning the revision number
327 327 of a revision that the internal delta is stored against. This delta
328 328 will be preferred over computing a new arbitrary delta.
329 329
330 330 If not defined, a delta will always be computed from raw revision
331 331 data.
332 332
333 333 ``candeltafn`` (optional)
334 334 Callable receiving a pair of revision numbers that returns a bool
335 335 indicating whether a delta between them can be produced.
336 336
337 337 If not defined, it is assumed that any two revisions can delta with
338 338 each other.
339 339
340 340 ``rawsizefn`` (optional)
341 341 Callable receiving a revision number and returning the length of the
342 342 ``store.rawdata(rev)``.
343 343
344 344 If not defined, ``len(store.rawdata(rev))`` will be called.
345 345
346 346 ``revdifffn`` (optional)
347 347 Callable receiving a pair of revision numbers that returns a delta
348 348 between them.
349 349
350 350 If not defined, a delta will be computed by invoking mdiff code
351 351 on ``store.revision()`` results.
352 352
353 353 Defining this function allows a precomputed or stored delta to be
354 354 used without having to compute on.
355 355
356 356 ``flagsfn`` (optional)
357 357 Callable receiving a revision number and returns the integer flags
358 358 value for it. If not defined, flags value will be 0.
359 359
360 360 ``deltamode``
361 361 constaint on delta to be sent:
362 362 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
363 363 * CG_DELTAMODE_PREV - only delta against "prev",
364 364 * CG_DELTAMODE_FULL - only issue full snapshot.
365 365
366 366 Whether to send fulltext revisions instead of deltas, if allowed.
367 367
368 368 ``nodesorder``
369 369 ``revisiondata``
370 370 ``assumehaveparentrevisions``
371 371 ``sidedata_helpers`` (optional)
372 372 If not None, means that sidedata should be included.
373 373 See `revlogutil.sidedata.get_sidedata_helpers`.
374 374
375 375 ``debug_info`
376 376 An optionnal dictionnary to gather information about the bundling
377 377 process (if present, see config: debug.bundling.stats.
378 378 """
379 379
380 380 fnode = store.node
381 381 frev = store.rev
382 382 parents = store.parentrevs
383 383
384 384 if nodesorder == b'nodes':
385 385 revs = [frev(n) for n in nodes]
386 386 elif nodesorder == b'linear':
387 387 revs = {frev(n) for n in nodes}
388 388 revs = dagop.linearize(revs, store.parentrevs)
389 389 else: # storage and default
390 390 revs = sorted(frev(n) for n in nodes)
391 391
392 392 prevrev = None
393 393
394 394 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
395 395 prevrev = parents(revs[0])[0]
396 396
397 397 # Sets of revs available to delta against.
398 398 emitted = set()
399 399 available = set()
400 400 if assumehaveparentrevisions:
401 401 common_heads = set(p for r in revs for p in parents(r))
402 402 common_heads.difference_update(revs)
403 403 available = store.ancestors(common_heads, inclusive=True)
404 404
405 405 def is_usable_base(rev):
406 406 """Is a delta against this revision usable over the wire"""
407 407 if rev == nullrev:
408 408 return False
409 409 return rev in emitted or rev in available
410 410
411 411 for rev in revs:
412 412 if rev == nullrev:
413 413 continue
414 414
415 415 debug_delta_source = None
416 416 if debug_info is not None:
417 417 debug_info['revision-total'] += 1
418 418
419 419 node = fnode(rev)
420 420 p1rev, p2rev = parents(rev)
421 421
422 422 if debug_info is not None:
423 423 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
424 424 debug_info['merge-total'] += 1
425 425
426 426 if deltaparentfn:
427 427 deltaparentrev = deltaparentfn(rev)
428 428 if debug_info is not None:
429 429 if deltaparentrev == nullrev:
430 430 debug_info['available-full'] += 1
431 431 else:
432 432 debug_info['available-delta'] += 1
433 433
434 434 else:
435 435 deltaparentrev = nullrev
436 436
437 437 # Forced delta against previous mode.
438 438 if deltamode == repository.CG_DELTAMODE_PREV:
439 439 if debug_info is not None:
440 440 debug_delta_source = "prev"
441 441 baserev = prevrev
442 442
443 443 # We're instructed to send fulltext. Honor that.
444 444 elif deltamode == repository.CG_DELTAMODE_FULL:
445 445 if debug_info is not None:
446 446 debug_delta_source = "full"
447 447 baserev = nullrev
448 448 # We're instructed to use p1. Honor that
449 449 elif deltamode == repository.CG_DELTAMODE_P1:
450 450 if debug_info is not None:
451 451 debug_delta_source = "p1"
452 452 baserev = p1rev
453 453
454 454 # There is a delta in storage. We try to use that because it
455 455 # amounts to effectively copying data from storage and is
456 456 # therefore the fastest.
457 457 elif is_usable_base(deltaparentrev):
458 458 if debug_info is not None:
459 459 debug_delta_source = "storage"
460 460 baserev = deltaparentrev
461 461 elif deltaparentrev == nullrev:
462 462 if debug_info is not None:
463 463 debug_delta_source = "storage"
464 464 baserev = deltaparentrev
465 465 else:
466 466 if deltaparentrev != nullrev and debug_info is not None:
467 467 debug_info['denied-base-not-available'] += 1
468 468 # No guarantee the receiver has the delta parent, or Storage has a
469 469 # fulltext revision.
470 470 #
471 471 # We compute a delta on the fly to send over the wire.
472 472 #
473 473 # We start with a try against p1, which in the common case should
474 474 # be close to this revision content.
475 475 #
476 476 # note: we could optimize between p1 and p2 in merges cases.
477 477 elif is_usable_base(p1rev):
478 478 if debug_info is not None:
479 479 debug_delta_source = "p1"
480 480 baserev = p1rev
481 481 # if p1 was not an option, try p2
482 482 elif is_usable_base(p2rev):
483 483 if debug_info is not None:
484 484 debug_delta_source = "p2"
485 485 baserev = p2rev
486 486 # Send delta against prev in despair
487 487 #
488 488 # using the closest available ancestors first might be better?
489 489 elif prevrev is not None:
490 490 if debug_info is not None:
491 491 debug_delta_source = "prev"
492 492 baserev = prevrev
493 493 else:
494 494 if debug_info is not None:
495 495 debug_delta_source = "full"
496 496 baserev = nullrev
497 497
498 498 # But we can't actually use our chosen delta base for whatever
499 499 # reason. Reset to fulltext.
500 500 if (
501 501 baserev != nullrev
502 502 and candeltafn is not None
503 503 and not candeltafn(baserev, rev)
504 504 ):
505 505 if debug_info is not None:
506 506 debug_delta_source = "full"
507 507 debug_info['denied-delta-candeltafn'] += 1
508 508 baserev = nullrev
509 509
510 510 revision = None
511 511 delta = None
512 512 baserevisionsize = None
513 513
514 514 if revisiondata:
515 515 if store.iscensored(baserev) or store.iscensored(rev):
516 516 try:
517 517 revision = store.rawdata(node)
518 518 except error.CensoredNodeError as e:
519 519 if debug_info is not None:
520 520 debug_delta_source = "full"
521 521 debug_info['denied-delta-not-available'] += 1
522 522 revision = e.tombstone
523 523
524 524 if baserev != nullrev:
525 525 if rawsizefn:
526 526 baserevisionsize = rawsizefn(baserev)
527 527 else:
528 528 baserevisionsize = len(store.rawdata(baserev))
529 529
530 530 elif (
531 531 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
532 532 ):
533 533 if debug_info is not None:
534 534 debug_info['computed-delta'] += 1 # close enough
535 535 debug_info['delta-full'] += 1
536 536 revision = store.rawdata(node)
537 537 emitted.add(rev)
538 538 else:
539 539 if revdifffn:
540 540 if debug_info is not None:
541 541 if debug_delta_source == "full":
542 542 debug_info['computed-delta'] += 1
543 543 debug_info['delta-full'] += 1
544 544 elif debug_delta_source == "prev":
545 545 debug_info['computed-delta'] += 1
546 546 debug_info['delta-against-prev'] += 1
547 547 elif debug_delta_source == "p1":
548 548 debug_info['computed-delta'] += 1
549 549 debug_info['delta-against-p1'] += 1
550 550 elif debug_delta_source == "storage":
551 551 debug_info['reused-storage-delta'] += 1
552 552 else:
553 553 assert False, 'unreachable'
554 554
555 555 delta = revdifffn(baserev, rev)
556 556 else:
557 557 if debug_info is not None:
558 558 if debug_delta_source == "full":
559 559 debug_info['computed-delta'] += 1
560 560 debug_info['delta-full'] += 1
561 561 elif debug_delta_source == "prev":
562 562 debug_info['computed-delta'] += 1
563 563 debug_info['delta-against-prev'] += 1
564 564 elif debug_delta_source == "p1":
565 565 debug_info['computed-delta'] += 1
566 566 debug_info['delta-against-p1'] += 1
567 567 elif debug_delta_source == "storage":
568 568 # seem quite unlikelry to happens
569 569 debug_info['computed-delta'] += 1
570 570 debug_info['reused-storage-delta'] += 1
571 571 else:
572 572 assert False, 'unreachable'
573 573 delta = mdiff.textdiff(
574 574 store.rawdata(baserev), store.rawdata(rev)
575 575 )
576 576
577 577 emitted.add(rev)
578 578
579 579 serialized_sidedata = None
580 580 sidedata_flags = (0, 0)
581 581 if sidedata_helpers:
582 582 try:
583 583 old_sidedata = store.sidedata(rev)
584 584 except error.CensoredNodeError:
585 585 # skip any potential sidedata of the censored revision
586 586 sidedata = {}
587 587 else:
588 588 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
589 589 store=store,
590 590 sidedata_helpers=sidedata_helpers,
591 591 sidedata=old_sidedata,
592 592 rev=rev,
593 593 )
594 594 if sidedata:
595 595 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
596 596
597 597 flags = flagsfn(rev) if flagsfn else 0
598 598 protocol_flags = 0
599 599 if serialized_sidedata:
600 600 # Advertise that sidedata exists to the other side
601 601 protocol_flags |= CG_FLAG_SIDEDATA
602 602 # Computers and removers can return flags to add and/or remove
603 603 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
604 604
605 605 yield resultcls(
606 606 node=node,
607 607 p1node=fnode(p1rev),
608 608 p2node=fnode(p2rev),
609 609 basenode=fnode(baserev),
610 610 flags=flags,
611 611 baserevisionsize=baserevisionsize,
612 612 revision=revision,
613 613 delta=delta,
614 614 sidedata=serialized_sidedata,
615 615 protocol_flags=protocol_flags,
616 616 )
617 617
618 618 prevrev = rev
619 619
620 620
621 621 def deltaiscensored(delta, baserev, baselenfn):
622 622 """Determine if a delta represents censored revision data.
623 623
624 624 ``baserev`` is the base revision this delta is encoded against.
625 625 ``baselenfn`` is a callable receiving a revision number that resolves the
626 626 length of the revision fulltext.
627 627
628 628 Returns a bool indicating if the result of the delta represents a censored
629 629 revision.
630 630 """
631 631 # Fragile heuristic: unless new file meta keys are added alphabetically
632 632 # preceding "censored", all censored revisions are prefixed by
633 633 # "\1\ncensored:". A delta producing such a censored revision must be a
634 634 # full-replacement delta, so we inspect the first and only patch in the
635 635 # delta for this prefix.
636 636 hlen = struct.calcsize(b">lll")
637 637 if len(delta) <= hlen:
638 638 return False
639 639
640 640 oldlen = baselenfn(baserev)
641 641 newlen = len(delta) - hlen
642 642 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
643 643 return False
644 644
645 645 add = b"\1\ncensored:"
646 646 addlen = len(add)
647 647 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now