##// END OF EJS Templates
emitrevision: simplify the fallback to computed delta...
marmoute -
r50682:bb2c663c stable
parent child Browse files
Show More
@@ -1,560 +1,552 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 193 * A 20 or 32 byte binary node.
194 194 * An integer revision number
195 195 * A 40 or 64 byte hex node.
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 211 if len(fileid) in (20, 32):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 218 if len(fileid) in (40, 64):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 ):
309 309 """Generic implementation of ifiledata.emitrevisions().
310 310
311 311 Emitting revision data is subtly complex. This function attempts to
312 312 encapsulate all the logic for doing so in a backend-agnostic way.
313 313
314 314 ``store``
315 315 Object conforming to ``ifilestorage`` interface.
316 316
317 317 ``nodes``
318 318 List of revision nodes whose data to emit.
319 319
320 320 ``resultcls``
321 321 A type implementing the ``irevisiondelta`` interface that will be
322 322 constructed and returned.
323 323
324 324 ``deltaparentfn`` (optional)
325 325 Callable receiving a revision number and returning the revision number
326 326 of a revision that the internal delta is stored against. This delta
327 327 will be preferred over computing a new arbitrary delta.
328 328
329 329 If not defined, a delta will always be computed from raw revision
330 330 data.
331 331
332 332 ``candeltafn`` (optional)
333 333 Callable receiving a pair of revision numbers that returns a bool
334 334 indicating whether a delta between them can be produced.
335 335
336 336 If not defined, it is assumed that any two revisions can delta with
337 337 each other.
338 338
339 339 ``rawsizefn`` (optional)
340 340 Callable receiving a revision number and returning the length of the
341 341 ``store.rawdata(rev)``.
342 342
343 343 If not defined, ``len(store.rawdata(rev))`` will be called.
344 344
345 345 ``revdifffn`` (optional)
346 346 Callable receiving a pair of revision numbers that returns a delta
347 347 between them.
348 348
349 349 If not defined, a delta will be computed by invoking mdiff code
350 350 on ``store.revision()`` results.
351 351
352 352 Defining this function allows a precomputed or stored delta to be
353 353 used without having to compute on.
354 354
355 355 ``flagsfn`` (optional)
356 356 Callable receiving a revision number and returns the integer flags
357 357 value for it. If not defined, flags value will be 0.
358 358
359 359 ``deltamode``
360 360 constaint on delta to be sent:
361 361 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
362 362 * CG_DELTAMODE_PREV - only delta against "prev",
363 363 * CG_DELTAMODE_FULL - only issue full snapshot.
364 364
365 365 Whether to send fulltext revisions instead of deltas, if allowed.
366 366
367 367 ``nodesorder``
368 368 ``revisiondata``
369 369 ``assumehaveparentrevisions``
370 370 ``sidedata_helpers`` (optional)
371 371 If not None, means that sidedata should be included.
372 372 See `revlogutil.sidedata.get_sidedata_helpers`.
373 373 """
374 374
375 375 fnode = store.node
376 376 frev = store.rev
377 377
378 378 if nodesorder == b'nodes':
379 379 revs = [frev(n) for n in nodes]
380 380 elif nodesorder == b'linear':
381 381 revs = {frev(n) for n in nodes}
382 382 revs = dagop.linearize(revs, store.parentrevs)
383 383 else: # storage and default
384 384 revs = sorted(frev(n) for n in nodes)
385 385
386 386 prevrev = None
387 387
388 388 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
389 389 prevrev = store.parentrevs(revs[0])[0]
390 390
391 391 # Set of revs available to delta against.
392 392 available = set()
393 393 parents = []
394 394
395 395 def is_usable_base(rev):
396 396 """Is a delta against this revision usable over the wire"""
397 397 if rev == nullrev:
398 398 return False
399 399 # Base revision was already emitted in this group.
400 400 if rev in available:
401 401 return True
402 402 # Base revision is a parent that hasn't been emitted already.
403 403 if assumehaveparentrevisions and rev in parents:
404 404 return True
405 405 return False
406 406
407 407 for rev in revs:
408 408 if rev == nullrev:
409 409 continue
410 410
411 411 node = fnode(rev)
412 412 parents[:] = p1rev, p2rev = store.parentrevs(rev)
413 413
414 414 if deltaparentfn:
415 415 deltaparentrev = deltaparentfn(rev)
416 416 else:
417 417 deltaparentrev = nullrev
418 418
419 419 # Forced delta against previous mode.
420 420 if deltamode == repository.CG_DELTAMODE_PREV:
421 421 baserev = prevrev
422 422
423 423 # We're instructed to send fulltext. Honor that.
424 424 elif deltamode == repository.CG_DELTAMODE_FULL:
425 425 baserev = nullrev
426 426 # We're instructed to use p1. Honor that
427 427 elif deltamode == repository.CG_DELTAMODE_P1:
428 428 baserev = p1rev
429 429
430 430 # There is a delta in storage. We try to use that because it
431 431 # amounts to effectively copying data from storage and is
432 432 # therefore the fastest.
433 elif deltaparentrev != nullrev:
434 # If the stored delta works, let us use it !
435 if is_usable_base(deltaparentrev):
436 baserev = deltaparentrev
437 # No guarantee the receiver has the delta parent. Send delta
438 # against last revision (if possible), which in the common case
439 # should be similar enough to this revision that the delta is
440 # reasonable.
441 elif prevrev is not None:
433 elif is_usable_base(deltaparentrev):
434 baserev = deltaparentrev
435 else:
436 # No guarantee the receiver has the delta parent, or Storage has a
437 # fulltext revision.
438 #
439 # Send delta against last revision (if possible), which in the
440 # common case should be similar enough to this revision that the
441 # delta is reasonable.
442 if prevrev is not None:
442 443 baserev = prevrev
443 444 else:
444 445 baserev = nullrev
445 446
446 # Storage has a fulltext revision.
447
448 # Let's use the previous revision, which is as good a guess as any.
449 # There is definitely room to improve this logic.
450 elif prevrev is not None:
451 baserev = prevrev
452 else:
453 baserev = nullrev
454
455 447 # But we can't actually use our chosen delta base for whatever
456 448 # reason. Reset to fulltext.
457 449 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
458 450 baserev = nullrev
459 451
460 452 revision = None
461 453 delta = None
462 454 baserevisionsize = None
463 455
464 456 if revisiondata:
465 457 if store.iscensored(baserev) or store.iscensored(rev):
466 458 try:
467 459 revision = store.rawdata(node)
468 460 except error.CensoredNodeError as e:
469 461 revision = e.tombstone
470 462
471 463 if baserev != nullrev:
472 464 if rawsizefn:
473 465 baserevisionsize = rawsizefn(baserev)
474 466 else:
475 467 baserevisionsize = len(store.rawdata(baserev))
476 468
477 469 elif (
478 470 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
479 471 ):
480 472 revision = store.rawdata(node)
481 473 available.add(rev)
482 474 else:
483 475 if revdifffn:
484 476 delta = revdifffn(baserev, rev)
485 477 else:
486 478 delta = mdiff.textdiff(
487 479 store.rawdata(baserev), store.rawdata(rev)
488 480 )
489 481
490 482 available.add(rev)
491 483
492 484 serialized_sidedata = None
493 485 sidedata_flags = (0, 0)
494 486 if sidedata_helpers:
495 487 try:
496 488 old_sidedata = store.sidedata(rev)
497 489 except error.CensoredNodeError:
498 490 # skip any potential sidedata of the censored revision
499 491 sidedata = {}
500 492 else:
501 493 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
502 494 store=store,
503 495 sidedata_helpers=sidedata_helpers,
504 496 sidedata=old_sidedata,
505 497 rev=rev,
506 498 )
507 499 if sidedata:
508 500 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
509 501
510 502 flags = flagsfn(rev) if flagsfn else 0
511 503 protocol_flags = 0
512 504 if serialized_sidedata:
513 505 # Advertise that sidedata exists to the other side
514 506 protocol_flags |= CG_FLAG_SIDEDATA
515 507 # Computers and removers can return flags to add and/or remove
516 508 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
517 509
518 510 yield resultcls(
519 511 node=node,
520 512 p1node=fnode(p1rev),
521 513 p2node=fnode(p2rev),
522 514 basenode=fnode(baserev),
523 515 flags=flags,
524 516 baserevisionsize=baserevisionsize,
525 517 revision=revision,
526 518 delta=delta,
527 519 sidedata=serialized_sidedata,
528 520 protocol_flags=protocol_flags,
529 521 )
530 522
531 523 prevrev = rev
532 524
533 525
534 526 def deltaiscensored(delta, baserev, baselenfn):
535 527 """Determine if a delta represents censored revision data.
536 528
537 529 ``baserev`` is the base revision this delta is encoded against.
538 530 ``baselenfn`` is a callable receiving a revision number that resolves the
539 531 length of the revision fulltext.
540 532
541 533 Returns a bool indicating if the result of the delta represents a censored
542 534 revision.
543 535 """
544 536 # Fragile heuristic: unless new file meta keys are added alphabetically
545 537 # preceding "censored", all censored revisions are prefixed by
546 538 # "\1\ncensored:". A delta producing such a censored revision must be a
547 539 # full-replacement delta, so we inspect the first and only patch in the
548 540 # delta for this prefix.
549 541 hlen = struct.calcsize(b">lll")
550 542 if len(delta) <= hlen:
551 543 return False
552 544
553 545 oldlen = baselenfn(baserev)
554 546 newlen = len(delta) - hlen
555 547 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
556 548 return False
557 549
558 550 add = b"\1\ncensored:"
559 551 addlen = len(add)
560 552 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now