##// END OF EJS Templates
emitrevision: also check the parents in the availability closure...
marmoute -
r50681:0bda07f3 stable
parent child Browse files
Show More
@@ -1,557 +1,560 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import re
10 10 import struct
11 11
12 12 from ..i18n import _
13 13 from ..node import (
14 14 bin,
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from .. import (
19 19 dagop,
20 20 error,
21 21 mdiff,
22 22 )
23 23 from ..interfaces import repository
24 24 from ..revlogutils import sidedata as sidedatamod
25 25 from ..utils import hashutil
26 26
27 27 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
28 28
29 29 # revision data contains extra metadata not part of the official digest
30 30 # Only used in changegroup >= v4.
31 31 CG_FLAG_SIDEDATA = 1
32 32
33 33
34 34 def hashrevisionsha1(text, p1, p2):
35 35 """Compute the SHA-1 for revision data and its parents.
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 # As of now, if one of the parent node is null, p2 is null
42 42 if p2 == sha1nodeconstants.nullid:
43 43 # deep copy of a hash is faster than creating one
44 44 s = _nullhash.copy()
45 45 s.update(p1)
46 46 else:
47 47 # none of the parent nodes are nullid
48 48 if p1 < p2:
49 49 a = p1
50 50 b = p2
51 51 else:
52 52 a = p2
53 53 b = p1
54 54 s = hashutil.sha1(a)
55 55 s.update(b)
56 56 s.update(text)
57 57 return s.digest()
58 58
59 59
60 60 METADATA_RE = re.compile(b'\x01\n')
61 61
62 62
63 63 def parsemeta(text):
64 64 """Parse metadata header from revision data.
65 65
66 66 Returns a 2-tuple of (metadata, offset), where both can be None if there
67 67 is no metadata.
68 68 """
69 69 # text can be buffer, so we can't use .startswith or .index
70 70 if text[:2] != b'\x01\n':
71 71 return None, None
72 72 s = METADATA_RE.search(text, 2).start()
73 73 mtext = text[2:s]
74 74 meta = {}
75 75 for l in mtext.splitlines():
76 76 k, v = l.split(b': ', 1)
77 77 meta[k] = v
78 78 return meta, s + 2
79 79
80 80
81 81 def packmeta(meta, text):
82 82 """Add metadata to fulltext to produce revision text."""
83 83 keys = sorted(meta)
84 84 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
85 85 return b'\x01\n%s\x01\n%s' % (metatext, text)
86 86
87 87
88 88 def iscensoredtext(text):
89 89 meta = parsemeta(text)[0]
90 90 return meta and b'censored' in meta
91 91
92 92
93 93 def filtermetadata(text):
94 94 """Extract just the revision data from source text.
95 95
96 96 Returns ``text`` unless it has a metadata header, in which case we return
97 97 a new buffer without hte metadata.
98 98 """
99 99 if not text.startswith(b'\x01\n'):
100 100 return text
101 101
102 102 offset = text.index(b'\x01\n', 2)
103 103 return text[offset + 2 :]
104 104
105 105
106 106 def filerevisioncopied(store, node):
107 107 """Resolve file revision copy metadata.
108 108
109 109 Returns ``False`` if the file has no copy metadata. Otherwise a
110 110 2-tuple of the source filename and node.
111 111 """
112 112 if store.parents(node)[0] != sha1nodeconstants.nullid:
113 113 # When creating a copy or move we set filelog parents to null,
114 114 # because contents are probably unrelated and making a delta
115 115 # would not be useful.
116 116 # Conversely, if filelog p1 is non-null we know
117 117 # there is no copy metadata.
118 118 # In the presence of merges, this reasoning becomes invalid
119 119 # if we reorder parents. See tests/test-issue6528.t.
120 120 return False
121 121
122 122 meta = parsemeta(store.revision(node))[0]
123 123
124 124 # copy and copyrev occur in pairs. In rare cases due to old bugs,
125 125 # one can occur without the other. So ensure both are present to flag
126 126 # as a copy.
127 127 if meta and b'copy' in meta and b'copyrev' in meta:
128 128 return meta[b'copy'], bin(meta[b'copyrev'])
129 129
130 130 return False
131 131
132 132
133 133 def filedataequivalent(store, node, filedata):
134 134 """Determines whether file data is equivalent to a stored node.
135 135
136 136 Returns True if the passed file data would hash to the same value
137 137 as a stored revision and False otherwise.
138 138
139 139 When a stored revision is censored, filedata must be empty to have
140 140 equivalence.
141 141
142 142 When a stored revision has copy metadata, it is ignored as part
143 143 of the compare.
144 144 """
145 145
146 146 if filedata.startswith(b'\x01\n'):
147 147 revisiontext = b'\x01\n\x01\n' + filedata
148 148 else:
149 149 revisiontext = filedata
150 150
151 151 p1, p2 = store.parents(node)
152 152
153 153 computednode = hashrevisionsha1(revisiontext, p1, p2)
154 154
155 155 if computednode == node:
156 156 return True
157 157
158 158 # Censored files compare against the empty file.
159 159 if store.iscensored(store.rev(node)):
160 160 return filedata == b''
161 161
162 162 # Renaming a file produces a different hash, even if the data
163 163 # remains unchanged. Check if that's the case.
164 164 if store.renamed(node):
165 165 return store.read(node) == filedata
166 166
167 167 return False
168 168
169 169
170 170 def iterrevs(storelen, start=0, stop=None):
171 171 """Iterate over revision numbers in a store."""
172 172 step = 1
173 173
174 174 if stop is not None:
175 175 if start > stop:
176 176 step = -1
177 177 stop += step
178 178 if stop > storelen:
179 179 stop = storelen
180 180 else:
181 181 stop = storelen
182 182
183 183 return range(start, stop, step)
184 184
185 185
186 186 def fileidlookup(store, fileid, identifier):
187 187 """Resolve the file node for a value.
188 188
189 189 ``store`` is an object implementing the ``ifileindex`` interface.
190 190
191 191 ``fileid`` can be:
192 192
193 193 * A 20 or 32 byte binary node.
194 194 * An integer revision number
195 195 * A 40 or 64 byte hex node.
196 196 * A bytes that can be parsed as an integer representing a revision number.
197 197
198 198 ``identifier`` is used to populate ``error.LookupError`` with an identifier
199 199 for the store.
200 200
201 201 Raises ``error.LookupError`` on failure.
202 202 """
203 203 if isinstance(fileid, int):
204 204 try:
205 205 return store.node(fileid)
206 206 except IndexError:
207 207 raise error.LookupError(
208 208 b'%d' % fileid, identifier, _(b'no match found')
209 209 )
210 210
211 211 if len(fileid) in (20, 32):
212 212 try:
213 213 store.rev(fileid)
214 214 return fileid
215 215 except error.LookupError:
216 216 pass
217 217
218 218 if len(fileid) in (40, 64):
219 219 try:
220 220 rawnode = bin(fileid)
221 221 store.rev(rawnode)
222 222 return rawnode
223 223 except TypeError:
224 224 pass
225 225
226 226 try:
227 227 rev = int(fileid)
228 228
229 229 if b'%d' % rev != fileid:
230 230 raise ValueError
231 231
232 232 try:
233 233 return store.node(rev)
234 234 except (IndexError, TypeError):
235 235 pass
236 236 except (ValueError, OverflowError):
237 237 pass
238 238
239 239 raise error.LookupError(fileid, identifier, _(b'no match found'))
240 240
241 241
242 242 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
243 243 """Resolve information needed to strip revisions.
244 244
245 245 Finds the minimum revision number that must be stripped in order to
246 246 strip ``minlinkrev``.
247 247
248 248 Returns a 2-tuple of the minimum revision number to do that and a set
249 249 of all revision numbers that have linkrevs that would be broken
250 250 by that strip.
251 251
252 252 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
253 253 ``headrevs`` is an iterable of head revisions.
254 254 ``linkrevfn`` is a callable that receives a revision and returns a linked
255 255 revision.
256 256 ``parentrevsfn`` is a callable that receives a revision number and returns
257 257 an iterable of its parent revision numbers.
258 258 """
259 259 brokenrevs = set()
260 260 strippoint = tiprev + 1
261 261
262 262 heads = {}
263 263 futurelargelinkrevs = set()
264 264 for head in headrevs:
265 265 headlinkrev = linkrevfn(head)
266 266 heads[head] = headlinkrev
267 267 if headlinkrev >= minlinkrev:
268 268 futurelargelinkrevs.add(headlinkrev)
269 269
270 270 # This algorithm involves walking down the rev graph, starting at the
271 271 # heads. Since the revs are topologically sorted according to linkrev,
272 272 # once all head linkrevs are below the minlink, we know there are
273 273 # no more revs that could have a linkrev greater than minlink.
274 274 # So we can stop walking.
275 275 while futurelargelinkrevs:
276 276 strippoint -= 1
277 277 linkrev = heads.pop(strippoint)
278 278
279 279 if linkrev < minlinkrev:
280 280 brokenrevs.add(strippoint)
281 281 else:
282 282 futurelargelinkrevs.remove(linkrev)
283 283
284 284 for p in parentrevsfn(strippoint):
285 285 if p != nullrev:
286 286 plinkrev = linkrevfn(p)
287 287 heads[p] = plinkrev
288 288 if plinkrev >= minlinkrev:
289 289 futurelargelinkrevs.add(plinkrev)
290 290
291 291 return strippoint, brokenrevs
292 292
293 293
294 294 def emitrevisions(
295 295 store,
296 296 nodes,
297 297 nodesorder,
298 298 resultcls,
299 299 deltaparentfn=None,
300 300 candeltafn=None,
301 301 rawsizefn=None,
302 302 revdifffn=None,
303 303 flagsfn=None,
304 304 deltamode=repository.CG_DELTAMODE_STD,
305 305 revisiondata=False,
306 306 assumehaveparentrevisions=False,
307 307 sidedata_helpers=None,
308 308 ):
309 309 """Generic implementation of ifiledata.emitrevisions().
310 310
311 311 Emitting revision data is subtly complex. This function attempts to
312 312 encapsulate all the logic for doing so in a backend-agnostic way.
313 313
314 314 ``store``
315 315 Object conforming to ``ifilestorage`` interface.
316 316
317 317 ``nodes``
318 318 List of revision nodes whose data to emit.
319 319
320 320 ``resultcls``
321 321 A type implementing the ``irevisiondelta`` interface that will be
322 322 constructed and returned.
323 323
324 324 ``deltaparentfn`` (optional)
325 325 Callable receiving a revision number and returning the revision number
326 326 of a revision that the internal delta is stored against. This delta
327 327 will be preferred over computing a new arbitrary delta.
328 328
329 329 If not defined, a delta will always be computed from raw revision
330 330 data.
331 331
332 332 ``candeltafn`` (optional)
333 333 Callable receiving a pair of revision numbers that returns a bool
334 334 indicating whether a delta between them can be produced.
335 335
336 336 If not defined, it is assumed that any two revisions can delta with
337 337 each other.
338 338
339 339 ``rawsizefn`` (optional)
340 340 Callable receiving a revision number and returning the length of the
341 341 ``store.rawdata(rev)``.
342 342
343 343 If not defined, ``len(store.rawdata(rev))`` will be called.
344 344
345 345 ``revdifffn`` (optional)
346 346 Callable receiving a pair of revision numbers that returns a delta
347 347 between them.
348 348
349 349 If not defined, a delta will be computed by invoking mdiff code
350 350 on ``store.revision()`` results.
351 351
352 352 Defining this function allows a precomputed or stored delta to be
353 353 used without having to compute on.
354 354
355 355 ``flagsfn`` (optional)
356 356 Callable receiving a revision number and returns the integer flags
357 357 value for it. If not defined, flags value will be 0.
358 358
359 359 ``deltamode``
360 360 constaint on delta to be sent:
361 361 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
362 362 * CG_DELTAMODE_PREV - only delta against "prev",
363 363 * CG_DELTAMODE_FULL - only issue full snapshot.
364 364
365 365 Whether to send fulltext revisions instead of deltas, if allowed.
366 366
367 367 ``nodesorder``
368 368 ``revisiondata``
369 369 ``assumehaveparentrevisions``
370 370 ``sidedata_helpers`` (optional)
371 371 If not None, means that sidedata should be included.
372 372 See `revlogutil.sidedata.get_sidedata_helpers`.
373 373 """
374 374
375 375 fnode = store.node
376 376 frev = store.rev
377 377
378 378 if nodesorder == b'nodes':
379 379 revs = [frev(n) for n in nodes]
380 380 elif nodesorder == b'linear':
381 381 revs = {frev(n) for n in nodes}
382 382 revs = dagop.linearize(revs, store.parentrevs)
383 383 else: # storage and default
384 384 revs = sorted(frev(n) for n in nodes)
385 385
386 386 prevrev = None
387 387
388 388 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
389 389 prevrev = store.parentrevs(revs[0])[0]
390 390
391 391 # Set of revs available to delta against.
392 392 available = set()
393 parents = []
393 394
394 395 def is_usable_base(rev):
395 return rev != nullrev and rev in available
396 """Is a delta against this revision usable over the wire"""
397 if rev == nullrev:
398 return False
399 # Base revision was already emitted in this group.
400 if rev in available:
401 return True
402 # Base revision is a parent that hasn't been emitted already.
403 if assumehaveparentrevisions and rev in parents:
404 return True
405 return False
396 406
397 407 for rev in revs:
398 408 if rev == nullrev:
399 409 continue
400 410
401 411 node = fnode(rev)
402 p1rev, p2rev = store.parentrevs(rev)
412 parents[:] = p1rev, p2rev = store.parentrevs(rev)
403 413
404 414 if deltaparentfn:
405 415 deltaparentrev = deltaparentfn(rev)
406 416 else:
407 417 deltaparentrev = nullrev
408 418
409 419 # Forced delta against previous mode.
410 420 if deltamode == repository.CG_DELTAMODE_PREV:
411 421 baserev = prevrev
412 422
413 423 # We're instructed to send fulltext. Honor that.
414 424 elif deltamode == repository.CG_DELTAMODE_FULL:
415 425 baserev = nullrev
416 426 # We're instructed to use p1. Honor that
417 427 elif deltamode == repository.CG_DELTAMODE_P1:
418 428 baserev = p1rev
419 429
420 430 # There is a delta in storage. We try to use that because it
421 431 # amounts to effectively copying data from storage and is
422 432 # therefore the fastest.
423 433 elif deltaparentrev != nullrev:
424 # Base revision was already emitted in this group. We can
425 # always safely use the delta.
434 # If the stored delta works, let us use it !
426 435 if is_usable_base(deltaparentrev):
427 436 baserev = deltaparentrev
428
429 # Base revision is a parent that hasn't been emitted already.
430 # Use it if we can assume the receiver has the parent revision.
431 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
432 baserev = deltaparentrev
433
434 437 # No guarantee the receiver has the delta parent. Send delta
435 438 # against last revision (if possible), which in the common case
436 439 # should be similar enough to this revision that the delta is
437 440 # reasonable.
438 441 elif prevrev is not None:
439 442 baserev = prevrev
440 443 else:
441 444 baserev = nullrev
442 445
443 446 # Storage has a fulltext revision.
444 447
445 448 # Let's use the previous revision, which is as good a guess as any.
446 449 # There is definitely room to improve this logic.
447 450 elif prevrev is not None:
448 451 baserev = prevrev
449 452 else:
450 453 baserev = nullrev
451 454
452 455 # But we can't actually use our chosen delta base for whatever
453 456 # reason. Reset to fulltext.
454 457 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
455 458 baserev = nullrev
456 459
457 460 revision = None
458 461 delta = None
459 462 baserevisionsize = None
460 463
461 464 if revisiondata:
462 465 if store.iscensored(baserev) or store.iscensored(rev):
463 466 try:
464 467 revision = store.rawdata(node)
465 468 except error.CensoredNodeError as e:
466 469 revision = e.tombstone
467 470
468 471 if baserev != nullrev:
469 472 if rawsizefn:
470 473 baserevisionsize = rawsizefn(baserev)
471 474 else:
472 475 baserevisionsize = len(store.rawdata(baserev))
473 476
474 477 elif (
475 478 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
476 479 ):
477 480 revision = store.rawdata(node)
478 481 available.add(rev)
479 482 else:
480 483 if revdifffn:
481 484 delta = revdifffn(baserev, rev)
482 485 else:
483 486 delta = mdiff.textdiff(
484 487 store.rawdata(baserev), store.rawdata(rev)
485 488 )
486 489
487 490 available.add(rev)
488 491
489 492 serialized_sidedata = None
490 493 sidedata_flags = (0, 0)
491 494 if sidedata_helpers:
492 495 try:
493 496 old_sidedata = store.sidedata(rev)
494 497 except error.CensoredNodeError:
495 498 # skip any potential sidedata of the censored revision
496 499 sidedata = {}
497 500 else:
498 501 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
499 502 store=store,
500 503 sidedata_helpers=sidedata_helpers,
501 504 sidedata=old_sidedata,
502 505 rev=rev,
503 506 )
504 507 if sidedata:
505 508 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
506 509
507 510 flags = flagsfn(rev) if flagsfn else 0
508 511 protocol_flags = 0
509 512 if serialized_sidedata:
510 513 # Advertise that sidedata exists to the other side
511 514 protocol_flags |= CG_FLAG_SIDEDATA
512 515 # Computers and removers can return flags to add and/or remove
513 516 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
514 517
515 518 yield resultcls(
516 519 node=node,
517 520 p1node=fnode(p1rev),
518 521 p2node=fnode(p2rev),
519 522 basenode=fnode(baserev),
520 523 flags=flags,
521 524 baserevisionsize=baserevisionsize,
522 525 revision=revision,
523 526 delta=delta,
524 527 sidedata=serialized_sidedata,
525 528 protocol_flags=protocol_flags,
526 529 )
527 530
528 531 prevrev = rev
529 532
530 533
531 534 def deltaiscensored(delta, baserev, baselenfn):
532 535 """Determine if a delta represents censored revision data.
533 536
534 537 ``baserev`` is the base revision this delta is encoded against.
535 538 ``baselenfn`` is a callable receiving a revision number that resolves the
536 539 length of the revision fulltext.
537 540
538 541 Returns a bool indicating if the result of the delta represents a censored
539 542 revision.
540 543 """
541 544 # Fragile heuristic: unless new file meta keys are added alphabetically
542 545 # preceding "censored", all censored revisions are prefixed by
543 546 # "\1\ncensored:". A delta producing such a censored revision must be a
544 547 # full-replacement delta, so we inspect the first and only patch in the
545 548 # delta for this prefix.
546 549 hlen = struct.calcsize(b">lll")
547 550 if len(delta) <= hlen:
548 551 return False
549 552
550 553 oldlen = baselenfn(baserev)
551 554 newlen = len(delta) - hlen
552 555 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
553 556 return False
554 557
555 558 add = b"\1\ncensored:"
556 559 addlen = len(add)
557 560 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now