##// END OF EJS Templates
filelog: add a comment explaining a fast path in filerevisioncopied()...
Simon Sapin -
r49377:5026a0d3 default
parent child Browse files
Show More
@@ -1,549 +1,556 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullrev,
17 17 sha1nodeconstants,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 26 from ..revlogutils import sidedata as sidedatamod
27 27 from ..utils import hashutil
28 28
29 29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
30 30
31 31 # revision data contains extra metadata not part of the official digest
32 32 # Only used in changegroup >= v4.
33 33 CG_FLAG_SIDEDATA = 1
34 34
35 35
36 36 def hashrevisionsha1(text, p1, p2):
37 37 """Compute the SHA-1 for revision data and its parents.
38 38
39 39 This hash combines both the current file contents and its history
40 40 in a manner that makes it easy to distinguish nodes with the same
41 41 content in the revision graph.
42 42 """
43 43 # As of now, if one of the parent node is null, p2 is null
44 44 if p2 == sha1nodeconstants.nullid:
45 45 # deep copy of a hash is faster than creating one
46 46 s = _nullhash.copy()
47 47 s.update(p1)
48 48 else:
49 49 # none of the parent nodes are nullid
50 50 if p1 < p2:
51 51 a = p1
52 52 b = p2
53 53 else:
54 54 a = p2
55 55 b = p1
56 56 s = hashutil.sha1(a)
57 57 s.update(b)
58 58 s.update(text)
59 59 return s.digest()
60 60
61 61
62 62 METADATA_RE = re.compile(b'\x01\n')
63 63
64 64
65 65 def parsemeta(text):
66 66 """Parse metadata header from revision data.
67 67
68 68 Returns a 2-tuple of (metadata, offset), where both can be None if there
69 69 is no metadata.
70 70 """
71 71 # text can be buffer, so we can't use .startswith or .index
72 72 if text[:2] != b'\x01\n':
73 73 return None, None
74 74 s = METADATA_RE.search(text, 2).start()
75 75 mtext = text[2:s]
76 76 meta = {}
77 77 for l in mtext.splitlines():
78 78 k, v = l.split(b': ', 1)
79 79 meta[k] = v
80 80 return meta, s + 2
81 81
82 82
83 83 def packmeta(meta, text):
84 84 """Add metadata to fulltext to produce revision text."""
85 85 keys = sorted(meta)
86 86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
87 87 return b'\x01\n%s\x01\n%s' % (metatext, text)
88 88
89 89
90 90 def iscensoredtext(text):
91 91 meta = parsemeta(text)[0]
92 92 return meta and b'censored' in meta
93 93
94 94
95 95 def filtermetadata(text):
96 96 """Extract just the revision data from source text.
97 97
98 98 Returns ``text`` unless it has a metadata header, in which case we return
99 99 a new buffer without hte metadata.
100 100 """
101 101 if not text.startswith(b'\x01\n'):
102 102 return text
103 103
104 104 offset = text.index(b'\x01\n', 2)
105 105 return text[offset + 2 :]
106 106
107 107
108 108 def filerevisioncopied(store, node):
109 109 """Resolve file revision copy metadata.
110 110
111 111 Returns ``False`` if the file has no copy metadata. Otherwise a
112 112 2-tuple of the source filename and node.
113 113 """
114 114 if store.parents(node)[0] != sha1nodeconstants.nullid:
115 # When creating a copy or move we set filelog parents to null,
116 # because contents are probably unrelated and making a delta
117 # would not be useful.
118 # Conversely, if filelog p1 is non-null we know
119 # there is no copy metadata.
120 # In the presence of merges, this reasoning becomes invalid
121 # if we reorder parents. See tests/test-issue6528.t.
115 122 return False
116 123
117 124 meta = parsemeta(store.revision(node))[0]
118 125
119 126 # copy and copyrev occur in pairs. In rare cases due to old bugs,
120 127 # one can occur without the other. So ensure both are present to flag
121 128 # as a copy.
122 129 if meta and b'copy' in meta and b'copyrev' in meta:
123 130 return meta[b'copy'], bin(meta[b'copyrev'])
124 131
125 132 return False
126 133
127 134
128 135 def filedataequivalent(store, node, filedata):
129 136 """Determines whether file data is equivalent to a stored node.
130 137
131 138 Returns True if the passed file data would hash to the same value
132 139 as a stored revision and False otherwise.
133 140
134 141 When a stored revision is censored, filedata must be empty to have
135 142 equivalence.
136 143
137 144 When a stored revision has copy metadata, it is ignored as part
138 145 of the compare.
139 146 """
140 147
141 148 if filedata.startswith(b'\x01\n'):
142 149 revisiontext = b'\x01\n\x01\n' + filedata
143 150 else:
144 151 revisiontext = filedata
145 152
146 153 p1, p2 = store.parents(node)
147 154
148 155 computednode = hashrevisionsha1(revisiontext, p1, p2)
149 156
150 157 if computednode == node:
151 158 return True
152 159
153 160 # Censored files compare against the empty file.
154 161 if store.iscensored(store.rev(node)):
155 162 return filedata == b''
156 163
157 164 # Renaming a file produces a different hash, even if the data
158 165 # remains unchanged. Check if that's the case.
159 166 if store.renamed(node):
160 167 return store.read(node) == filedata
161 168
162 169 return False
163 170
164 171
165 172 def iterrevs(storelen, start=0, stop=None):
166 173 """Iterate over revision numbers in a store."""
167 174 step = 1
168 175
169 176 if stop is not None:
170 177 if start > stop:
171 178 step = -1
172 179 stop += step
173 180 if stop > storelen:
174 181 stop = storelen
175 182 else:
176 183 stop = storelen
177 184
178 185 return pycompat.xrange(start, stop, step)
179 186
180 187
181 188 def fileidlookup(store, fileid, identifier):
182 189 """Resolve the file node for a value.
183 190
184 191 ``store`` is an object implementing the ``ifileindex`` interface.
185 192
186 193 ``fileid`` can be:
187 194
188 195 * A 20 or 32 byte binary node.
189 196 * An integer revision number
190 197 * A 40 or 64 byte hex node.
191 198 * A bytes that can be parsed as an integer representing a revision number.
192 199
193 200 ``identifier`` is used to populate ``error.LookupError`` with an identifier
194 201 for the store.
195 202
196 203 Raises ``error.LookupError`` on failure.
197 204 """
198 205 if isinstance(fileid, int):
199 206 try:
200 207 return store.node(fileid)
201 208 except IndexError:
202 209 raise error.LookupError(
203 210 b'%d' % fileid, identifier, _(b'no match found')
204 211 )
205 212
206 213 if len(fileid) in (20, 32):
207 214 try:
208 215 store.rev(fileid)
209 216 return fileid
210 217 except error.LookupError:
211 218 pass
212 219
213 220 if len(fileid) in (40, 64):
214 221 try:
215 222 rawnode = bin(fileid)
216 223 store.rev(rawnode)
217 224 return rawnode
218 225 except TypeError:
219 226 pass
220 227
221 228 try:
222 229 rev = int(fileid)
223 230
224 231 if b'%d' % rev != fileid:
225 232 raise ValueError
226 233
227 234 try:
228 235 return store.node(rev)
229 236 except (IndexError, TypeError):
230 237 pass
231 238 except (ValueError, OverflowError):
232 239 pass
233 240
234 241 raise error.LookupError(fileid, identifier, _(b'no match found'))
235 242
236 243
237 244 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
238 245 """Resolve information needed to strip revisions.
239 246
240 247 Finds the minimum revision number that must be stripped in order to
241 248 strip ``minlinkrev``.
242 249
243 250 Returns a 2-tuple of the minimum revision number to do that and a set
244 251 of all revision numbers that have linkrevs that would be broken
245 252 by that strip.
246 253
247 254 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
248 255 ``headrevs`` is an iterable of head revisions.
249 256 ``linkrevfn`` is a callable that receives a revision and returns a linked
250 257 revision.
251 258 ``parentrevsfn`` is a callable that receives a revision number and returns
252 259 an iterable of its parent revision numbers.
253 260 """
254 261 brokenrevs = set()
255 262 strippoint = tiprev + 1
256 263
257 264 heads = {}
258 265 futurelargelinkrevs = set()
259 266 for head in headrevs:
260 267 headlinkrev = linkrevfn(head)
261 268 heads[head] = headlinkrev
262 269 if headlinkrev >= minlinkrev:
263 270 futurelargelinkrevs.add(headlinkrev)
264 271
265 272 # This algorithm involves walking down the rev graph, starting at the
266 273 # heads. Since the revs are topologically sorted according to linkrev,
267 274 # once all head linkrevs are below the minlink, we know there are
268 275 # no more revs that could have a linkrev greater than minlink.
269 276 # So we can stop walking.
270 277 while futurelargelinkrevs:
271 278 strippoint -= 1
272 279 linkrev = heads.pop(strippoint)
273 280
274 281 if linkrev < minlinkrev:
275 282 brokenrevs.add(strippoint)
276 283 else:
277 284 futurelargelinkrevs.remove(linkrev)
278 285
279 286 for p in parentrevsfn(strippoint):
280 287 if p != nullrev:
281 288 plinkrev = linkrevfn(p)
282 289 heads[p] = plinkrev
283 290 if plinkrev >= minlinkrev:
284 291 futurelargelinkrevs.add(plinkrev)
285 292
286 293 return strippoint, brokenrevs
287 294
288 295
289 296 def emitrevisions(
290 297 store,
291 298 nodes,
292 299 nodesorder,
293 300 resultcls,
294 301 deltaparentfn=None,
295 302 candeltafn=None,
296 303 rawsizefn=None,
297 304 revdifffn=None,
298 305 flagsfn=None,
299 306 deltamode=repository.CG_DELTAMODE_STD,
300 307 revisiondata=False,
301 308 assumehaveparentrevisions=False,
302 309 sidedata_helpers=None,
303 310 ):
304 311 """Generic implementation of ifiledata.emitrevisions().
305 312
306 313 Emitting revision data is subtly complex. This function attempts to
307 314 encapsulate all the logic for doing so in a backend-agnostic way.
308 315
309 316 ``store``
310 317 Object conforming to ``ifilestorage`` interface.
311 318
312 319 ``nodes``
313 320 List of revision nodes whose data to emit.
314 321
315 322 ``resultcls``
316 323 A type implementing the ``irevisiondelta`` interface that will be
317 324 constructed and returned.
318 325
319 326 ``deltaparentfn`` (optional)
320 327 Callable receiving a revision number and returning the revision number
321 328 of a revision that the internal delta is stored against. This delta
322 329 will be preferred over computing a new arbitrary delta.
323 330
324 331 If not defined, a delta will always be computed from raw revision
325 332 data.
326 333
327 334 ``candeltafn`` (optional)
328 335 Callable receiving a pair of revision numbers that returns a bool
329 336 indicating whether a delta between them can be produced.
330 337
331 338 If not defined, it is assumed that any two revisions can delta with
332 339 each other.
333 340
334 341 ``rawsizefn`` (optional)
335 342 Callable receiving a revision number and returning the length of the
336 343 ``store.rawdata(rev)``.
337 344
338 345 If not defined, ``len(store.rawdata(rev))`` will be called.
339 346
340 347 ``revdifffn`` (optional)
341 348 Callable receiving a pair of revision numbers that returns a delta
342 349 between them.
343 350
344 351 If not defined, a delta will be computed by invoking mdiff code
345 352 on ``store.revision()`` results.
346 353
347 354 Defining this function allows a precomputed or stored delta to be
348 355 used without having to compute on.
349 356
350 357 ``flagsfn`` (optional)
351 358 Callable receiving a revision number and returns the integer flags
352 359 value for it. If not defined, flags value will be 0.
353 360
354 361 ``deltamode``
355 362 constaint on delta to be sent:
356 363 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
357 364 * CG_DELTAMODE_PREV - only delta against "prev",
358 365 * CG_DELTAMODE_FULL - only issue full snapshot.
359 366
360 367 Whether to send fulltext revisions instead of deltas, if allowed.
361 368
362 369 ``nodesorder``
363 370 ``revisiondata``
364 371 ``assumehaveparentrevisions``
365 372 ``sidedata_helpers`` (optional)
366 373 If not None, means that sidedata should be included.
367 374 See `revlogutil.sidedata.get_sidedata_helpers`.
368 375 """
369 376
370 377 fnode = store.node
371 378 frev = store.rev
372 379
373 380 if nodesorder == b'nodes':
374 381 revs = [frev(n) for n in nodes]
375 382 elif nodesorder == b'linear':
376 383 revs = {frev(n) for n in nodes}
377 384 revs = dagop.linearize(revs, store.parentrevs)
378 385 else: # storage and default
379 386 revs = sorted(frev(n) for n in nodes)
380 387
381 388 prevrev = None
382 389
383 390 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
384 391 prevrev = store.parentrevs(revs[0])[0]
385 392
386 393 # Set of revs available to delta against.
387 394 available = set()
388 395
389 396 for rev in revs:
390 397 if rev == nullrev:
391 398 continue
392 399
393 400 node = fnode(rev)
394 401 p1rev, p2rev = store.parentrevs(rev)
395 402
396 403 if deltaparentfn:
397 404 deltaparentrev = deltaparentfn(rev)
398 405 else:
399 406 deltaparentrev = nullrev
400 407
401 408 # Forced delta against previous mode.
402 409 if deltamode == repository.CG_DELTAMODE_PREV:
403 410 baserev = prevrev
404 411
405 412 # We're instructed to send fulltext. Honor that.
406 413 elif deltamode == repository.CG_DELTAMODE_FULL:
407 414 baserev = nullrev
408 415 # We're instructed to use p1. Honor that
409 416 elif deltamode == repository.CG_DELTAMODE_P1:
410 417 baserev = p1rev
411 418
412 419 # There is a delta in storage. We try to use that because it
413 420 # amounts to effectively copying data from storage and is
414 421 # therefore the fastest.
415 422 elif deltaparentrev != nullrev:
416 423 # Base revision was already emitted in this group. We can
417 424 # always safely use the delta.
418 425 if deltaparentrev in available:
419 426 baserev = deltaparentrev
420 427
421 428 # Base revision is a parent that hasn't been emitted already.
422 429 # Use it if we can assume the receiver has the parent revision.
423 430 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
424 431 baserev = deltaparentrev
425 432
426 433 # No guarantee the receiver has the delta parent. Send delta
427 434 # against last revision (if possible), which in the common case
428 435 # should be similar enough to this revision that the delta is
429 436 # reasonable.
430 437 elif prevrev is not None:
431 438 baserev = prevrev
432 439 else:
433 440 baserev = nullrev
434 441
435 442 # Storage has a fulltext revision.
436 443
437 444 # Let's use the previous revision, which is as good a guess as any.
438 445 # There is definitely room to improve this logic.
439 446 elif prevrev is not None:
440 447 baserev = prevrev
441 448 else:
442 449 baserev = nullrev
443 450
444 451 # But we can't actually use our chosen delta base for whatever
445 452 # reason. Reset to fulltext.
446 453 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
447 454 baserev = nullrev
448 455
449 456 revision = None
450 457 delta = None
451 458 baserevisionsize = None
452 459
453 460 if revisiondata:
454 461 if store.iscensored(baserev) or store.iscensored(rev):
455 462 try:
456 463 revision = store.rawdata(node)
457 464 except error.CensoredNodeError as e:
458 465 revision = e.tombstone
459 466
460 467 if baserev != nullrev:
461 468 if rawsizefn:
462 469 baserevisionsize = rawsizefn(baserev)
463 470 else:
464 471 baserevisionsize = len(store.rawdata(baserev))
465 472
466 473 elif (
467 474 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
468 475 ):
469 476 revision = store.rawdata(node)
470 477 available.add(rev)
471 478 else:
472 479 if revdifffn:
473 480 delta = revdifffn(baserev, rev)
474 481 else:
475 482 delta = mdiff.textdiff(
476 483 store.rawdata(baserev), store.rawdata(rev)
477 484 )
478 485
479 486 available.add(rev)
480 487
481 488 serialized_sidedata = None
482 489 sidedata_flags = (0, 0)
483 490 if sidedata_helpers:
484 491 try:
485 492 old_sidedata = store.sidedata(rev)
486 493 except error.CensoredNodeError:
487 494 # skip any potential sidedata of the censored revision
488 495 sidedata = {}
489 496 else:
490 497 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
491 498 store=store,
492 499 sidedata_helpers=sidedata_helpers,
493 500 sidedata=old_sidedata,
494 501 rev=rev,
495 502 )
496 503 if sidedata:
497 504 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
498 505
499 506 flags = flagsfn(rev) if flagsfn else 0
500 507 protocol_flags = 0
501 508 if serialized_sidedata:
502 509 # Advertise that sidedata exists to the other side
503 510 protocol_flags |= CG_FLAG_SIDEDATA
504 511 # Computers and removers can return flags to add and/or remove
505 512 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
506 513
507 514 yield resultcls(
508 515 node=node,
509 516 p1node=fnode(p1rev),
510 517 p2node=fnode(p2rev),
511 518 basenode=fnode(baserev),
512 519 flags=flags,
513 520 baserevisionsize=baserevisionsize,
514 521 revision=revision,
515 522 delta=delta,
516 523 sidedata=serialized_sidedata,
517 524 protocol_flags=protocol_flags,
518 525 )
519 526
520 527 prevrev = rev
521 528
522 529
523 530 def deltaiscensored(delta, baserev, baselenfn):
524 531 """Determine if a delta represents censored revision data.
525 532
526 533 ``baserev`` is the base revision this delta is encoded against.
527 534 ``baselenfn`` is a callable receiving a revision number that resolves the
528 535 length of the revision fulltext.
529 536
530 537 Returns a bool indicating if the result of the delta represents a censored
531 538 revision.
532 539 """
533 540 # Fragile heuristic: unless new file meta keys are added alphabetically
534 541 # preceding "censored", all censored revisions are prefixed by
535 542 # "\1\ncensored:". A delta producing such a censored revision must be a
536 543 # full-replacement delta, so we inspect the first and only patch in the
537 544 # delta for this prefix.
538 545 hlen = struct.calcsize(b">lll")
539 546 if len(delta) <= hlen:
540 547 return False
541 548
542 549 oldlen = baselenfn(baserev)
543 550 newlen = len(delta) - hlen
544 551 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
545 552 return False
546 553
547 554 add = b"\1\ncensored:"
548 555 addlen = len(add)
549 556 return newlen >= addlen and delta[hlen : hlen + addlen] == add
General Comments 0
You need to be logged in to leave comments. Login now