censor: do not process sidedata of censored revision while bundling...
Author: marmoute
Changeset: r48132:24ea3ef3 (default branch)
@@ -1,544 +1,549 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullrev,
17 17 sha1nodeconstants,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 26 from ..revlogutils import sidedata as sidedatamod
27 27 from ..utils import hashutil
28 28
29 29 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
30 30
31 31 # revision data contains extra metadata not part of the official digest
32 32 # Only used in changegroup >= v4.
33 33 CG_FLAG_SIDEDATA = 1
34 34
35 35
36 36 def hashrevisionsha1(text, p1, p2):
37 37 """Compute the SHA-1 for revision data and its parents.
38 38
39 39 This hash combines both the current file contents and its history
40 40 in a manner that makes it easy to distinguish nodes with the same
41 41 content in the revision graph.
42 42 """
43 43 # As of now, if one of the parent nodes is null, p2 is null
44 44 if p2 == sha1nodeconstants.nullid:
45 45 # deep copy of a hash is faster than creating one
46 46 s = _nullhash.copy()
47 47 s.update(p1)
48 48 else:
49 49 # none of the parent nodes are nullid
50 50 if p1 < p2:
51 51 a = p1
52 52 b = p2
53 53 else:
54 54 a = p2
55 55 b = p1
56 56 s = hashutil.sha1(a)
57 57 s.update(b)
58 58 s.update(text)
59 59 return s.digest()
60 60
61 61
62 62 METADATA_RE = re.compile(b'\x01\n')
63 63
64 64
65 65 def parsemeta(text):
66 66 """Parse metadata header from revision data.
67 67
68 68 Returns a 2-tuple of (metadata, offset), where both can be None if there
69 69 is no metadata.
70 70 """
71 71 # text can be buffer, so we can't use .startswith or .index
72 72 if text[:2] != b'\x01\n':
73 73 return None, None
74 74 s = METADATA_RE.search(text, 2).start()
75 75 mtext = text[2:s]
76 76 meta = {}
77 77 for l in mtext.splitlines():
78 78 k, v = l.split(b': ', 1)
79 79 meta[k] = v
80 80 return meta, s + 2
81 81
82 82
83 83 def packmeta(meta, text):
84 84 """Add metadata to fulltext to produce revision text."""
85 85 keys = sorted(meta)
86 86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
87 87 return b'\x01\n%s\x01\n%s' % (metatext, text)
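
Hedged roundtrip example of the metadata envelope written by packmeta() and read back by parsemeta(); it assumes this module is importable as mercurial.utils.storageutil (i.e. a Mercurial checkout or install on the path):

    from mercurial.utils import storageutil

    meta = {b'copy': b'foo.txt', b'copyrev': b'1' * 40}
    packed = storageutil.packmeta(meta, b'file contents\n')

    # The revision text starts with the \x01\n ... \x01\n metadata envelope,
    # with keys emitted in sorted order.
    assert packed.startswith(b'\x01\ncopy: foo.txt\ncopyrev: ' + b'1' * 40 + b'\n\x01\n')

    parsed, offset = storageutil.parsemeta(packed)
    assert parsed == meta
    assert packed[offset:] == b'file contents\n'  # offset points just past the envelope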
88 88
89 89
90 90 def iscensoredtext(text):
91 91 meta = parsemeta(text)[0]
92 92 return meta and b'censored' in meta
93 93
94 94
95 95 def filtermetadata(text):
96 96 """Extract just the revision data from source text.
97 97
98 98 Returns ``text`` unless it has a metadata header, in which case we return
99 99 a new buffer without the metadata.
100 100 """
101 101 if not text.startswith(b'\x01\n'):
102 102 return text
103 103
104 104 offset = text.index(b'\x01\n', 2)
105 105 return text[offset + 2 :]
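
A quick illustration of filtermetadata() stripping the envelope (same mercurial.utils.storageutil import assumption as above):

    from mercurial.utils import storageutil

    assert storageutil.filtermetadata(b'\x01\ncensored: why\n\x01\nreplacement') == b'replacement'
    assert storageutil.filtermetadata(b'no header, returned as-is') == b'no header, returned as-is'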
106 106
107 107
108 108 def filerevisioncopied(store, node):
109 109 """Resolve file revision copy metadata.
110 110
111 111 Returns ``False`` if the file has no copy metadata. Otherwise a
112 112 2-tuple of the source filename and node.
113 113 """
114 114 if store.parents(node)[0] != sha1nodeconstants.nullid:
115 115 return False
116 116
117 117 meta = parsemeta(store.revision(node))[0]
118 118
119 119 # copy and copyrev occur in pairs. In rare cases due to old bugs,
120 120 # one can occur without the other. So ensure both are present to flag
121 121 # as a copy.
122 122 if meta and b'copy' in meta and b'copyrev' in meta:
123 123 return meta[b'copy'], bin(meta[b'copyrev'])
124 124
125 125 return False
126 126
127 127
128 128 def filedataequivalent(store, node, filedata):
129 129 """Determines whether file data is equivalent to a stored node.
130 130
131 131 Returns True if the passed file data would hash to the same value
132 132 as a stored revision and False otherwise.
133 133
134 134 When a stored revision is censored, filedata must be empty to have
135 135 equivalence.
136 136
137 137 When a stored revision has copy metadata, it is ignored as part
138 138 of the compare.
139 139 """
140 140
141 141 if filedata.startswith(b'\x01\n'):
142 142 revisiontext = b'\x01\n\x01\n' + filedata
143 143 else:
144 144 revisiontext = filedata
145 145
146 146 p1, p2 = store.parents(node)
147 147
148 148 computednode = hashrevisionsha1(revisiontext, p1, p2)
149 149
150 150 if computednode == node:
151 151 return True
152 152
153 153 # Censored files compare against the empty file.
154 154 if store.iscensored(store.rev(node)):
155 155 return filedata == b''
156 156
157 157 # Renaming a file produces a different hash, even if the data
158 158 # remains unchanged. Check if that's the case.
159 159 if store.renamed(node):
160 160 return store.read(node) == filedata
161 161
162 162 return False
163 163
164 164
165 165 def iterrevs(storelen, start=0, stop=None):
166 166 """Iterate over revision numbers in a store."""
167 167 step = 1
168 168
169 169 if stop is not None:
170 170 if start > stop:
171 171 step = -1
172 172 stop += step
173 173 if stop > storelen:
174 174 stop = storelen
175 175 else:
176 176 stop = storelen
177 177
178 178 return pycompat.xrange(start, stop, step)
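
Illustrative sketch of iterrevs() semantics with a plain range; no store is needed, and the function name is a local stand-in:

    def sketch_iterrevs(storelen, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1       # iterate backwards when start is past stop
            stop += step        # make the explicit stop inclusive
            if stop > storelen:
                stop = storelen
        else:
            stop = storelen
        return range(start, stop, step)

    assert list(sketch_iterrevs(5)) == [0, 1, 2, 3, 4]             # whole store
    assert list(sketch_iterrevs(5, start=3, stop=1)) == [3, 2, 1]  # reverse, stop inclusive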
179 179
180 180
181 181 def fileidlookup(store, fileid, identifier):
182 182 """Resolve the file node for a value.
183 183
184 184 ``store`` is an object implementing the ``ifileindex`` interface.
185 185
186 186 ``fileid`` can be:
187 187
188 188 * A 20 or 32 byte binary node.
189 189 * An integer revision number
190 190 * A 40 or 64 byte hex node.
191 191 * A bytes that can be parsed as an integer representing a revision number.
192 192
193 193 ``identifier`` is used to populate ``error.LookupError`` with an identifier
194 194 for the store.
195 195
196 196 Raises ``error.LookupError`` on failure.
197 197 """
198 198 if isinstance(fileid, int):
199 199 try:
200 200 return store.node(fileid)
201 201 except IndexError:
202 202 raise error.LookupError(
203 203 b'%d' % fileid, identifier, _(b'no match found')
204 204 )
205 205
206 206 if len(fileid) in (20, 32):
207 207 try:
208 208 store.rev(fileid)
209 209 return fileid
210 210 except error.LookupError:
211 211 pass
212 212
213 213 if len(fileid) in (40, 64):
214 214 try:
215 215 rawnode = bin(fileid)
216 216 store.rev(rawnode)
217 217 return rawnode
218 218 except TypeError:
219 219 pass
220 220
221 221 try:
222 222 rev = int(fileid)
223 223
224 224 if b'%d' % rev != fileid:
225 225 raise ValueError
226 226
227 227 try:
228 228 return store.node(rev)
229 229 except (IndexError, TypeError):
230 230 pass
231 231 except (ValueError, OverflowError):
232 232 pass
233 233
234 234 raise error.LookupError(fileid, identifier, _(b'no match found'))
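
Toy illustration of the lookup cascade using a hypothetical in-memory store exposing only node() and rev() (not a real ifileindex implementation); assumes the module imports as mercurial.utils.storageutil:

    from mercurial.utils import storageutil

    class FakeStore(object):
        def __init__(self, nodes):
            self._nodes = nodes
        def node(self, rev):
            return self._nodes[rev]         # IndexError for unknown revisions
        def rev(self, node):
            return self._nodes.index(node)

    store = FakeStore([b'\xaa' * 20, b'\xbb' * 20])
    # An integer revision, a binary node, and a revision number as bytes all resolve:
    assert storageutil.fileidlookup(store, 0, b'example') == b'\xaa' * 20
    assert storageutil.fileidlookup(store, b'\xbb' * 20, b'example') == b'\xbb' * 20
    assert storageutil.fileidlookup(store, b'1', b'example') == b'\xbb' * 20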
235 235
236 236
237 237 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
238 238 """Resolve information needed to strip revisions.
239 239
240 240 Finds the minimum revision number that must be stripped in order to
241 241 strip ``minlinkrev``.
242 242
243 243 Returns a 2-tuple of the minimum revision number to do that and a set
244 244 of all revision numbers that have linkrevs that would be broken
245 245 by that strip.
246 246
247 247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
248 248 ``headrevs`` is an iterable of head revisions.
249 249 ``linkrevfn`` is a callable that receives a revision and returns a linked
250 250 revision.
251 251 ``parentrevsfn`` is a callable that receives a revision number and returns
252 252 an iterable of its parent revision numbers.
253 253 """
254 254 brokenrevs = set()
255 255 strippoint = tiprev + 1
256 256
257 257 heads = {}
258 258 futurelargelinkrevs = set()
259 259 for head in headrevs:
260 260 headlinkrev = linkrevfn(head)
261 261 heads[head] = headlinkrev
262 262 if headlinkrev >= minlinkrev:
263 263 futurelargelinkrevs.add(headlinkrev)
264 264
265 265 # This algorithm involves walking down the rev graph, starting at the
266 266 # heads. Since the revs are topologically sorted according to linkrev,
267 267 # once all head linkrevs are below the minlink, we know there are
268 268 # no more revs that could have a linkrev greater than minlink.
269 269 # So we can stop walking.
270 270 while futurelargelinkrevs:
271 271 strippoint -= 1
272 272 linkrev = heads.pop(strippoint)
273 273
274 274 if linkrev < minlinkrev:
275 275 brokenrevs.add(strippoint)
276 276 else:
277 277 futurelargelinkrevs.remove(linkrev)
278 278
279 279 for p in parentrevsfn(strippoint):
280 280 if p != nullrev:
281 281 plinkrev = linkrevfn(p)
282 282 heads[p] = plinkrev
283 283 if plinkrev >= minlinkrev:
284 284 futurelargelinkrevs.add(plinkrev)
285 285
286 286 return strippoint, brokenrevs
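
Hedged worked example on a linear five-revision history where linkrev == rev (the callables are local stand-ins; assumes the module imports as mercurial.utils.storageutil). Stripping everything with linkrev >= 3 means stripping from revision 3 upward, and no surviving revision is left with a broken linkrev:

    from mercurial.utils import storageutil

    linkrev = lambda rev: rev
    parentrevs = lambda rev: [rev - 1] if rev > 0 else [-1]  # -1 plays the role of nullrev

    strippoint, broken = storageutil.resolvestripinfo(
        minlinkrev=3,
        tiprev=4,
        headrevs=[4],
        linkrevfn=linkrev,
        parentrevsfn=parentrevs,
    )
    assert (strippoint, broken) == (3, set())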
287 287
288 288
289 289 def emitrevisions(
290 290 store,
291 291 nodes,
292 292 nodesorder,
293 293 resultcls,
294 294 deltaparentfn=None,
295 295 candeltafn=None,
296 296 rawsizefn=None,
297 297 revdifffn=None,
298 298 flagsfn=None,
299 299 deltamode=repository.CG_DELTAMODE_STD,
300 300 revisiondata=False,
301 301 assumehaveparentrevisions=False,
302 302 sidedata_helpers=None,
303 303 ):
304 304 """Generic implementation of ifiledata.emitrevisions().
305 305
306 306 Emitting revision data is subtly complex. This function attempts to
307 307 encapsulate all the logic for doing so in a backend-agnostic way.
308 308
309 309 ``store``
310 310 Object conforming to ``ifilestorage`` interface.
311 311
312 312 ``nodes``
313 313 List of revision nodes whose data to emit.
314 314
315 315 ``resultcls``
316 316 A type implementing the ``irevisiondelta`` interface that will be
317 317 constructed and returned.
318 318
319 319 ``deltaparentfn`` (optional)
320 320 Callable receiving a revision number and returning the revision number
321 321 of a revision that the internal delta is stored against. This delta
322 322 will be preferred over computing a new arbitrary delta.
323 323
324 324 If not defined, a delta will always be computed from raw revision
325 325 data.
326 326
327 327 ``candeltafn`` (optional)
328 328 Callable receiving a pair of revision numbers that returns a bool
329 329 indicating whether a delta between them can be produced.
330 330
331 331 If not defined, it is assumed that any two revisions can delta with
332 332 each other.
333 333
334 334 ``rawsizefn`` (optional)
335 335 Callable receiving a revision number and returning the length of the
336 336 ``store.rawdata(rev)``.
337 337
338 338 If not defined, ``len(store.rawdata(rev))`` will be called.
339 339
340 340 ``revdifffn`` (optional)
341 341 Callable receiving a pair of revision numbers that returns a delta
342 342 between them.
343 343
344 344 If not defined, a delta will be computed by invoking mdiff code
345 345 on ``store.revision()`` results.
346 346
347 347 Defining this function allows a precomputed or stored delta to be
348 348 used without having to compute one.
349 349
350 350 ``flagsfn`` (optional)
351 351 Callable receiving a revision number and returning the integer flags
352 352 value for it. If not defined, the flags value will be 0.
353 353
354 354 ``deltamode``
355 355 constraint on deltas to be sent:
356 356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
357 357 * CG_DELTAMODE_PREV - only delta against "prev",
358 358 * CG_DELTAMODE_FULL - only issue full snapshot.
359 359
360 360 Whether to send fulltext revisions instead of deltas, if allowed.
361 361
362 362 ``nodesorder``
363 363 ``revisiondata``
364 364 ``assumehaveparentrevisions``
365 365 ``sidedata_helpers`` (optional)
366 366 If not None, means that sidedata should be included.
367 367 See `revlogutil.sidedata.get_sidedata_helpers`.
368 368 """
369 369
370 370 fnode = store.node
371 371 frev = store.rev
372 372
373 373 if nodesorder == b'nodes':
374 374 revs = [frev(n) for n in nodes]
375 375 elif nodesorder == b'linear':
376 376 revs = {frev(n) for n in nodes}
377 377 revs = dagop.linearize(revs, store.parentrevs)
378 378 else: # storage and default
379 379 revs = sorted(frev(n) for n in nodes)
380 380
381 381 prevrev = None
382 382
383 383 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
384 384 prevrev = store.parentrevs(revs[0])[0]
385 385
386 386 # Set of revs available to delta against.
387 387 available = set()
388 388
389 389 for rev in revs:
390 390 if rev == nullrev:
391 391 continue
392 392
393 393 node = fnode(rev)
394 394 p1rev, p2rev = store.parentrevs(rev)
395 395
396 396 if deltaparentfn:
397 397 deltaparentrev = deltaparentfn(rev)
398 398 else:
399 399 deltaparentrev = nullrev
400 400
401 401 # Forced delta against previous mode.
402 402 if deltamode == repository.CG_DELTAMODE_PREV:
403 403 baserev = prevrev
404 404
405 405 # We're instructed to send fulltext. Honor that.
406 406 elif deltamode == repository.CG_DELTAMODE_FULL:
407 407 baserev = nullrev
408 408 # We're instructed to use p1. Honor that
409 409 elif deltamode == repository.CG_DELTAMODE_P1:
410 410 baserev = p1rev
411 411
412 412 # There is a delta in storage. We try to use that because it
413 413 # amounts to effectively copying data from storage and is
414 414 # therefore the fastest.
415 415 elif deltaparentrev != nullrev:
416 416 # Base revision was already emitted in this group. We can
417 417 # always safely use the delta.
418 418 if deltaparentrev in available:
419 419 baserev = deltaparentrev
420 420
421 421 # Base revision is a parent that hasn't been emitted already.
422 422 # Use it if we can assume the receiver has the parent revision.
423 423 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
424 424 baserev = deltaparentrev
425 425
426 426 # No guarantee the receiver has the delta parent. Send delta
427 427 # against last revision (if possible), which in the common case
428 428 # should be similar enough to this revision that the delta is
429 429 # reasonable.
430 430 elif prevrev is not None:
431 431 baserev = prevrev
432 432 else:
433 433 baserev = nullrev
434 434
435 435 # Storage has a fulltext revision.
436 436
437 437 # Let's use the previous revision, which is as good a guess as any.
438 438 # There is definitely room to improve this logic.
439 439 elif prevrev is not None:
440 440 baserev = prevrev
441 441 else:
442 442 baserev = nullrev
443 443
444 444 # But we can't actually use our chosen delta base for whatever
445 445 # reason. Reset to fulltext.
446 446 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
447 447 baserev = nullrev
448 448
449 449 revision = None
450 450 delta = None
451 451 baserevisionsize = None
452 452
453 453 if revisiondata:
454 454 if store.iscensored(baserev) or store.iscensored(rev):
455 455 try:
456 456 revision = store.rawdata(node)
457 457 except error.CensoredNodeError as e:
458 458 revision = e.tombstone
459 459
460 460 if baserev != nullrev:
461 461 if rawsizefn:
462 462 baserevisionsize = rawsizefn(baserev)
463 463 else:
464 464 baserevisionsize = len(store.rawdata(baserev))
465 465
466 466 elif (
467 467 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
468 468 ):
469 469 revision = store.rawdata(node)
470 470 available.add(rev)
471 471 else:
472 472 if revdifffn:
473 473 delta = revdifffn(baserev, rev)
474 474 else:
475 475 delta = mdiff.textdiff(
476 476 store.rawdata(baserev), store.rawdata(rev)
477 477 )
478 478
479 479 available.add(rev)
480 480
481 481 serialized_sidedata = None
482 482 sidedata_flags = (0, 0)
483 483 if sidedata_helpers:
484 try:
484 485 old_sidedata = store.sidedata(rev)
486 except error.CensoredNodeError:
487 # skip any potential sidedata of the censored revision
488 sidedata = {}
489 else:
485 490 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
486 491 store=store,
487 492 sidedata_helpers=sidedata_helpers,
488 493 sidedata=old_sidedata,
489 494 rev=rev,
490 495 )
491 496 if sidedata:
492 497 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
493 498
494 499 flags = flagsfn(rev) if flagsfn else 0
495 500 protocol_flags = 0
496 501 if serialized_sidedata:
497 502 # Advertise that sidedata exists to the other side
498 503 protocol_flags |= CG_FLAG_SIDEDATA
499 504 # Computers and removers can return flags to add and/or remove
500 505 flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
501 506
502 507 yield resultcls(
503 508 node=node,
504 509 p1node=fnode(p1rev),
505 510 p2node=fnode(p2rev),
506 511 basenode=fnode(baserev),
507 512 flags=flags,
508 513 baserevisionsize=baserevisionsize,
509 514 revision=revision,
510 515 delta=delta,
511 516 sidedata=serialized_sidedata,
512 517 protocol_flags=protocol_flags,
513 518 )
514 519
515 520 prevrev = rev
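
The behaviour this changeset introduces, shown in isolation: reading the sidedata of a censored revision raises CensoredNodeError, and bundling now treats that as "no sidedata" instead of letting the error propagate. A hedged sketch with a stand-in store and exception class, not the real revlog API:

    class CensoredNodeError(Exception):
        """Stand-in for mercurial.error.CensoredNodeError."""

    class ToyStore(object):
        def sidedata(self, rev):
            raise CensoredNodeError(rev)  # every revision is censored in this toy store

    store = ToyStore()
    try:
        sidedata = store.sidedata(0)
    except CensoredNodeError:
        # skip any potential sidedata of the censored revision
        sidedata = {}
    assert sidedata == {}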
516 521
517 522
518 523 def deltaiscensored(delta, baserev, baselenfn):
519 524 """Determine if a delta represents censored revision data.
520 525
521 526 ``baserev`` is the base revision this delta is encoded against.
522 527 ``baselenfn`` is a callable receiving a revision number that resolves the
523 528 length of the revision fulltext.
524 529
525 530 Returns a bool indicating if the result of the delta represents a censored
526 531 revision.
527 532 """
528 533 # Fragile heuristic: unless new file meta keys are added alphabetically
529 534 # preceding "censored", all censored revisions are prefixed by
530 535 # "\1\ncensored:". A delta producing such a censored revision must be a
531 536 # full-replacement delta, so we inspect the first and only patch in the
532 537 # delta for this prefix.
533 538 hlen = struct.calcsize(b">lll")
534 539 if len(delta) <= hlen:
535 540 return False
536 541
537 542 oldlen = baselenfn(baserev)
538 543 newlen = len(delta) - hlen
539 544 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
540 545 return False
541 546
542 547 add = b"\1\ncensored:"
543 548 addlen = len(add)
544 549 return newlen >= addlen and delta[hlen : hlen + addlen] == add
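
Hedged sketch of the heuristic in practice: a censoring delta is a single full-replacement hunk whose replacement text begins with the "\1\ncensored:" envelope. Assumes the module imports as mercurial.utils.storageutil and that mdiff.replacediffheader() packs a (0, oldlen, newlen) header as above:

    import struct
    from mercurial.utils import storageutil

    oldlen = 11
    tombstone = b'\x01\ncensored: reason\n\x01\n'
    censoring = struct.pack(b'>lll', 0, oldlen, len(tombstone)) + tombstone
    assert storageutil.deltaiscensored(censoring, baserev=0, baselenfn=lambda rev: oldlen)

    # An ordinary full-replacement delta lacks the censored prefix:
    plain = struct.pack(b'>lll', 0, oldlen, 5) + b'hello'
    assert not storageutil.deltaiscensored(plain, baserev=0, baselenfn=lambda rev: oldlen)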