revlogutils: remember known metadata parents for issue6528...
Joerg Sonnenberger
r52807:3dbbb7d9 default
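This commit optimizes the issue6528 detection fast path in mercurial/revlogutils/rewrite.py: a filelog revision whose parent layout already rules out the corruption (a null p1, or a real p2) is now remembered in `metadata_cache` instead of being left out of it, so later revisions that delta against it no longer fall back to the slow full-text check. For context, a revision carries copy metadata when its raw text starts with the b'\x01\n' marker, and metadata-carrying revisions are expected to keep p1 at null. A minimal sketch of the underlying check (plain Python, not the Mercurial API):

    nullrev = -1

    def is_affected(raw_text, p1, p2):
        # copy metadata is framed by b'\x01\n' at the start of the raw text
        has_meta = bytes(raw_text[:2]) == b'\x01\n'
        # metadata revisions should have a null p1; the reverse ordering is
        # the corruption that issue6528 repairs
        return has_meta and p1 != nullrev and p2 == nullrev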
@@ -1,886 +1,887
1 1 # censor code related to censoring revisions
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import annotations
11 11
12 12 import binascii
13 13 import contextlib
14 14 import os
15 15 import struct
16 16
17 17 from ..node import (
18 18 nullrev,
19 19 )
20 20 from .constants import (
21 21 COMP_MODE_PLAIN,
22 22 ENTRY_DATA_COMPRESSED_LENGTH,
23 23 ENTRY_DATA_COMPRESSION_MODE,
24 24 ENTRY_DATA_OFFSET,
25 25 ENTRY_DATA_UNCOMPRESSED_LENGTH,
26 26 ENTRY_DELTA_BASE,
27 27 ENTRY_LINK_REV,
28 28 ENTRY_NODE_ID,
29 29 ENTRY_PARENT_1,
30 30 ENTRY_PARENT_2,
31 31 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
32 32 ENTRY_SIDEDATA_COMPRESSION_MODE,
33 33 ENTRY_SIDEDATA_OFFSET,
34 34 REVIDX_ISCENSORED,
35 35 REVLOGV0,
36 36 REVLOGV1,
37 37 )
38 38 from ..i18n import _
39 39
40 40 from .. import (
41 41 error,
42 42 mdiff,
43 43 pycompat,
44 44 revlogutils,
45 45 util,
46 46 )
47 47 from ..utils import (
48 48 storageutil,
49 49 )
50 50 from . import (
51 51 constants,
52 52 deltas,
53 53 )
54 54
55 55
56 56 def v1_censor(rl, tr, censor_nodes, tombstone=b''):
57 57 """censors a revision in a "version 1" revlog"""
58 58 assert rl._format_version == constants.REVLOGV1, rl._format_version
59 59
60 60 # avoid cycle
61 61 from .. import revlog
62 62
63 63 censor_revs = set(rl.rev(node) for node in censor_nodes)
64 64 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
65 65
66 66 # Rewriting the revlog in place is hard. Our strategy for censoring is
67 67 # to create a new revlog, copy all revisions to it, then replace the
68 68 # revlogs on transaction close.
69 69 #
70 70 # This is a bit dangerous. We could easily have a mismatch of state.
71 71 newrl = revlog.revlog(
72 72 rl.opener,
73 73 target=rl.target,
74 74 radix=rl.radix,
75 75 postfix=b'tmpcensored',
76 76 censorable=True,
77 77 data_config=rl.data_config,
78 78 delta_config=rl.delta_config,
79 79 feature_config=rl.feature_config,
80 80 may_inline=rl._inline,
81 81 )
82 82 # inline splitting will prepare some transaction work that will get
83 83 # confused by the final file move. So if there is a risk of not being
84 84 # inline at the end, we prevent the new revlog from being inline in the
85 85 # first place.
86 86 assert not (newrl._inline and not rl._inline)
87 87
88 88 for rev in rl.revs():
89 89 node = rl.node(rev)
90 90 p1, p2 = rl.parents(node)
91 91
92 92 if rev in censor_revs:
93 93 newrl.addrawrevision(
94 94 tombstone,
95 95 tr,
96 96 rl.linkrev(rev),
97 97 p1,
98 98 p2,
99 99 node,
100 100 constants.REVIDX_ISCENSORED,
101 101 )
102 102
103 103 if newrl.deltaparent(rev) != nullrev:
104 104 m = _(b'censored revision stored as delta; cannot censor')
105 105 h = _(
106 106 b'censoring of revlogs is not fully implemented;'
107 107 b' please report this bug'
108 108 )
109 109 raise error.Abort(m, hint=h)
110 110 continue
111 111
112 112 if rl.iscensored(rev):
113 113 if rl.deltaparent(rev) != nullrev:
114 114 m = _(
115 115 b'cannot censor due to censored '
116 116 b'revision having delta stored'
117 117 )
118 118 raise error.Abort(m)
119 119 rawtext = rl._inner._chunk(rev)
120 120 else:
121 121 rawtext = rl.rawdata(rev)
122 122
123 123 newrl.addrawrevision(
124 124 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
125 125 )
126 126
127 127 tr.addbackup(rl._indexfile, location=b'store')
128 128 if not rl._inline:
129 129 tr.addbackup(rl._datafile, location=b'store')
130 130
131 131 rl.opener.rename(newrl._indexfile, rl._indexfile)
132 132 if newrl._inline:
133 133 assert rl._inline
134 134 else:
135 135 assert not rl._inline
136 136 rl.opener.rename(newrl._datafile, rl._datafile)
137 137
138 138 rl.clearcaches()
139 139 chunk_cache = rl._loadindex()
140 140 rl._load_inner(chunk_cache)
141 141
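# A minimal usage sketch for the function above (hypothetical file name,
# assuming a locked repository and an open transaction `tr`):
#
#     fl = repo.file(b'secret.txt')  # filelog wrapping a v1 revlog
#     v1_censor(fl._revlog, tr, [fl.node(3)], tombstone=b'removed by admin')
#
# The rewrite happens in a sibling "tmpcensored" revlog that replaces the
# original files when the transaction closes, as the comment above explains.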
142 142
143 143 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
144 144 """censors a revision in a "version 2" revlog"""
145 145 assert revlog._format_version != REVLOGV0, revlog._format_version
146 146 assert revlog._format_version != REVLOGV1, revlog._format_version
147 147
148 148 censor_revs = {revlog.rev(node) for node in censor_nodes}
149 149 _rewrite_v2(revlog, tr, censor_revs, tombstone)
150 150
151 151
152 152 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
153 153 """rewrite a revlog to censor some of its content
154 154
155 155 General principle
156 156
157 157 We create new revlog files (index/data/sidedata) to copy the content of
158 158 the existing data without the censored data.
159 159
160 160 We need to recompute new delta for any revision that used the censored
161 161 revision as delta base. As the cumulative size of the new delta may be
162 162 large, we store them in a temporary file until they are stored in their
163 163 final destination.
164 164
165 165 All data before the censored data can be blindly copied. The rest needs
166 166 to be copied as we go and the associated index entry needs adjustment.
167 167 """
168 168 assert revlog._format_version != REVLOGV0, revlog._format_version
169 169 assert revlog._format_version != REVLOGV1, revlog._format_version
170 170
171 171 old_index = revlog.index
172 172 docket = revlog._docket
173 173
174 174 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
175 175
176 176 first_excl_rev = min(censor_revs)
177 177
178 178 first_excl_entry = revlog.index[first_excl_rev]
179 179 index_cutoff = revlog.index.entry_size * first_excl_rev
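# the first index field packs (data offset << 16 | flags) into a single
# integer, so dropping the low 16 flag bits recovers the byte offset of the
# first censored revision inside the data file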
180 180 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
181 181 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
182 182
183 183 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
184 184 # rev → (new_base, data_start, data_end, compression_mode)
185 185 rewritten_entries = _precompute_rewritten_delta(
186 186 revlog,
187 187 old_index,
188 188 censor_revs,
189 189 tmp_storage,
190 190 )
191 191
192 192 all_files = _setup_new_files(
193 193 revlog,
194 194 index_cutoff,
195 195 data_cutoff,
196 196 sidedata_cutoff,
197 197 )
198 198
199 199 # we don't need to open the old index file since its content already
200 200 # exists in a usable form in `old_index`.
201 201 with all_files() as open_files:
202 202 (
203 203 old_data_file,
204 204 old_sidedata_file,
205 205 new_index_file,
206 206 new_data_file,
207 207 new_sidedata_file,
208 208 ) = open_files
209 209
210 210 # writing the censored revision
211 211
212 212 # Writing all subsequent revisions
213 213 for rev in range(first_excl_rev, len(old_index)):
214 214 if rev in censor_revs:
215 215 _rewrite_censor(
216 216 revlog,
217 217 old_index,
218 218 open_files,
219 219 rev,
220 220 tombstone,
221 221 )
222 222 else:
223 223 _rewrite_simple(
224 224 revlog,
225 225 old_index,
226 226 open_files,
227 227 rev,
228 228 rewritten_entries,
229 229 tmp_storage,
230 230 )
231 231 docket.write(transaction=None, stripping=True)
232 232
233 233
234 234 def _precompute_rewritten_delta(
235 235 revlog,
236 236 old_index,
237 237 excluded_revs,
238 238 tmp_storage,
239 239 ):
240 240 """Compute new delta for revisions whose delta is based on revision that
241 241 will not survive as is.
242 242
243 243 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
244 244 """
245 245 dc = deltas.deltacomputer(revlog)
246 246 rewritten_entries = {}
247 247 first_excl_rev = min(excluded_revs)
248 248 with revlog.reading():
249 249 for rev in range(first_excl_rev, len(old_index)):
250 250 if rev in excluded_revs:
251 251 # this revision will be rewritten as a censored tombstone, so
252 252 # there is no need to recompute a delta for it.
253 253 continue
254 254 entry = old_index[rev]
255 255 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
256 256 continue
257 257 # This is a revision that uses the censored revision as the base
258 258 # for its delta. We need to compute a new delta for it.
259 259 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
260 260 # this revision is empty, we can delta against nullrev
261 261 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
262 262 else:
263 263 text = revlog.rawdata(rev)
264 264 info = revlogutils.revisioninfo(
265 265 node=entry[ENTRY_NODE_ID],
266 266 p1=revlog.node(entry[ENTRY_PARENT_1]),
267 267 p2=revlog.node(entry[ENTRY_PARENT_2]),
268 268 btext=[text],
269 269 textlen=len(text),
270 270 cachedelta=None,
271 271 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
272 272 )
273 273 d = dc.finddeltainfo(
274 274 info, excluded_bases=excluded_revs, target_rev=rev
275 275 )
276 276 default_comp = revlog._docket.default_compression_header
277 277 comp_mode, d = deltas.delta_compression(default_comp, d)
278 278 # using `tell` is a bit lazy, but we are not here for speed
279 279 start = tmp_storage.tell()
280 280 tmp_storage.write(d.data[1])
281 281 end = tmp_storage.tell()
282 282 rewritten_entries[rev] = (d.base, start, end, comp_mode)
283 283 return rewritten_entries
284 284
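# The mapping above is consumed by _rewrite_simple below. For example
# (hypothetical values), {7: (2, 0, 123, COMP_MODE_PLAIN)} means revision 7
# becomes a delta against revision 2, stored uncompressed as bytes [0, 123)
# of the temporary file.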
285 285
286 286 def _setup_new_files(
287 287 revlog,
288 288 index_cutoff,
289 289 data_cutoff,
290 290 sidedata_cutoff,
291 291 ):
292 292 """
293 293
294 294 return a context manager to open all the relevant files:
295 295 - old_data_file,
296 296 - old_sidedata_file,
297 297 - new_index_file,
298 298 - new_data_file,
299 299 - new_sidedata_file,
300 300
301 301 The old_index_file is not here because it is accessed through the
302 302 `old_index` object of the calling function.
303 303 """
304 304 docket = revlog._docket
305 305 old_index_filepath = revlog.opener.join(docket.index_filepath())
306 306 old_data_filepath = revlog.opener.join(docket.data_filepath())
307 307 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
308 308
309 309 new_index_filepath = revlog.opener.join(docket.new_index_file())
310 310 new_data_filepath = revlog.opener.join(docket.new_data_file())
311 311 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
312 312
313 313 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
314 314 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
315 315 util.copyfile(
316 316 old_sidedata_filepath,
317 317 new_sidedata_filepath,
318 318 nb_bytes=sidedata_cutoff,
319 319 )
320 320 revlog.opener.register_file(docket.index_filepath())
321 321 revlog.opener.register_file(docket.data_filepath())
322 322 revlog.opener.register_file(docket.sidedata_filepath())
323 323
324 324 docket.index_end = index_cutoff
325 325 docket.data_end = data_cutoff
326 326 docket.sidedata_end = sidedata_cutoff
327 327
328 328 # reload the revlog internal information
329 329 revlog.clearcaches()
330 330 revlog._loadindex(docket=docket)
331 331
332 332 @contextlib.contextmanager
333 333 def all_files_opener():
334 334 # hide the opening in a helper function to please check-code, black
335 335 # and various Python versions at the same time
336 336 with open(old_data_filepath, 'rb') as old_data_file:
337 337 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
338 338 with open(new_index_filepath, 'r+b') as new_index_file:
339 339 with open(new_data_filepath, 'r+b') as new_data_file:
340 340 with open(
341 341 new_sidedata_filepath, 'r+b'
342 342 ) as new_sidedata_file:
343 343 new_index_file.seek(0, os.SEEK_END)
344 344 assert new_index_file.tell() == index_cutoff
345 345 new_data_file.seek(0, os.SEEK_END)
346 346 assert new_data_file.tell() == data_cutoff
347 347 new_sidedata_file.seek(0, os.SEEK_END)
348 348 assert new_sidedata_file.tell() == sidedata_cutoff
349 349 yield (
350 350 old_data_file,
351 351 old_sidedata_file,
352 352 new_index_file,
353 353 new_data_file,
354 354 new_sidedata_file,
355 355 )
356 356
357 357 return all_files_opener
358 358
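# An equivalent shape using contextlib.ExitStack (a sketch; the nested
# `with` blocks above are kept to satisfy the project's checkers and
# formatters, as their comment notes):
#
#     with contextlib.ExitStack() as stack:
#         files = tuple(
#             stack.enter_context(open(path, mode))
#             for path, mode in paths_and_modes  # hypothetical pairs
#         )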
359 359
360 360 def _rewrite_simple(
361 361 revlog,
362 362 old_index,
363 363 all_files,
364 364 rev,
365 365 rewritten_entries,
366 366 tmp_storage,
367 367 ):
368 368 """append a normal revision to the index after the rewritten one(s)"""
369 369 (
370 370 old_data_file,
371 371 old_sidedata_file,
372 372 new_index_file,
373 373 new_data_file,
374 374 new_sidedata_file,
375 375 ) = all_files
376 376 entry = old_index[rev]
377 377 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
378 378 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
379 379
380 380 if rev not in rewritten_entries:
381 381 old_data_file.seek(old_data_offset)
382 382 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
383 383 new_data = old_data_file.read(new_data_size)
384 384 data_delta_base = entry[ENTRY_DELTA_BASE]
385 385 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
386 386 else:
387 387 (
388 388 data_delta_base,
389 389 start,
390 390 end,
391 391 d_comp_mode,
392 392 ) = rewritten_entries[rev]
393 393 new_data_size = end - start
394 394 tmp_storage.seek(start)
395 395 new_data = tmp_storage.read(new_data_size)
396 396
397 397 # It might be faster to group contiguous read/write operations;
398 398 # however, censoring is an operation that is not focused on
399 399 # stellar performance, so this optimisation has not been
400 400 # written yet.
401 401 new_data_offset = new_data_file.tell()
402 402 new_data_file.write(new_data)
403 403
404 404 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
405 405 new_sidedata_offset = new_sidedata_file.tell()
406 406 if 0 < sidedata_size:
407 407 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
408 408 old_sidedata_file.seek(old_sidedata_offset)
409 409 new_sidedata = old_sidedata_file.read(sidedata_size)
410 410 new_sidedata_file.write(new_sidedata)
411 411
412 412 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
413 413 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
414 414 assert data_delta_base <= rev, (data_delta_base, rev)
415 415
416 416 new_entry = revlogutils.entry(
417 417 flags=flags,
418 418 data_offset=new_data_offset,
419 419 data_compressed_length=new_data_size,
420 420 data_uncompressed_length=data_uncompressed_length,
421 421 data_delta_base=data_delta_base,
422 422 link_rev=entry[ENTRY_LINK_REV],
423 423 parent_rev_1=entry[ENTRY_PARENT_1],
424 424 parent_rev_2=entry[ENTRY_PARENT_2],
425 425 node_id=entry[ENTRY_NODE_ID],
426 426 sidedata_offset=new_sidedata_offset,
427 427 sidedata_compressed_length=sidedata_size,
428 428 data_compression_mode=d_comp_mode,
429 429 sidedata_compression_mode=sd_com_mode,
430 430 )
431 431 revlog.index.append(new_entry)
432 432 entry_bin = revlog.index.entry_binary(rev)
433 433 new_index_file.write(entry_bin)
434 434
435 435 revlog._docket.index_end = new_index_file.tell()
436 436 revlog._docket.data_end = new_data_file.tell()
437 437 revlog._docket.sidedata_end = new_sidedata_file.tell()
438 438
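# the docket end markers are refreshed after every appended revision, so
# the final docket.write() in _rewrite_v2 records the true end of the
# index, data and sidedata files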
439 439
440 440 def _rewrite_censor(
441 441 revlog,
442 442 old_index,
443 443 all_files,
444 444 rev,
445 445 tombstone,
446 446 ):
447 447 """rewrite and append a censored revision"""
448 448 (
449 449 old_data_file,
450 450 old_sidedata_file,
451 451 new_index_file,
452 452 new_data_file,
453 453 new_sidedata_file,
454 454 ) = all_files
455 455 entry = old_index[rev]
456 456
457 457 # XXX consider trying the default compression too
458 458 new_data_size = len(tombstone)
459 459 new_data_offset = new_data_file.tell()
460 460 new_data_file.write(tombstone)
461 461
462 462 # we are not adding any sidedata as they might leak info about the censored version
463 463
464 464 link_rev = entry[ENTRY_LINK_REV]
465 465
466 466 p1 = entry[ENTRY_PARENT_1]
467 467 p2 = entry[ENTRY_PARENT_2]
468 468
469 469 new_entry = revlogutils.entry(
470 470 flags=constants.REVIDX_ISCENSORED,
471 471 data_offset=new_data_offset,
472 472 data_compressed_length=new_data_size,
473 473 data_uncompressed_length=new_data_size,
474 474 data_delta_base=rev,
475 475 link_rev=link_rev,
476 476 parent_rev_1=p1,
477 477 parent_rev_2=p2,
478 478 node_id=entry[ENTRY_NODE_ID],
479 479 sidedata_offset=0,
480 480 sidedata_compressed_length=0,
481 481 data_compression_mode=COMP_MODE_PLAIN,
482 482 sidedata_compression_mode=COMP_MODE_PLAIN,
483 483 )
484 484 revlog.index.append(new_entry)
485 485 entry_bin = revlog.index.entry_binary(rev)
486 486 new_index_file.write(entry_bin)
487 487 revlog._docket.index_end = new_index_file.tell()
488 488 revlog._docket.data_end = new_data_file.tell()
489 489
490 490
491 491 def _get_filename_from_filelog_index(path):
492 492 # Drop the extension and the `data/` prefix
493 493 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
494 494 if len(path_part) < 2:
495 495 msg = _(b"cannot recognize filelog from filename: '%s'")
496 496 msg %= path
497 497 raise error.Abort(msg)
498 498
499 499 return path_part[1]
500 500
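# For example (hypothetical path): for b'data/foo/bar.txt.i',
# rsplit(b'.', 1)[0] gives b'data/foo/bar.txt' and split(b'/', 1)[1]
# yields the filelog name b'foo/bar.txt'.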
501 501
502 502 def _filelog_from_filename(repo, path):
503 503 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
504 504
505 505 from .. import filelog # avoid cycle
506 506
507 507 fl = filelog.filelog(repo.svfs, path)
508 508 return fl
509 509
510 510
511 511 def _write_swapped_parents(repo, rl, rev, offset, fp):
512 512 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
513 513 from ..pure import parsers # avoid cycle
514 514
515 515 if repo._currentlock(repo._lockref) is None:
516 516 # Let's be paranoid about it
517 517 msg = "repo needs to be locked to rewrite parents"
518 518 raise error.ProgrammingError(msg)
519 519
520 520 index_format = parsers.IndexObject.index_format
521 521 entry = rl.index[rev]
522 522 new_entry = list(entry)
523 523 new_entry[5], new_entry[6] = entry[6], entry[5]
524 524 packed = index_format.pack(*new_entry[:8])
525 525 fp.seek(offset)
526 526 fp.write(packed)
527 527
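# Fields 5 and 6 of an index tuple are the parent revisions (the
# ENTRY_PARENT_1 / ENTRY_PARENT_2 constants imported above); only the
# first 8 fields exist on disk in a v1 index, hence the `new_entry[:8]`
# repack.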
528 528
529 529 def _reorder_filelog_parents(repo, fl, to_fix):
530 530 """
531 531 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
532 532 new version to disk, overwriting the old one with a rename.
533 533 """
534 534 from ..pure import parsers # avoid cycle
535 535
536 536 ui = repo.ui
537 537 assert len(to_fix) > 0
538 538 rl = fl._revlog
539 539 if rl._format_version != constants.REVLOGV1:
540 540 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
541 541 raise error.ProgrammingError(msg)
542 542
543 543 index_file = rl._indexfile
544 544 new_file_path = index_file + b'.tmp-parents-fix'
545 545 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
546 546
547 547 with ui.uninterruptible():
548 548 try:
549 549 util.copyfile(
550 550 rl.opener.join(index_file),
551 551 rl.opener.join(new_file_path),
552 552 checkambig=rl.data_config.check_ambig,
553 553 )
554 554
555 555 with rl.opener(new_file_path, mode=b"r+") as fp:
556 556 if rl._inline:
557 557 index = parsers.InlinedIndexObject(fp.read())
558 558 for rev in fl.revs():
559 559 if rev in to_fix:
560 560 offset = index._calculate_index(rev)
561 561 _write_swapped_parents(repo, rl, rev, offset, fp)
562 562 ui.write(repaired_msg % (rev, index_file))
563 563 else:
564 564 index_format = parsers.IndexObject.index_format
565 565 for rev in to_fix:
566 566 offset = rev * index_format.size
567 567 _write_swapped_parents(repo, rl, rev, offset, fp)
568 568 ui.write(repaired_msg % (rev, index_file))
569 569
570 570 rl.opener.rename(new_file_path, index_file)
571 571 rl.clearcaches()
572 572 rl._loadindex()
573 573 finally:
574 574 util.tryunlink(new_file_path)
575 575
576 576
577 577 def _is_revision_affected(fl, filerev, metadata_cache=None):
578 578 full_text = lambda: fl._revlog.rawdata(filerev)
579 579 parent_revs = lambda: fl._revlog.parentrevs(filerev)
580 580 return _is_revision_affected_inner(
581 581 full_text, parent_revs, filerev, metadata_cache
582 582 )
583 583
584 584
585 585 def _is_revision_affected_inner(
586 586 full_text,
587 587 parents_revs,
588 588 filerev,
589 589 metadata_cache=None,
590 590 ):
591 591 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
592 592 special meaning compared to the reverse in the context of filelog-based
593 593 copytracing. issue6528 exists because new code assumed that parent ordering
594 594 didn't matter, so this detects if the revision contains metadata (since
595 595 it's only used for filelog-based copytracing) and its parents are in the
596 596 "wrong" order."""
597 597 try:
598 598 raw_text = full_text()
599 599 except error.CensoredNodeError:
600 600 # We don't care about censored nodes as they never carry metadata
601 601 return False
602 602
603 603 # raw text can be a `memoryview`, which doesn't implement `startswith`
604 604 has_meta = bytes(raw_text[:2]) == b'\x01\n'
605 605 if metadata_cache is not None:
606 606 metadata_cache[filerev] = has_meta
607 607 if has_meta:
608 608 (p1, p2) = parents_revs()
609 609 if p1 != nullrev and p2 == nullrev:
610 610 return True
611 611 return False
612 612
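# Spelled out with hypothetical values: raw text b'\x01\ncopy: a\n\x01\nx'
# with parents (3, nullrev) is affected, while the same text with parents
# (nullrev, 3) is the expected encoding and is left alone.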
613 613
614 614 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
615 615 rl = fl._revlog
616 616 is_censored = lambda: rl.iscensored(filerev)
617 617 delta_base = lambda: rl.deltaparent(filerev)
618 618 delta = lambda: rl._inner._chunk(filerev)
619 619 full_text = lambda: rl.rawdata(filerev)
620 620 parent_revs = lambda: rl.parentrevs(filerev)
621 621 return _is_revision_affected_fast_inner(
622 622 is_censored,
623 623 delta_base,
624 624 delta,
625 625 full_text,
626 626 parent_revs,
627 627 filerev,
628 628 metadata_cache,
629 629 )
630 630
631 631
632 632 def _is_revision_affected_fast_inner(
633 633 is_censored,
634 634 delta_base,
635 635 delta,
636 636 full_text,
637 637 parent_revs,
638 638 filerev,
639 639 metadata_cache,
640 640 ):
641 641 """Optimization fast-path for `_is_revision_affected`.
642 642
643 643 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
644 644 revision to check whether its delta base has metadata, avoiding
645 645 computation of the full text by inspecting the current delta instead.
646 646
647 647 This optimization only works if the revisions are looked at in order."""
648 648
649 649 if is_censored():
650 650 # Censored revisions don't contain metadata, so they cannot be affected
651 651 metadata_cache[filerev] = False
652 652 return False
653 653
654 654 p1, p2 = parent_revs()
655 655 if p1 == nullrev or p2 != nullrev:
    656 metadata_cache[filerev] = True
656 657 return False
657 658
658 659 delta_parent = delta_base()
659 660 parent_has_metadata = metadata_cache.get(delta_parent)
660 661 if parent_has_metadata is None:
661 662 return _is_revision_affected_inner(
662 663 full_text,
663 664 parent_revs,
664 665 filerev,
665 666 metadata_cache,
666 667 )
667 668
668 669 chunk = delta()
669 670 if not len(chunk):
670 671 # No diff for this revision
671 672 metadata_cache[filerev] = parent_has_metadata
672 673 return parent_has_metadata
673 674
674 675 header_length = 12
675 676 if len(chunk) < header_length:
676 677 raise error.Abort(_(b"patch cannot be decoded"))
677 678
678 679 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
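# a binary mpatch/bdiff delta is a sequence of (start, end, length) int32
# triples, each followed by `length` replacement bytes; hunks are ordered
# by offset, so the first hunk alone tells whether the two marker bytes
# can be touched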
679 680
680 681 if start < 2: # len(b'\x01\n') == 2
681 682 # This delta does *something* to the metadata marker (if any).
682 683 # Check it the slow way
683 684 is_affected = _is_revision_affected_inner(
684 685 full_text,
685 686 parent_revs,
686 687 filerev,
687 688 metadata_cache,
688 689 )
689 690 return is_affected
690 691
691 692 # The diff did not remove or add the metadata header, it's then in the same
692 693 # situation as its parent
693 694 metadata_cache[filerev] = parent_has_metadata
694 695 return parent_has_metadata
695 696
696 697
697 698 def _from_report(ui, repo, context, from_report, dry_run):
698 699 """
699 700 Fixes the revisions given in the `from_report` file, but still checks that
700 701 the revisions are indeed affected, to prevent an unfortunate cyclic
701 702 situation where we'd swap well-ordered parents again.
702 703
703 704 See the doc for `debug_fix_issue6528` for the format documentation.
704 705 """
705 706 ui.write(_(b"loading report file '%s'\n") % from_report)
706 707
707 708 with context(), open(from_report, mode='rb') as f:
708 709 for line in f.read().split(b'\n'):
709 710 if not line:
710 711 continue
711 712 filenodes, filename = line.split(b' ', 1)
712 713 fl = _filelog_from_filename(repo, filename)
713 714 to_fix = set(
714 715 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
715 716 )
716 717 excluded = set()
717 718
718 719 for filerev in to_fix:
719 720 if _is_revision_affected(fl, filerev):
720 721 msg = b"found affected revision %d for filelog '%s'\n"
721 722 ui.warn(msg % (filerev, filename))
722 723 else:
723 724 msg = _(b"revision %s of file '%s' is not affected\n")
724 725 msg %= (binascii.hexlify(fl.node(filerev)), filename)
725 726 ui.warn(msg)
726 727 excluded.add(filerev)
727 728
728 729 to_fix = to_fix - excluded
729 730 if not to_fix:
730 731 msg = _(b"no affected revisions were found for '%s'\n")
731 732 ui.write(msg % filename)
732 733 continue
733 734 if not dry_run:
734 735 _reorder_filelog_parents(repo, fl, sorted(to_fix))
735 736
736 737
737 738 def filter_delta_issue6528(revlog, deltas_iter):
738 739 """filter incomind deltas to repaire issue 6528 on the fly"""
739 740 metadata_cache = {nullrev: False}
740 741
741 742 deltacomputer = deltas.deltacomputer(revlog)
742 743
743 744 for rev, d in enumerate(deltas_iter, len(revlog)):
744 745 (
745 746 node,
746 747 p1_node,
747 748 p2_node,
748 749 linknode,
749 750 deltabase,
750 751 delta,
751 752 flags,
752 753 sidedata,
753 754 ) = d
754 755
755 756 if not revlog.index.has_node(deltabase):
756 757 raise error.LookupError(
757 758 deltabase, revlog.radix, _(b'unknown parent')
758 759 )
759 760 base_rev = revlog.rev(deltabase)
760 761 if not revlog.index.has_node(p1_node):
761 762 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
762 763 p1_rev = revlog.rev(p1_node)
763 764 if not revlog.index.has_node(p2_node):
764 765 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
765 766 p2_rev = revlog.rev(p2_node)
766 767
767 768 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
769 770 delta_base = lambda: base_rev
770 771 parent_revs = lambda: (p1_rev, p2_rev)
771 772
772 773 def full_text():
773 774 # note: being able to reuse the full text computation in the
774 775 # underlying addrevision would be useful; however, this is a bit too
775 776 # intrusive for the "quick" issue6528 fix we are writing before the
776 777 # 5.8 release
777 778 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
778 779
779 780 revinfo = revlogutils.revisioninfo(
780 781 node,
781 782 p1_node,
782 783 p2_node,
783 784 [None],
784 785 textlen,
785 786 (base_rev, delta),
786 787 flags,
787 788 )
788 789 return deltacomputer.buildtext(revinfo)
789 790
790 791 is_affected = _is_revision_affected_fast_inner(
791 792 is_censored,
792 793 delta_base,
793 794 lambda: delta,
794 795 full_text,
795 796 parent_revs,
796 797 rev,
797 798 metadata_cache,
798 799 )
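# if the incoming revision is affected, swap its parents before it is
# stored so that the repository never records the bad ordering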
799 800 if is_affected:
800 801 d = (
801 802 node,
802 803 p2_node,
803 804 p1_node,
804 805 linknode,
805 806 deltabase,
806 807 delta,
807 808 flags,
808 809 sidedata,
809 810 )
810 811 yield d
811 812
812 813
813 814 def repair_issue6528(
814 815 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
815 816 ):
816 817 @contextlib.contextmanager
817 818 def context():
818 819 if dry_run or to_report: # No need for locking
819 820 yield
820 821 else:
821 822 with repo.wlock(), repo.lock():
822 823 yield
823 824
824 825 if from_report:
825 826 return _from_report(ui, repo, context, from_report, dry_run)
826 827
827 828 report_entries = []
828 829
829 830 with context():
830 831 files = list(
831 832 entry
832 833 for entry in repo.store.data_entries()
833 834 if entry.is_revlog and entry.is_filelog
834 835 )
835 836
836 837 progress = ui.makeprogress(
837 838 _(b"looking for affected revisions"),
838 839 unit=_(b"filelogs"),
839 840 total=len(files),
840 841 )
841 842 found_nothing = True
842 843
843 844 for entry in files:
844 845 progress.increment()
845 846 filename = entry.target_id
846 847 fl = _filelog_from_filename(repo, entry.target_id)
847 848
848 849 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
849 850 to_fix = set()
850 851 metadata_cache = {nullrev: False}
851 852 for filerev in fl.revs():
852 853 affected = _is_revision_affected_fast(
853 854 repo, fl, filerev, metadata_cache
854 855 )
855 856 if paranoid:
856 857 slow = _is_revision_affected(fl, filerev)
857 858 if slow != affected:
858 859 msg = _(b"paranoid check failed for '%s' at node %s")
859 860 node = binascii.hexlify(fl.node(filerev))
860 861 raise error.Abort(msg % (filename, node))
861 862 if affected:
862 863 msg = b"found affected revision %d for file '%s'\n"
863 864 ui.warn(msg % (filerev, filename))
864 865 found_nothing = False
865 866 if not dry_run:
866 867 if to_report:
867 868 to_fix.add(binascii.hexlify(fl.node(filerev)))
868 869 else:
869 870 to_fix.add(filerev)
870 871
871 872 if to_fix:
872 873 to_fix = sorted(to_fix)
873 874 if to_report:
874 875 report_entries.append((filename, to_fix))
875 876 else:
876 877 _reorder_filelog_parents(repo, fl, to_fix)
877 878
878 879 if found_nothing:
879 880 ui.write(_(b"no affected revisions were found\n"))
880 881
881 882 if to_report and report_entries:
882 883 with open(to_report, mode="wb") as f:
883 884 for path, to_fix in report_entries:
884 885 f.write(b"%s %s\n" % (b",".join(to_fix), path))
885 886
886 887 progress.complete()
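A minimal driver sketch for the repair entry point above (plain Python, assuming an existing repository path; `repair_issue6528` is the function defined in this module, mercurial/revlogutils/rewrite.py):

    from mercurial import hg, ui as uimod
    from mercurial.revlogutils import rewrite

    ui = uimod.ui.load()
    repo = hg.repository(ui, path=b'/path/to/repo')
    # report only; nothing is rewritten in dry-run mode
    rewrite.repair_issue6528(ui, repo, dry_run=True)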