##// END OF EJS Templates
issue6528: implement _is_revision_affected_fast using callback...
marmoute -
r48627:c02ce6de stable
parent child Browse files
Show More
@@ -1,768 +1,802 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVLOGV0,
33 33 REVLOGV1,
34 34 )
35 35 from ..i18n import _
36 36
37 37 from .. import (
38 38 error,
39 39 pycompat,
40 40 revlogutils,
41 41 util,
42 42 )
43 43 from ..utils import (
44 44 storageutil,
45 45 )
46 46 from . import (
47 47 constants,
48 48 deltas,
49 49 )
50 50
51 51
def v1_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 1" revlog"""
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # avoid cycle
    from .. import revlog

    censorrev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard: our strategy is to build a
    # brand-new revlog, replay every revision into it (substituting the
    # tombstone for the censored one), then swap the files in at
    # transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # mirror the format knobs of the source revlog onto the replacement
    newrl._format_version = rl._format_version
    newrl._format_flags = rl._format_flags
    newrl._generaldelta = rl._generaldelta
    newrl._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev == censorrev:
            # store the tombstone in place of the censored payload
            newrl.addrawrevision(
                tombstone,
                tr,
                rl.linkrev(censorrev),
                p1,
                p2,
                censornode,
                constants.REVIDX_ISCENSORED,
            )

            if newrl.deltaparent(rev) != nullrev:
                # the tombstone must have been stored as a full snapshot
                msg = _(b'censored revision stored as delta; cannot censor')
                hint = _(
                    b'censoring of revlogs is not fully implemented;'
                    b' please report this bug'
                )
                raise error.Abort(msg, hint=hint)
            continue

        if rl.iscensored(rev):
            if rl.deltaparent(rev) != nullrev:
                msg = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(msg)
            # copy the already-censored payload verbatim
            rawtext = rl._chunk(rev)
        else:
            rawtext = rl.rawdata(rev)

        newrl.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
        )

    # back up the old files, then move the rewritten ones into place
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(newrl._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    rl.clearcaches()
    rl._loadindex()
128 128
129 129
def v2_censor(revlog, tr, censornode, tombstone=b''):
    """censors a revision in a "version 2" revlog"""
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # a v2 censor is simply a one-revision rewrite
    _rewrite_v2(revlog, tr, {revlog.rev(censornode)}, tombstone)
137 137
138 138
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle:

    We create new revlog files (index/data/sidedata) holding a copy of the
    existing content minus the censored data.

    Any revision whose delta base is a censored revision needs a new delta.
    The cumulative size of those new deltas may be large, so they are staged
    in a temporary file until written to their final destination.

    Everything stored before the first censored revision can be blindly
    copied; the rest is copied revision by revision, with the matching index
    entries adjusted along the way.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before this revision is copied wholesale
    first_excl_rev = min(censor_revs)

    first_excl_entry = revlog.index[first_excl_rev]
    index_cutoff = revlog.index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev -> (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # no need to open the old index file: its content is already
        # available in a usable form through `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # write the censored revision(s) and every subsequent one
            for rev in range(first_excl_rev, len(old_index)):
                if rev in censor_revs:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
                else:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
        docket.write(transaction=None, stripping=True)
219 219
220 220
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute new deltas for revisions whose current delta base will not
    survive the rewrite.

    New delta payloads are staged in `tmp_storage`.

    Return a mapping: {rev: (new_base, data_start, data_end, compression_mode)}
    """
    dc = deltas.deltacomputer(revlog)
    rewritten_entries = {}
    first_excl_rev = min(excluded_revs)
    with revlog._segmentfile._open_read() as dfh:
        for rev in range(first_excl_rev, len(old_index)):
            if rev in excluded_revs:
                # censored revisions are rewritten separately (as
                # tombstones), so no delta recomputation is needed here.
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                continue
            # This revision deltas against a censored revision: it needs a
            # fresh delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # empty revision: delta against nullrev, nothing to stage
                rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev, _df=dfh)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            d = dc.finddeltainfo(
                info, dfh, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, d = deltas.delta_compression(default_comp, d)
            # using `tell` is a bit lazy, but we are not here for speed
            start = tmp_storage.tell()
            tmp_storage.write(d.data[1])
            end = tmp_storage.tell()
            rewritten_entries[rev] = (d.base, start, end, comp_mode)
    return rewritten_entries
272 272
273 273
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Prepare the replacement revlog files and pre-fill them with the
    uncensored prefix of the old ones.

    Return a context manager yielding the relevant open files, in order:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old index file is not part of the tuple because callers access its
    content through the `old_index` object instead.
    """
    docket = revlog._docket
    old_index_filepath = revlog.opener.join(docket.index_filepath())
    old_data_filepath = revlog.opener.join(docket.data_filepath())
    old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())

    new_index_filepath = revlog.opener.join(docket.new_index_file())
    new_data_filepath = revlog.opener.join(docket.new_data_file())
    new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())

    # blindly copy everything up to the first censored revision
    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    revlog.opener.register_file(docket.index_filepath())
    revlog.opener.register_file(docket.data_filepath())
    revlog.opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # an ExitStack keeps the five opens flat instead of deeply nested
        with contextlib.ExitStack() as stack:
            old_data_file = stack.enter_context(
                open(old_data_filepath, 'rb')
            )
            old_sidedata_file = stack.enter_context(
                open(old_sidedata_filepath, 'rb')
            )
            new_index_file = stack.enter_context(
                open(new_index_filepath, 'r+b')
            )
            new_data_file = stack.enter_context(
                open(new_data_filepath, 'r+b')
            )
            new_sidedata_file = stack.enter_context(
                open(new_sidedata_filepath, 'r+b')
            )
            # position every new file at its pre-filled end and sanity
            # check that the copied prefix has the expected size
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff
            yield (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            )

    return all_files_opener
346 346
347 347
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """append a normal revision to the index after the rewritten one(s)"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

    if rev in rewritten_entries:
        # this revision got a recomputed delta; fetch it from the staging
        # file
        (
            data_delta_base,
            start,
            end,
            d_comp_mode,
        ) = rewritten_entries[rev]
        new_data_size = end - start
        tmp_storage.seek(start)
        new_data = tmp_storage.read(new_data_size)
    else:
        # unchanged delta; copy the payload straight from the old data file
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # It might be faster to group continuous read/write operation,
    # however, this is censor, an operation that is not focussed
    # around stellar performance. So I have not written this
    # optimisation yet.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if 0 < sidedata_size:
        old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
        old_sidedata_file.seek(old_sidedata_offset)
        new_sidedata = old_sidedata_file.read(sidedata_size)
        new_sidedata_file.write(new_sidedata)

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)

    # keep the docket in sync with what has been written so far
    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()
    revlog._docket.sidedata_end = new_sidedata_file.tell()
426 426
427 427
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """rewrite and append a censored revision"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]

    # XXX consider trying the default compression too
    new_data_size = len(tombstone)
    new_data_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the censored version

    new_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=new_data_size,
        # a self-base marks the tombstone as a full snapshot
        data_delta_base=rev,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)
    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()
477 477
478 478
479 479 def _get_filename_from_filelog_index(path):
480 480 # Drop the extension and the `data/` prefix
481 481 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
482 482 if len(path_part) < 2:
483 483 msg = _(b"cannot recognize filelog from filename: '%s'")
484 484 msg %= path
485 485 raise error.Abort(msg)
486 486
487 487 return path_part[1]
488 488
489 489
def _filelog_from_filename(repo, path):
    """Returns the filelog for the given `path`. Stolen from `engine.py`"""

    from .. import filelog  # avoid cycle

    return filelog.filelog(repo.svfs, path)
497 497
498 498
def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        msg = "repo needs to be locked to rewrite parents"
        raise error.ProgrammingError(msg)

    index_format = parsers.IndexObject.index_format
    swapped = list(rl.index[rev])
    # fields 5 and 6 are p1 and p2 — exchange them
    swapped[5], swapped[6] = swapped[6], swapped[5]
    # only the first 8 fields make up the v1 on-disk entry
    fp.seek(offset)
    fp.write(index_format.pack(*swapped[:8]))
515 515
516 516
def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            # work on a copy, then atomically swap it in
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
                checkambig=rl._checkambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # inline index: entry offsets must be computed from the
                    # parsed index, since data is interleaved
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in to_fix:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # separate index: entries are fixed-size
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            util.tryunlink(new_file_path)
563 563
564 564
def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Slow path: decide from the full text whether `filerev` is affected."""
    return _is_revision_affected_inner(
        lambda: fl._revlog.rawdata(filerev),
        lambda: fl._revlog.parentrevs(filerev),
        filerev,
        metadata_cache,
    )
571 571
572 572
573 573 def _is_revision_affected_inner(
574 574 full_text,
575 575 parents_revs,
576 576 filerev,
577 577 metadata_cache=None,
578 578 ):
579 579 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
580 580 special meaning compared to the reverse in the context of filelog-based
581 581 copytracing. issue6528 exists because new code assumed that parent ordering
582 582 didn't matter, so this detects if the revision contains metadata (since
583 583 it's only used for filelog-based copytracing) and its parents are in the
584 584 "wrong" order."""
585 585 try:
586 586 raw_text = full_text()
587 587 except error.CensoredNodeError:
588 588 # We don't care about censored nodes as they never carry metadata
589 589 return False
590 590 has_meta = raw_text.startswith(b'\x01\n')
591 591 if metadata_cache is not None:
592 592 metadata_cache[filerev] = has_meta
593 593 if has_meta:
594 594 (p1, p2) = parents_revs()
595 595 if p1 != nullrev and p2 == nullrev:
596 596 return True
597 597 return False
598 598
599 599
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast-path entry point: wire revlog accessors into the callback-based
    inner implementation."""
    rl = fl._revlog
    return _is_revision_affected_fast_inner(
        lambda: rl.iscensored(filerev),
        lambda: rl.deltaparent(filerev),
        lambda: rl._chunk(filerev),
        lambda: rl.rawdata(filerev),
        lambda: rl.parentrevs(filerev),
        filerev,
        metadata_cache,
    )
616
617
618 def _is_revision_affected_fast_inner(
619 is_censored,
620 delta_base,
621 delta,
622 full_text,
623 parent_revs,
624 filerev,
625 metadata_cache,
626 ):
601 627 """Optimization fast-path for `_is_revision_affected`.
602 628
603 629 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
604 630 revision to check if its base has metadata, saving computation of the full
605 631 text, instead looking at the current delta.
606 632
607 633 This optimization only works if the revisions are looked at in order."""
608 rl = fl._revlog
609 634
610 if rl.iscensored(filerev):
635 if is_censored():
611 636 # Censored revisions don't contain metadata, so they cannot be affected
612 637 metadata_cache[filerev] = False
613 638 return False
614 639
615 p1, p2 = rl.parentrevs(filerev)
640 p1, p2 = parent_revs()
616 641 if p1 == nullrev or p2 != nullrev:
617 642 return False
618 643
619 delta_parent = rl.deltaparent(filerev)
644 delta_parent = delta_base()
620 645 parent_has_metadata = metadata_cache.get(delta_parent)
621 646 if parent_has_metadata is None:
622 is_affected = _is_revision_affected(fl, filerev, metadata_cache)
623 return is_affected
647 return _is_revision_affected_inner(
648 full_text,
649 parent_revs,
650 filerev,
651 metadata_cache,
652 )
624 653
625 chunk = rl._chunk(filerev)
654 chunk = delta()
626 655 if not len(chunk):
627 656 # No diff for this revision
628 657 return parent_has_metadata
629 658
630 659 header_length = 12
631 660 if len(chunk) < header_length:
632 661 raise error.Abort(_(b"patch cannot be decoded"))
633 662
634 663 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
635 664
636 665 if start < 2: # len(b'\x01\n') == 2
637 666 # This delta does *something* to the metadata marker (if any).
638 667 # Check it the slow way
639 is_affected = _is_revision_affected(fl, filerev, metadata_cache)
668 is_affected = _is_revision_affected_inner(
669 full_text,
670 parent_revs,
671 filerev,
672 metadata_cache,
673 )
640 674 return is_affected
641 675
642 676 # The diff did not remove or add the metadata header, it's then in the same
643 677 # situation as its parent
644 678 metadata_cache[filerev] = parent_has_metadata
645 679 return parent_has_metadata
646 680
647 681
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still checks if the
    revisions are indeed affected to prevent an unfortunate cyclic situation
    where we'd swap well-ordered parents again.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            to_fix = {
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            }
            excluded = set()

            for filerev in to_fix:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                else:
                    # double-check before rewriting, or we could swap
                    # well-ordered parents back into the broken state
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)
                    excluded.add(filerev)

            to_fix = to_fix - excluded
            if not to_fix:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(to_fix))
686 720
687 721
def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Scan every filelog for issue6528-swapped parents and repair them.

    `dry_run` only reports; `to_report` writes the findings to a file instead
    of fixing; `from_report` replays a previously generated report;
    `paranoid` cross-checks the fast detection path against the slow one.
    """
    from .. import store  # avoid cycle

    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = [
            (file_type, path)
            for (file_type, path, _e, _s) in repo.store.datafiles()
            if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        for file_type, path in files:
            if (
                not path.endswith(b'.i')
                or not file_type & store.FILEFLAGS_FILELOG
            ):
                continue
            progress.increment()
            filename = _get_filename_from_filelog_index(path)
            fl = _filelog_from_filename(repo, filename)

            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
            to_fix = set()
            metadata_cache = {}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if affected:
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, path))
                    found_nothing = False
                    if not dry_run:
                        if to_report:
                            to_fix.add(binascii.hexlify(fl.node(filerev)))
                        else:
                            to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

        if to_report and report_entries:
            with open(to_report, mode="wb") as f:
                for path, to_fix in report_entries:
                    f.write(b"%s %s\n" % (b",".join(to_fix), path))

        progress.complete()
General Comments 0
You need to be logged in to leave comments. Login now