##// END OF EJS Templates
rewrite: fix issue6599...
Raphaël Gomès -
r49063:531d26b1 stable
parent child Browse files
Show More
@@ -1,886 +1,888 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
def v1_censor(rl, tr, censornode, tombstone=b''):
    """Censor the revision `censornode` of a "version 1" revlog.

    Every revision of `rl` is copied into a temporary revlog (postfix
    ``tmpcensored``) in which the censored revision is replaced by a packed
    tombstone. The temporary files are then renamed over the original ones
    and `rl` is reloaded.

    `tr` is the transaction used to add the copied revisions and to back up
    the original files. `tombstone` is the message stored under the
    ``censored`` metadata key of the replacement entry.

    Raises error.Abort if the new tombstone, or a revision that was already
    censored, ends up stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # avoid cycle
    from .. import revlog

    target_rev = rl.rev(censornode)
    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # In-place rewriting is hard, so a brand new revlog receives a copy of
    # every revision and replaces the old one afterwards.
    #
    # This is a bit dangerous: the two states could easily get out of sync.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    newrl._format_version = rl._format_version
    newrl._format_flags = rl._format_flags
    newrl._generaldelta = rl._generaldelta
    newrl._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev != target_rev:
            if not rl.iscensored(rev):
                rawtext = rl.rawdata(rev)
            elif rl.deltaparent(rev) == nullrev:
                # previously censored revision: copy its stored chunk as is
                rawtext = rl._chunk(rev)
            else:
                msg = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(msg)

            newrl.addrawrevision(
                rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
            )
            continue

        # this is the revision to censor: store the tombstone instead
        newrl.addrawrevision(
            packed_tombstone,
            tr,
            rl.linkrev(target_rev),
            p1,
            p2,
            censornode,
            constants.REVIDX_ISCENSORED,
        )

        if newrl.deltaparent(rev) != nullrev:
            msg = _(b'censored revision stored as delta; cannot censor')
            hint = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(msg, hint=hint)

    is_inline = rl._inline

    tr.addbackup(rl._indexfile, location=b'store')
    if not is_inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(newrl._indexfile, rl._indexfile)
    if not is_inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    rl.clearcaches()
    rl._loadindex()
130 130
131 131
def v2_censor(revlog, tr, censornode, tombstone=b''):
    """Censor the revision `censornode` of a "version 2" revlog.

    The work is delegated to `_rewrite_v2` with a single revision to censor.
    """
    version = revlog._format_version
    assert version != REVLOGV0, version
    assert version != REVLOGV1, version

    _rewrite_v2(revlog, tr, {revlog.rev(censornode)}, tombstone)
139 139
140 140
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """Rewrite a revlog so that the revisions in `censor_revs` are censored.

    General principle

    New revlog files (index/data/sidedata) are created and receive a copy of
    the existing content, minus the censored data.

    Any revision whose delta is based on a censored revision needs a new
    delta. The cumulative size of these new deltas may be large, so they are
    kept in a temporary file until they reach their final destination.

    Everything stored before the first censored revision is copied blindly.
    Everything from that revision onward is copied one revision at a time,
    with the associated index entry adjusted.
    """
    version = revlog._format_version
    assert version != REVLOGV0, version
    assert version != REVLOGV1, version

    old_index = revlog.index
    docket = revlog._docket

    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before this revision is kept untouched
    first_excl_rev = min(censor_revs)
    first_excl_entry = revlog.index[first_excl_rev]

    index_cutoff = revlog.index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev → (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # The old index file does not need to be opened: its content is
        # already available in a usable form through `old_index`.
        with all_files() as open_files:
            # write the censored revision(s) and every revision after them
            for rev in range(first_excl_rev, len(old_index)):
                if rev not in censor_revs:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
                else:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        packed_tombstone,
                    )

    docket.write(transaction=None, stripping=True)
221 221
222 222
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute replacement deltas for revisions based on an excluded one.

    Every revision at or after the first excluded revision whose delta base
    is in `excluded_revs` gets a new delta, whose payload is appended to
    `tmp_storage`.

    Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
    where `data_start` and `data_end` are offsets within `tmp_storage`.
    """
    delta_computer = deltas.deltacomputer(revlog)
    new_deltas = {}
    first_excl_rev = min(excluded_revs)

    with revlog._segmentfile._open_read() as dfh:
        for rev in range(first_excl_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions are not given a recomputed delta here
                continue

            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the current delta base survives: nothing to recompute
                continue

            # This revision uses an excluded revision as its delta base,
            # so it needs a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # empty revision: it can simply delta against nullrev
                new_deltas[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev, _df=dfh)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            delta_info = delta_computer.finddeltainfo(
                info, dfh, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, delta_info = deltas.delta_compression(
                default_comp, delta_info
            )

            # relying on `tell` is a bit lazy, but speed is not the goal here
            start = tmp_storage.tell()
            tmp_storage.write(delta_info.data[1])
            end = tmp_storage.tell()
            new_deltas[rev] = (delta_info.base, start, end, comp_mode)

    return new_deltas
274 274
275 275
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Create the new revlog files and return a way to open all files.

    The new index, data and sidedata files are created as copies of the old
    ones, truncated to `index_cutoff`, `data_cutoff` and `sidedata_cutoff`
    bytes respectively. The docket end offsets are set to those cutoffs and
    the revlog is reloaded from the docket.

    Return a function that produces a context manager yielding, in order:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    There is no old_index_file: the caller accesses the old index through
    its own `old_index` object.
    """
    docket = revlog._docket
    join = revlog.opener.join

    # The old paths must be computed before the docket is asked for new
    # file names.
    old_index_filepath = join(docket.index_filepath())
    old_data_filepath = join(docket.data_filepath())
    old_sidedata_filepath = join(docket.sidedata_filepath())

    new_index_filepath = join(docket.new_index_file())
    new_data_filepath = join(docket.new_data_file())
    new_sidedata_filepath = join(docket.new_sidedata_file())

    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    revlog.opener.register_file(docket.index_filepath())
    revlog.opener.register_file(docket.data_filepath())
    revlog.opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # The stack closes the files in reverse opening order on exit.
        with contextlib.ExitStack() as stack:
            old_data_file = stack.enter_context(open(old_data_filepath, 'rb'))
            old_sidedata_file = stack.enter_context(
                open(old_sidedata_filepath, 'rb')
            )
            new_index_file = stack.enter_context(
                open(new_index_filepath, 'r+b')
            )
            new_data_file = stack.enter_context(open(new_data_filepath, 'r+b'))
            new_sidedata_file = stack.enter_context(
                open(new_sidedata_filepath, 'r+b')
            )

            # position every new file at its end, ready for appending
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff

            yield (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            )

    return all_files_opener
348 348
349 349
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """Append a regular revision to the new files, after the rewritten one(s).

    The revision data comes from `tmp_storage` when `rev` has an entry in
    `rewritten_entries`, and from the old data file otherwise. Sidedata, if
    any, is copied from the old sidedata file. A new index entry pointing to
    the new offsets is appended to `revlog.index` and written to the new
    index file, and the docket end offsets are updated.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    # this field packs the data offset (high bits) and the flags (low 16 bits)
    offset_and_flags = entry[ENTRY_DATA_OFFSET]
    flags = offset_and_flags & 0xFFFF

    if rev in rewritten_entries:
        # a new delta was precomputed for this revision
        delta_base, tmp_start, tmp_end, data_comp_mode = rewritten_entries[rev]
        data_size = tmp_end - tmp_start
        tmp_storage.seek(tmp_start)
        data = tmp_storage.read(data_size)
    else:
        # the stored data can be reused as is
        old_data_file.seek(offset_and_flags >> 16)
        data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        data = old_data_file.read(data_size)
        delta_base = entry[ENTRY_DELTA_BASE]
        data_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # Grouping contiguous read/write operations might be faster. However,
    # censoring is not an operation focused on stellar performance, so that
    # optimisation has not been written.
    data_offset = new_data_file.tell()
    new_data_file.write(data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    assert delta_base <= rev, (delta_base, rev)

    revlog.index.append(
        revlogutils.entry(
            flags=flags,
            data_offset=data_offset,
            data_compressed_length=data_size,
            data_uncompressed_length=entry[ENTRY_DATA_UNCOMPRESSED_LENGTH],
            data_delta_base=delta_base,
            link_rev=entry[ENTRY_LINK_REV],
            parent_rev_1=entry[ENTRY_PARENT_1],
            parent_rev_2=entry[ENTRY_PARENT_2],
            node_id=entry[ENTRY_NODE_ID],
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=sidedata_size,
            data_compression_mode=data_comp_mode,
            sidedata_compression_mode=entry[ENTRY_SIDEDATA_COMPRESSION_MODE],
        )
    )
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """Rewrite `rev` as a censored revision and append it to the new files.

    `tombstone` is written uncompressed to the new data file in place of the
    revision data. The new index entry keeps the link revision, parents and
    node id of the old entry, is flagged as censored and uses `rev` itself
    as delta base.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    old_entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # No sidedata is written: it might leak information about the censored
    # version.
    censored_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        data_delta_base=rev,
        link_rev=old_entry[ENTRY_LINK_REV],
        parent_rev_1=old_entry[ENTRY_PARENT_1],
        parent_rev_2=old_entry[ENTRY_PARENT_2],
        node_id=old_entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(censored_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
491 491
def _filelog_from_filename(repo, path):
    """Return the filelog for `path`, opened on the store vfs of `repo`.

    Stolen from `engine.py`.
    """
    from .. import filelog  # avoid cycle

    return filelog.filelog(repo.svfs, path)
499 499
500 500
def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Overwrite the index entry of `rev` in `fp` with p1 and p2 swapped.

    The entry is read from `rl.index`, its fields 5 and 6 are exchanged, and
    the first 8 fields are packed and written at `offset` in `fp`.

    Raises error.ProgrammingError if the repository is not locked.
    """
    from ..pure import parsers  # avoid cycle

    # Let's be paranoid about it
    if repo._currentlock(repo._lockref) is None:
        raise error.ProgrammingError(
            "repo needs to be locked to rewrite parents"
        )

    index_format = parsers.IndexObject.index_format
    fields = list(rl.index[rev])
    fields[5], fields[6] = fields[6], fields[5]

    fp.seek(offset)
    fp.write(index_format.pack(*fields[:8]))
517 517
518 518
def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.

    The index file is first copied to a temporary `.tmp-parents-fix` file,
    the entries are patched in that copy, and the copy is then renamed over
    the original index before the revlog is reloaded.

    `to_fix` must be a non-empty collection of revision numbers.

    Raises error.ProgrammingError if the underlying revlog is not a
    version 1 revlog.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
                checkambig=rl._checkambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # entry offsets are taken from a parsed inline index
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in to_fix:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # fixed-size entries: the offset is a simple product
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            # `new_file_path` is relative to the opener, so it must be joined
            # (as done for the copy above) to target the temporary file
            # instead of a path relative to the current directory.
            util.tryunlink(rl.opener.join(new_file_path))
565 565
566 566
def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Tell whether `filerev` of filelog `fl` is affected by issue6528.

    Thin wrapper that feeds `_is_revision_affected_inner` with lazy accessors
    for the raw text and the parent revisions of `filerev`.
    """
    rl = fl._revlog

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 582 special meaning compared to the reverse in the context of filelog-based
583 583 copytracing. issue6528 exists because new code assumed that parent ordering
584 584 didn't matter, so this detects if the revision contains metadata (since
585 585 it's only used for filelog-based copytracing) and its parents are in the
586 586 "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 has_meta = raw_text.startswith(b'\x01\n')
592
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
593 595 if metadata_cache is not None:
594 596 metadata_cache[filerev] = has_meta
595 597 if has_meta:
596 598 (p1, p2) = parents_revs()
597 599 if p1 != nullrev and p2 == nullrev:
598 600 return True
599 601 return False
600 602
601 603
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast-path check of whether `filerev` of `fl` is affected by issue6528.

    Thin wrapper that feeds `_is_revision_affected_fast_inner` with lazy
    accessors reading from the revlog of `fl`.
    """
    rl = fl._revlog

    def is_censored():
        return rl.iscensored(filerev)

    def delta_base():
        return rl.deltaparent(filerev)

    def delta():
        return rl._chunk(filerev)

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        filerev,
        metadata_cache,
    )
618 620
619 621
620 622 def _is_revision_affected_fast_inner(
621 623 is_censored,
622 624 delta_base,
623 625 delta,
624 626 full_text,
625 627 parent_revs,
626 628 filerev,
627 629 metadata_cache,
628 630 ):
629 631 """Optimization fast-path for `_is_revision_affected`.
630 632
631 633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
632 634 revision to check if its base has metadata, saving computation of the full
633 635 text, instead looking at the current delta.
634 636
635 637 This optimization only works if the revisions are looked at in order."""
636 638
637 639 if is_censored():
638 640 # Censored revisions don't contain metadata, so they cannot be affected
639 641 metadata_cache[filerev] = False
640 642 return False
641 643
642 644 p1, p2 = parent_revs()
643 645 if p1 == nullrev or p2 != nullrev:
644 646 return False
645 647
646 648 delta_parent = delta_base()
647 649 parent_has_metadata = metadata_cache.get(delta_parent)
648 650 if parent_has_metadata is None:
649 651 return _is_revision_affected_inner(
650 652 full_text,
651 653 parent_revs,
652 654 filerev,
653 655 metadata_cache,
654 656 )
655 657
656 658 chunk = delta()
657 659 if not len(chunk):
658 660 # No diff for this revision
659 661 return parent_has_metadata
660 662
661 663 header_length = 12
662 664 if len(chunk) < header_length:
663 665 raise error.Abort(_(b"patch cannot be decoded"))
664 666
665 667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
666 668
667 669 if start < 2: # len(b'\x01\n') == 2
668 670 # This delta does *something* to the metadata marker (if any).
669 671 # Check it the slow way
670 672 is_affected = _is_revision_affected_inner(
671 673 full_text,
672 674 parent_revs,
673 675 filerev,
674 676 metadata_cache,
675 677 )
676 678 return is_affected
677 679
678 680 # The diff did not remove or add the metadata header, it's then in the same
679 681 # situation as its parent
680 682 metadata_cache[filerev] = parent_has_metadata
681 683 return parent_has_metadata
682 684
683 685
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions listed in the `from_report` file.

    Each listed revision is checked again before being fixed, to prevent an
    unfortunate cyclic situation where well-ordered parents would be swapped
    again.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            # a line is: comma-separated hex filenodes, a space, the filename
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            candidates = {
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            }

            confirmed = set()
            for filerev in candidates:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                    confirmed.add(filerev)
                else:
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)

            if not confirmed:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(confirmed))
722 724
723 725
def filter_delta_issue6528(revlog, deltas_iter):
    """Filter incoming deltas to repair issue6528 on the fly.

    `deltas_iter` yields tuples of
    `(node, p1_node, p2_node, linknode, deltabase, delta, flags, sidedata)`.
    Each tuple is yielded back, with its two parents swapped when the
    revision is detected as affected by issue6528.

    Revisions are numbered from `len(revlog)` onward, in iteration order.

    Raises error.LookupError when the delta base or a parent of an incoming
    delta is unknown to `revlog`.
    """
    metadata_cache = {}

    deltacomputer = deltas.deltacomputer(revlog)

    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        if not revlog.index.has_node(deltabase):
            raise error.LookupError(
                deltabase, revlog.radix, _(b'unknown parent')
            )
        base_rev = revlog.rev(deltabase)
        if not revlog.index.has_node(p1_node):
            raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
        p1_rev = revlog.rev(p1_node)
        if not revlog.index.has_node(p2_node):
            raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
        p2_rev = revlog.rev(p2_node)

        # These closures are only called within the current iteration, so
        # they always see the values of the delta being processed.
        is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
        delta_base = lambda: base_rev
        parent_revs = lambda: (p1_rev, p2_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful however this is a bit too
            # intrusive for the "quick" issue6528 fix we are writing before
            # the 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            # cached by the global "writing" context
            assert revlog._writinghandles is not None
            if revlog._inline:
                fh = revlog._writinghandles[0]
            else:
                fh = revlog._writinghandles[1]
            return deltacomputer.buildtext(revinfo, fh)

        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            lambda: delta,
            full_text,
            parent_revs,
            rev,
            metadata_cache,
        )
        if is_affected:
            # same delta, with p1 and p2 swapped
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d
804 806
805 807
def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Find, and optionally repair, filelog revisions affected by issue6528.

    - With `from_report`, the work is delegated to `_from_report`, which only
      considers the revisions listed in that file.
    - Otherwise every filelog index (`.i` file) of the store is scanned.
      Affected revisions are always reported through `ui.warn`. Unless
      `dry_run` is set, they are then either written to the `to_report` file
      (one `<hex nodes, comma separated> <filename>` line per filelog) or
      fixed in place.

    With `paranoid`, the result of the fast detection is compared with the
    slow one for each revision, and error.Abort is raised on mismatch.

    The repository is locked unless `dry_run` or `to_report` is set.
    """
    from .. import store  # avoid cycle

    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        # filelog index files only; no further filtering is needed below
        paths = [
            path
            for (file_type, path, _e, _s) in repo.store.datafiles()
            if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(paths),
        )
        found_nothing = True

        for path in paths:
            progress.increment()
            filename = _get_filename_from_filelog_index(path)
            fl = _filelog_from_filename(repo, filename)

            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
            to_fix = set()
            metadata_cache = {}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if affected:
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, path))
                    found_nothing = False
                    if not dry_run:
                        if to_report:
                            to_fix.add(binascii.hexlify(fl.node(filerev)))
                        else:
                            to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

    if to_report and report_entries:
        with open(to_report, mode="wb") as f:
            for path, to_fix in report_entries:
                f.write(b"%s %s\n" % (b",".join(to_fix), path))

    progress.complete()
General Comments 0
You need to be logged in to leave comments. Login now