##// END OF EJS Templates
revlogutils: for issue6528 fix, pre-cache nullrev as metadata-free
Joerg Sonnenberger -
r52806:576876a5 default
parent child Browse files
Show More
@@ -1,886 +1,886
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import annotations
11 11
12 12 import binascii
13 13 import contextlib
14 14 import os
15 15 import struct
16 16
17 17 from ..node import (
18 18 nullrev,
19 19 )
20 20 from .constants import (
21 21 COMP_MODE_PLAIN,
22 22 ENTRY_DATA_COMPRESSED_LENGTH,
23 23 ENTRY_DATA_COMPRESSION_MODE,
24 24 ENTRY_DATA_OFFSET,
25 25 ENTRY_DATA_UNCOMPRESSED_LENGTH,
26 26 ENTRY_DELTA_BASE,
27 27 ENTRY_LINK_REV,
28 28 ENTRY_NODE_ID,
29 29 ENTRY_PARENT_1,
30 30 ENTRY_PARENT_2,
31 31 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
32 32 ENTRY_SIDEDATA_COMPRESSION_MODE,
33 33 ENTRY_SIDEDATA_OFFSET,
34 34 REVIDX_ISCENSORED,
35 35 REVLOGV0,
36 36 REVLOGV1,
37 37 )
38 38 from ..i18n import _
39 39
40 40 from .. import (
41 41 error,
42 42 mdiff,
43 43 pycompat,
44 44 revlogutils,
45 45 util,
46 46 )
47 47 from ..utils import (
48 48 storageutil,
49 49 )
50 50 from . import (
51 51 constants,
52 52 deltas,
53 53 )
54 54
55 55
def v1_censor(rl, tr, censor_nodes, tombstone=b''):
    """Censor the revisions `censor_nodes` of a "version 1" revlog.

    Every revision of `rl` is copied into a temporary revlog (postfix
    `tmpcensored`); revisions to censor are replaced by a packed tombstone
    flagged REVIDX_ISCENSORED. The temporary files are then renamed over the
    originals (which are backed up in the transaction `tr`) and `rl` is
    reloaded from disk.

    Raises error.Abort when a censored revision ends up, or already is,
    stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported locally to avoid an import cycle
    from .. import revlog

    revs_to_censor = {rl.rev(n) for n in censor_nodes}
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard, so the strategy is to build a
    # fresh revlog, copy every revision into it and swap the files at the end.
    #
    # This is a bit dangerous: a mismatch of state is easy to get.
    replacement = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
        data_config=rl.data_config,
        delta_config=rl.delta_config,
        feature_config=rl.feature_config,
        may_inline=rl._inline,
    )
    # Inline splitting prepares transaction work that would be confused by
    # the final file move, so the new revlog must never be inline when the
    # original is not.
    assert not replacement._inline or rl._inline

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev not in revs_to_censor:
            # plain copy of a revision we keep
            if not rl.iscensored(rev):
                rawtext = rl.rawdata(rev)
            elif rl.deltaparent(rev) == nullrev:
                # already censored: copy the stored chunk verbatim
                rawtext = rl._inner._chunk(rev)
            else:
                m = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(m)

            replacement.addrawrevision(
                rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
            )
            continue

        replacement.addrawrevision(
            tombstone,
            tr,
            rl.linkrev(rev),
            p1,
            p2,
            node,
            constants.REVIDX_ISCENSORED,
        )

        if replacement.deltaparent(rev) != nullrev:
            m = _(b'censored revision stored as delta; cannot censor')
            h = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(m, hint=h)

    # back up the files we are about to overwrite
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(replacement._indexfile, rl._indexfile)
    if not replacement._inline:
        assert not rl._inline
        rl.opener.rename(replacement._datafile, rl._datafile)
    else:
        assert rl._inline

    # reload the in-memory state from the files now in place
    rl.clearcaches()
    chunk_cache = rl._loadindex()
    rl._load_inner(chunk_cache)
141 141
142 142
def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
    """Censor the revisions `censor_nodes` of a "version 2" revlog.

    The nodes are resolved to revision numbers and the actual work is
    delegated to `_rewrite_v2`. Formats REVLOGV0 and REVLOGV1 are rejected
    by assertion.
    """
    version = revlog._format_version
    assert version != REVLOGV0, revlog._format_version
    assert version != REVLOGV1, revlog._format_version

    revs = {revlog.rev(n) for n in censor_nodes}
    _rewrite_v2(revlog, tr, revs, tombstone)
150 150
151 151
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """Rewrite a revlog so that the revisions in `censor_revs` are censored.

    General principle

    New revlog files (index/data/sidedata) are created and the existing
    content is copied into them without the censored data.

    Any revision that used a censored revision as its delta base needs a new
    delta. As the cumulative size of those new deltas may be large, they are
    kept in a temporary file until they reach their final destination.

    Everything before the first censored revision can be copied blindly. The
    rest is copied one revision at a time, adjusting the associated index
    entry as we go.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # keep a handle on the current index: `_setup_new_files` reloads the
    # revlog's own index further down
    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    first_excl_rev = min(censor_revs)
    first_excl_entry = revlog.index[first_excl_rev]

    # byte offsets up to which the old files can be copied unchanged
    index_cutoff = revlog.index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev → (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # The old index file does not need to be opened: its content is
        # already available in a usable form through `old_index`.
        with all_files() as open_files:
            # The individual handles are not used here (the helpers receive
            # the whole tuple); the unpacking is kept as in the original.
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # write the censored revision(s) and everything after them
            for rev in range(first_excl_rev, len(old_index)):
                if rev not in censor_revs:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
                else:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
    docket.write(transaction=None, stripping=True)
232 232
233 233
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute new deltas for revisions whose delta base will not survive.

    Only revisions from `min(excluded_revs)` onward are considered. The new
    delta payloads are appended to `tmp_storage`.

    Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
    where `data_start`/`data_end` are offsets inside `tmp_storage`.
    """
    delta_computer = deltas.deltacomputer(revlog)
    rewritten = {}
    start_rev = min(excluded_revs)
    with revlog.reading():
        for rev in range(start_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions are handled separately by the caller,
                # there is no delta to recompute for them
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the delta base survives, the stored delta stays valid
                continue

            # This revision uses an excluded revision as the base of its
            # delta, so it needs a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # empty revision: a delta against nullrev is enough
                rewritten[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                # the low 16 bits of the offset field hold the flags
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            d = delta_computer.finddeltainfo(
                info, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, d = deltas.delta_compression(default_comp, d)

            # using `tell` is a bit lazy, but we are not here for speed
            start = tmp_storage.tell()
            tmp_storage.write(d.data[1])
            end = tmp_storage.tell()
            rewritten[rev] = (d.base, start, end, comp_mode)
    return rewritten
284 284
285 285
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Create the new revlog files and return an opener for them.

    The first `*_cutoff` bytes of each old file are copied into a freshly
    named new file, the docket end offsets are reset to the cutoffs and the
    revlog index is reloaded.

    Return a context manager factory; the context yields a tuple of open
    files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old index file is not part of it because the caller accesses it
    through its `old_index` object.
    """
    docket = revlog._docket
    join = revlog.opener.join

    # resolve the old paths *before* asking the docket for new file names
    old_index_filepath = join(docket.index_filepath())
    old_data_filepath = join(docket.data_filepath())
    old_sidedata_filepath = join(docket.sidedata_filepath())

    new_index_filepath = join(docket.new_index_file())
    new_data_filepath = join(docket.new_data_file())
    new_sidedata_filepath = join(docket.new_sidedata_file())

    # blindly copy everything that precedes the first rewritten revision
    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    for registered in (
        docket.index_filepath(),
        docket.data_filepath(),
        docket.sidedata_filepath(),
    ):
        revlog.opener.register_file(registered)

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # files are opened in this order and closed in reverse order
        with contextlib.ExitStack() as stack:
            old_data_file = stack.enter_context(open(old_data_filepath, 'rb'))
            old_sidedata_file = stack.enter_context(
                open(old_sidedata_filepath, 'rb')
            )
            new_index_file = stack.enter_context(
                open(new_index_filepath, 'r+b')
            )
            new_data_file = stack.enter_context(open(new_data_filepath, 'r+b'))
            new_sidedata_file = stack.enter_context(
                open(new_sidedata_filepath, 'r+b')
            )

            # position each new file at its end, which must be the cutoff
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff
            yield (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            )

    return all_files_opener
358 358
359 359
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """Append a normal (non censored) revision to the new revlog files.

    The revision data comes either from the old data file or, when `rev`
    has an entry in `rewritten_entries`, from the recomputed delta kept in
    `tmp_storage`. Sidedata, if any, is copied from the old sidedata file.
    A new index entry pointing at the new offsets is appended to
    `revlog.index` and written to the new index file; the docket end
    offsets are updated to match.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    # the offset field packs the flags in its low 16 bits
    offset_and_flags = entry[ENTRY_DATA_OFFSET]
    flags = offset_and_flags & 0xFFFF
    old_data_offset = offset_and_flags >> 16

    if rev in rewritten_entries:
        # the delta was recomputed, fetch it from the temporary storage
        data_delta_base, start, end, d_comp_mode = rewritten_entries[rev]
        new_data_size = end - start
        tmp_storage.seek(start)
        new_data = tmp_storage.read(new_data_size)
    else:
        # copy the stored chunk as is
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # Grouping contiguous read/write operations might be faster. However
    # censor is not an operation focused on stellar performance, so that
    # optimisation has not been written.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()
438 438
439 439
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """Rewrite revision `rev` as censored and append it to the new files.

    The `tombstone` is written uncompressed in place of the revision data
    and a new index entry flagged REVIDX_ISCENSORED is appended. Link
    revision, parents and node id are taken from the old index entry. No
    sidedata is written.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    old_entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # No sidedata is added: it might leak info about the censored version.

    censored_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        # the revision is its own delta base
        data_delta_base=rev,
        link_rev=old_entry[ENTRY_LINK_REV],
        parent_rev_1=old_entry[ENTRY_PARENT_1],
        parent_rev_2=old_entry[ENTRY_PARENT_2],
        node_id=old_entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(censored_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
489 489
490 490
491 491 def _get_filename_from_filelog_index(path):
492 492 # Drop the extension and the `data/` prefix
493 493 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
494 494 if len(path_part) < 2:
495 495 msg = _(b"cannot recognize filelog from filename: '%s'")
496 496 msg %= path
497 497 raise error.Abort(msg)
498 498
499 499 return path_part[1]
500 500
501 501
def _filelog_from_filename(repo, path):
    """Build the filelog of `path` on top of `repo.svfs`.

    Stolen from `engine.py`.
    """
    # imported locally to avoid an import cycle
    from .. import filelog

    return filelog.filelog(repo.svfs, path)
509 509
510 510
def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Overwrite the index entry of `rev` in `fp` with p1 and p2 swapped.

    The entry is read from `rl.index`, fields 5 and 6 are exchanged, and
    the first eight fields are packed and written at `offset` in `fp`.
    Raises error.ProgrammingError if the repository is not locked.
    """
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        msg = "repo needs to be locked to rewrite parents"
        raise error.ProgrammingError(msg)

    index_format = parsers.IndexObject.index_format
    fields = list(rl.index[rev])
    # positions 5 and 6 are the two parent fields (per the docstring above;
    # presumably ENTRY_PARENT_1 / ENTRY_PARENT_2 -- TODO confirm)
    fields[5], fields[6] = fields[6], fields[5]

    fp.seek(offset)
    fp.write(index_format.pack(*fields[:8]))
527 527
528 528
def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.

    The index file is first copied to a temporary file next to it; the
    entries are patched in that copy, which is then renamed over the
    original before the revlog index is reloaded. Only version 1 revlogs are
    supported (error.ProgrammingError otherwise). The temporary file is
    removed if anything fails before the rename.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    # `new_file_path` is relative to the opener; this is the actual location
    # on disk of the temporary copy
    new_file_fullpath = rl.opener.join(new_file_path)
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                new_file_fullpath,
                checkambig=rl.data_config.check_ambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # inline revlog: entry offsets have to be computed by
                    # the inlined index parser
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in to_fix:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # fixed-size entries: the offset is a plain multiple
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            # Clean up the temporary copy if it is still around. The joined
            # path must be used here: the opener-relative one would be
            # resolved against the current working directory instead.
            util.tryunlink(new_file_fullpath)
575 575
576 576
def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Tell whether `filerev` of filelog `fl` is affected by issue6528.

    Thin wrapper binding the revlog accessors (raw text and parent
    revisions) lazily and delegating to `_is_revision_affected_inner`.
    """
    rl_getter = lambda: fl._revlog

    def full_text():
        return rl_getter().rawdata(filerev)

    def parent_revs():
        return rl_getter().parentrevs(filerev)

    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )
583 583
584 584
585 585 def _is_revision_affected_inner(
586 586 full_text,
587 587 parents_revs,
588 588 filerev,
589 589 metadata_cache=None,
590 590 ):
591 591 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
592 592 special meaning compared to the reverse in the context of filelog-based
593 593 copytracing. issue6528 exists because new code assumed that parent ordering
594 594 didn't matter, so this detects if the revision contains metadata (since
595 595 it's only used for filelog-based copytracing) and its parents are in the
596 596 "wrong" order."""
597 597 try:
598 598 raw_text = full_text()
599 599 except error.CensoredNodeError:
600 600 # We don't care about censored nodes as they never carry metadata
601 601 return False
602 602
603 603 # raw text can be a `memoryview`, which doesn't implement `startswith`
604 604 has_meta = bytes(raw_text[:2]) == b'\x01\n'
605 605 if metadata_cache is not None:
606 606 metadata_cache[filerev] = has_meta
607 607 if has_meta:
608 608 (p1, p2) = parents_revs()
609 609 if p1 != nullrev and p2 == nullrev:
610 610 return True
611 611 return False
612 612
613 613
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast-path check of `filerev` of filelog `fl` for issue6528.

    Binds the revlog accessors lazily (censored flag, delta parent, stored
    chunk, raw text, parent revisions) and delegates to
    `_is_revision_affected_fast_inner`. `repo` is not used here.
    """
    rl = fl._revlog

    def is_censored():
        return rl.iscensored(filerev)

    def delta_base():
        return rl.deltaparent(filerev)

    def delta():
        return rl._inner._chunk(filerev)

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        filerev,
        metadata_cache,
    )
630 630
631 631
632 632 def _is_revision_affected_fast_inner(
633 633 is_censored,
634 634 delta_base,
635 635 delta,
636 636 full_text,
637 637 parent_revs,
638 638 filerev,
639 639 metadata_cache,
640 640 ):
641 641 """Optimization fast-path for `_is_revision_affected`.
642 642
643 643 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
644 644 revision to check if its base has metadata, saving computation of the full
645 645 text, instead looking at the current delta.
646 646
647 647 This optimization only works if the revisions are looked at in order."""
648 648
649 649 if is_censored():
650 650 # Censored revisions don't contain metadata, so they cannot be affected
651 651 metadata_cache[filerev] = False
652 652 return False
653 653
654 654 p1, p2 = parent_revs()
655 655 if p1 == nullrev or p2 != nullrev:
656 656 return False
657 657
658 658 delta_parent = delta_base()
659 659 parent_has_metadata = metadata_cache.get(delta_parent)
660 660 if parent_has_metadata is None:
661 661 return _is_revision_affected_inner(
662 662 full_text,
663 663 parent_revs,
664 664 filerev,
665 665 metadata_cache,
666 666 )
667 667
668 668 chunk = delta()
669 669 if not len(chunk):
670 670 # No diff for this revision
671 671 metadata_cache[filerev] = parent_has_metadata
672 672 return parent_has_metadata
673 673
674 674 header_length = 12
675 675 if len(chunk) < header_length:
676 676 raise error.Abort(_(b"patch cannot be decoded"))
677 677
678 678 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
679 679
680 680 if start < 2: # len(b'\x01\n') == 2
681 681 # This delta does *something* to the metadata marker (if any).
682 682 # Check it the slow way
683 683 is_affected = _is_revision_affected_inner(
684 684 full_text,
685 685 parent_revs,
686 686 filerev,
687 687 metadata_cache,
688 688 )
689 689 return is_affected
690 690
691 691 # The diff did not remove or add the metadata header, it's then in the same
692 692 # situation as its parent
693 693 metadata_cache[filerev] = parent_has_metadata
694 694 return parent_has_metadata
695 695
696 696
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions listed in the `from_report` file.

    Each revision is still checked for being affected, to prevent an
    unfortunate cyclic situation where we'd swap well-ordered parents again.
    Nothing is written to the filelogs when `dry_run` is set.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            # each line is "<hexnode>[,<hexnode>...] <filename>"
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            candidates = {
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            }
            not_affected = set()

            for filerev in candidates:
                if not _is_revision_affected(fl, filerev):
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)
                    not_affected.add(filerev)
                else:
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))

            to_fix = candidates - not_affected
            if not to_fix:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
            elif not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(to_fix))
735 735
736 736
def filter_delta_issue6528(revlog, deltas_iter):
    """Filter incoming deltas to repair issue6528 on the fly.

    `deltas_iter` yields 8-tuples `(node, p1_node, p2_node, linknode,
    deltabase, delta, flags, sidedata)`. Each tuple is yielded back, with
    `p1_node` and `p2_node` swapped when the revision is detected as
    affected. Incoming revisions are numbered sequentially starting at
    `len(revlog)`.

    Raises error.LookupError when the delta base or one of the parents is
    not known to `revlog`.
    """
    # nullrev is known upfront to carry no metadata
    metadata_cache = {nullrev: False}

    deltacomputer = deltas.deltacomputer(revlog)

    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        if not revlog.index.has_node(deltabase):
            raise error.LookupError(
                deltabase, revlog.radix, _(b'unknown parent')
            )
        base_rev = revlog.rev(deltabase)
        if not revlog.index.has_node(p1_node):
            raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
        p1_rev = revlog.rev(p1_node)
        if not revlog.index.has_node(p2_node):
            raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
        p2_rev = revlog.rev(p2_node)

        # The callables below are consumed within this same iteration, so
        # closing over the loop variables is safe.
        is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
        delta_base = lambda: base_rev
        parent_revs = lambda: (p1_rev, p2_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful however this is a bit
            # too intrusive for the "quick" issue6528 fix we are writing
            # before the 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            return deltacomputer.buildtext(revinfo)

        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            lambda: delta,
            full_text,
            parent_revs,
            rev,
            metadata_cache,
        )
        if is_affected:
            # same revision, with the parents swapped
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d
811 811
812 812
def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Find, and optionally fix, the filelog revisions hit by issue6528.

    - with `from_report`, only the revisions listed in that file are
      processed (see `_from_report`);
    - with `to_report`, affected revisions are written to that file instead
      of being fixed;
    - with `dry_run`, affected revisions are only reported through the ui;
    - with `paranoid`, each fast-path answer is cross-checked against the
      slow check and error.Abort is raised on disagreement.

    The repository is locked unless `dry_run` or `to_report` is set.
    """

    @contextlib.contextmanager
    def context():
        if not (dry_run or to_report):
            with repo.wlock(), repo.lock():
                yield
        else:
            # No need for locking
            yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = [
            entry
            for entry in repo.store.data_entries()
            if entry.is_revlog and entry.is_filelog
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        for entry in files:
            progress.increment()
            filename = entry.target_id
            fl = _filelog_from_filename(repo, entry.target_id)

            # Set of filerevs (or hex filenodes if `to_report`) that need
            # fixing
            to_fix = set()
            metadata_cache = {nullrev: False}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if not affected:
                    continue

                msg = b"found affected revision %d for file '%s'\n"
                ui.warn(msg % (filerev, filename))
                found_nothing = False
                if dry_run:
                    continue
                if to_report:
                    to_fix.add(binascii.hexlify(fl.node(filerev)))
                else:
                    to_fix.add(filerev)

            if not to_fix:
                continue
            to_fix = sorted(to_fix)
            if to_report:
                report_entries.append((filename, to_fix))
            else:
                _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

        if to_report and report_entries:
            # one line per filelog: comma separated hex nodes, then the path
            with open(to_report, mode="wb") as f:
                for path, to_fix in report_entries:
                    f.write(b"%s %s\n" % (b",".join(to_fix), path))

        progress.complete()
General Comments 0
You need to be logged in to leave comments. Login now