revlogutils: fix _chunk() reference...
Joerg Sonnenberger
r52804:767f47fc default
@@ -1,885 +1,885
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import annotations
11 11
12 12 import binascii
13 13 import contextlib
14 14 import os
15 15 import struct
16 16
17 17 from ..node import (
18 18 nullrev,
19 19 )
20 20 from .constants import (
21 21 COMP_MODE_PLAIN,
22 22 ENTRY_DATA_COMPRESSED_LENGTH,
23 23 ENTRY_DATA_COMPRESSION_MODE,
24 24 ENTRY_DATA_OFFSET,
25 25 ENTRY_DATA_UNCOMPRESSED_LENGTH,
26 26 ENTRY_DELTA_BASE,
27 27 ENTRY_LINK_REV,
28 28 ENTRY_NODE_ID,
29 29 ENTRY_PARENT_1,
30 30 ENTRY_PARENT_2,
31 31 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
32 32 ENTRY_SIDEDATA_COMPRESSION_MODE,
33 33 ENTRY_SIDEDATA_OFFSET,
34 34 REVIDX_ISCENSORED,
35 35 REVLOGV0,
36 36 REVLOGV1,
37 37 )
38 38 from ..i18n import _
39 39
40 40 from .. import (
41 41 error,
42 42 mdiff,
43 43 pycompat,
44 44 revlogutils,
45 45 util,
46 46 )
47 47 from ..utils import (
48 48 storageutil,
49 49 )
50 50 from . import (
51 51 constants,
52 52 deltas,
53 53 )
54 54
55 55
56 56 def v1_censor(rl, tr, censor_nodes, tombstone=b''):
57 57 """censors a revision in a "version 1" revlog"""
58 58 assert rl._format_version == constants.REVLOGV1, rl._format_version
59 59
60 60 # avoid cycle
61 61 from .. import revlog
62 62
63 63 censor_revs = set(rl.rev(node) for node in censor_nodes)
64 64 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
65 65
66 66 # Rewriting the revlog in place is hard. Our strategy for censoring is
67 67 # to create a new revlog, copy all revisions to it, then replace the
68 68 # revlogs on transaction close.
69 69 #
70 70 # This is a bit dangerous. We could easily have a mismatch of state.
71 71 newrl = revlog.revlog(
72 72 rl.opener,
73 73 target=rl.target,
74 74 radix=rl.radix,
75 75 postfix=b'tmpcensored',
76 76 censorable=True,
77 77 data_config=rl.data_config,
78 78 delta_config=rl.delta_config,
79 79 feature_config=rl.feature_config,
80 80 may_inline=rl._inline,
81 81 )
82 82 # inline splitting will prepare some transaction work that will get
83 83 # confused by the final file move. So if there is a risk of not being
84 84 # inline at the end, we prevent the new revlog from being inline in the
85 85 # first place.
86 86 assert not (newrl._inline and not rl._inline)
87 87
88 88 for rev in rl.revs():
89 89 node = rl.node(rev)
90 90 p1, p2 = rl.parents(node)
91 91
92 92 if rev in censor_revs:
93 93 newrl.addrawrevision(
94 94 tombstone,
95 95 tr,
96 96 rl.linkrev(rev),
97 97 p1,
98 98 p2,
99 99 node,
100 100 constants.REVIDX_ISCENSORED,
101 101 )
102 102
103 103 if newrl.deltaparent(rev) != nullrev:
104 104 m = _(b'censored revision stored as delta; cannot censor')
105 105 h = _(
106 106 b'censoring of revlogs is not fully implemented;'
107 107 b' please report this bug'
108 108 )
109 109 raise error.Abort(m, hint=h)
110 110 continue
111 111
112 112 if rl.iscensored(rev):
113 113 if rl.deltaparent(rev) != nullrev:
114 114 m = _(
115 115 b'cannot censor due to censored '
116 116 b'revision having delta stored'
117 117 )
118 118 raise error.Abort(m)
119 119 rawtext = rl._inner._chunk(rev)
120 120 else:
121 121 rawtext = rl.rawdata(rev)
122 122
123 123 newrl.addrawrevision(
124 124 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
125 125 )
126 126
127 127 tr.addbackup(rl._indexfile, location=b'store')
128 128 if not rl._inline:
129 129 tr.addbackup(rl._datafile, location=b'store')
130 130
131 131 rl.opener.rename(newrl._indexfile, rl._indexfile)
132 132 if newrl._inline:
133 133 assert rl._inline
134 134 else:
135 135 assert not rl._inline
136 136 rl.opener.rename(newrl._datafile, rl._datafile)
137 137
138 138 rl.clearcaches()
139 139 chunk_cache = rl._loadindex()
140 140 rl._load_inner(chunk_cache)
141 141
142 142
143 143 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
144 144 """censors a revision in a "version 2" revlog"""
145 145 assert revlog._format_version != REVLOGV0, revlog._format_version
146 146 assert revlog._format_version != REVLOGV1, revlog._format_version
147 147
148 148 censor_revs = {revlog.rev(node) for node in censor_nodes}
149 149 _rewrite_v2(revlog, tr, censor_revs, tombstone)
150 150
151 151
152 152 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
153 153 """rewrite a revlog to censor some of its content
154 154
155 155 General principle
156 156
157 157 We create new revlog files (index/data/sidedata) to copy the content of
158 158 the existing data without the censored data.
159 159
160 160 We need to recompute a new delta for any revision that used the censored
161 161 revision as its delta base. As the cumulative size of the new deltas may
162 162 be large, we keep them in a temporary file until they are written to
163 163 their final destination.
164 164
165 165 All data before the censored data can be blindly copied. The rest needs
166 166 to be copied as we go, and the associated index entry needs adjustment.
167 167 """
168 168 assert revlog._format_version != REVLOGV0, revlog._format_version
169 169 assert revlog._format_version != REVLOGV1, revlog._format_version
170 170
171 171 old_index = revlog.index
172 172 docket = revlog._docket
173 173
174 174 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
175 175
176 176 first_excl_rev = min(censor_revs)
177 177
178 178 first_excl_entry = revlog.index[first_excl_rev]
179 179 index_cutoff = revlog.index.entry_size * first_excl_rev
180 180 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
181 181 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
182 182
183 183 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
184 184 # rev → (new_base, data_start, data_end, compression_mode)
185 185 rewritten_entries = _precompute_rewritten_delta(
186 186 revlog,
187 187 old_index,
188 188 censor_revs,
189 189 tmp_storage,
190 190 )
191 191
192 192 all_files = _setup_new_files(
193 193 revlog,
194 194 index_cutoff,
195 195 data_cutoff,
196 196 sidedata_cutoff,
197 197 )
198 198
199 199 # we don't need to open the old index file since its content already
200 200 # exists in a usable form in `old_index`.
201 201 with all_files() as open_files:
202 202 (
203 203 old_data_file,
204 204 old_sidedata_file,
205 205 new_index_file,
206 206 new_data_file,
207 207 new_sidedata_file,
208 208 ) = open_files
209 209
210 210
211 211
212 212 # Writing the censored revision(s) and all subsequent revisions
213 213 for rev in range(first_excl_rev, len(old_index)):
214 214 if rev in censor_revs:
215 215 _rewrite_censor(
216 216 revlog,
217 217 old_index,
218 218 open_files,
219 219 rev,
220 220 tombstone,
221 221 )
222 222 else:
223 223 _rewrite_simple(
224 224 revlog,
225 225 old_index,
226 226 open_files,
227 227 rev,
228 228 rewritten_entries,
229 229 tmp_storage,
230 230 )
231 231 docket.write(transaction=None, stripping=True)
232 232
233 233
234 234 def _precompute_rewritten_delta(
235 235 revlog,
236 236 old_index,
237 237 excluded_revs,
238 238 tmp_storage,
239 239 ):
240 240 """Compute new deltas for revisions whose delta is based on a revision
241 241 that will not survive as is.
242 242
243 243 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
244 244 """
245 245 dc = deltas.deltacomputer(revlog)
246 246 rewritten_entries = {}
247 247 first_excl_rev = min(excluded_revs)
248 248 with revlog.reading():
249 249 for rev in range(first_excl_rev, len(old_index)):
250 250 if rev in excluded_revs:
251 251 # this revision will be rewritten as a censored tombstone, so we
252 252 # don't need to consider recomputing a delta for it.
253 253 continue
254 254 entry = old_index[rev]
255 255 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
256 256 continue
257 257 # This revision uses the censored revision as the base for its
258 258 # delta. We need to compute a new delta for it.
259 259 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
260 260 # this revision is empty, we can delta against nullrev
261 261 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
262 262 else:
263 263 text = revlog.rawdata(rev)
264 264 info = revlogutils.revisioninfo(
265 265 node=entry[ENTRY_NODE_ID],
266 266 p1=revlog.node(entry[ENTRY_PARENT_1]),
267 267 p2=revlog.node(entry[ENTRY_PARENT_2]),
268 268 btext=[text],
269 269 textlen=len(text),
270 270 cachedelta=None,
271 271 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
272 272 )
273 273 d = dc.finddeltainfo(
274 274 info, excluded_bases=excluded_revs, target_rev=rev
275 275 )
276 276 default_comp = revlog._docket.default_compression_header
277 277 comp_mode, d = deltas.delta_compression(default_comp, d)
278 278 # using `tell` is a bit lazy, but we are not here for speed
279 279 start = tmp_storage.tell()
280 280 tmp_storage.write(d.data[1])
281 281 end = tmp_storage.tell()
282 282 rewritten_entries[rev] = (d.base, start, end, comp_mode)
283 283 return rewritten_entries
284 284
285 285
286 286 def _setup_new_files(
287 287 revlog,
288 288 index_cutoff,
289 289 data_cutoff,
290 290 sidedata_cutoff,
291 291 ):
292 292 """
293 293
294 294 Return a context manager that opens all the relevant files:
295 295 - old_data_file,
296 296 - old_sidedata_file,
297 297 - new_index_file,
298 298 - new_data_file,
299 299 - new_sidedata_file,
300 300
301 301 The old index file is not included here because its content is
302 302 accessed through the `old_index` object of the calling function.
303 303 """
304 304 docket = revlog._docket
305 305 old_index_filepath = revlog.opener.join(docket.index_filepath())
306 306 old_data_filepath = revlog.opener.join(docket.data_filepath())
307 307 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
308 308
309 309 new_index_filepath = revlog.opener.join(docket.new_index_file())
310 310 new_data_filepath = revlog.opener.join(docket.new_data_file())
311 311 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
312 312
313 313 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
314 314 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
315 315 util.copyfile(
316 316 old_sidedata_filepath,
317 317 new_sidedata_filepath,
318 318 nb_bytes=sidedata_cutoff,
319 319 )
320 320 revlog.opener.register_file(docket.index_filepath())
321 321 revlog.opener.register_file(docket.data_filepath())
322 322 revlog.opener.register_file(docket.sidedata_filepath())
323 323
324 324 docket.index_end = index_cutoff
325 325 docket.data_end = data_cutoff
326 326 docket.sidedata_end = sidedata_cutoff
327 327
328 328 # reload the revlog internal information
329 329 revlog.clearcaches()
330 330 revlog._loadindex(docket=docket)
331 331
332 332 @contextlib.contextmanager
333 333 def all_files_opener():
334 334 # hide the opening in a helper function to please check-code, black,
335 335 # and various Python versions at the same time
336 336 with open(old_data_filepath, 'rb') as old_data_file:
337 337 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
338 338 with open(new_index_filepath, 'r+b') as new_index_file:
339 339 with open(new_data_filepath, 'r+b') as new_data_file:
340 340 with open(
341 341 new_sidedata_filepath, 'r+b'
342 342 ) as new_sidedata_file:
343 343 new_index_file.seek(0, os.SEEK_END)
344 344 assert new_index_file.tell() == index_cutoff
345 345 new_data_file.seek(0, os.SEEK_END)
346 346 assert new_data_file.tell() == data_cutoff
347 347 new_sidedata_file.seek(0, os.SEEK_END)
348 348 assert new_sidedata_file.tell() == sidedata_cutoff
349 349 yield (
350 350 old_data_file,
351 351 old_sidedata_file,
352 352 new_index_file,
353 353 new_data_file,
354 354 new_sidedata_file,
355 355 )
356 356
357 357 return all_files_opener
358 358
359 359
360 360 def _rewrite_simple(
361 361 revlog,
362 362 old_index,
363 363 all_files,
364 364 rev,
365 365 rewritten_entries,
366 366 tmp_storage,
367 367 ):
368 368 """append a normal revision to the index after the rewritten one(s)"""
369 369 (
370 370 old_data_file,
371 371 old_sidedata_file,
372 372 new_index_file,
373 373 new_data_file,
374 374 new_sidedata_file,
375 375 ) = all_files
376 376 entry = old_index[rev]
377 377 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
378 378 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
379 379
380 380 if rev not in rewritten_entries:
381 381 old_data_file.seek(old_data_offset)
382 382 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
383 383 new_data = old_data_file.read(new_data_size)
384 384 data_delta_base = entry[ENTRY_DELTA_BASE]
385 385 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
386 386 else:
387 387 (
388 388 data_delta_base,
389 389 start,
390 390 end,
391 391 d_comp_mode,
392 392 ) = rewritten_entries[rev]
393 393 new_data_size = end - start
394 394 tmp_storage.seek(start)
395 395 new_data = tmp_storage.read(new_data_size)
396 396
397 397 # It might be faster to group contiguous read/write operations;
398 398 # however, this is censoring, an operation that is not focused
399 399 # on stellar performance, so this optimisation has not been
400 400 # written yet.
401 401 new_data_offset = new_data_file.tell()
402 402 new_data_file.write(new_data)
403 403
404 404 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
405 405 new_sidedata_offset = new_sidedata_file.tell()
406 406 if 0 < sidedata_size:
407 407 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
408 408 old_sidedata_file.seek(old_sidedata_offset)
409 409 new_sidedata = old_sidedata_file.read(sidedata_size)
410 410 new_sidedata_file.write(new_sidedata)
411 411
412 412 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
413 413 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
414 414 assert data_delta_base <= rev, (data_delta_base, rev)
415 415
416 416 new_entry = revlogutils.entry(
417 417 flags=flags,
418 418 data_offset=new_data_offset,
419 419 data_compressed_length=new_data_size,
420 420 data_uncompressed_length=data_uncompressed_length,
421 421 data_delta_base=data_delta_base,
422 422 link_rev=entry[ENTRY_LINK_REV],
423 423 parent_rev_1=entry[ENTRY_PARENT_1],
424 424 parent_rev_2=entry[ENTRY_PARENT_2],
425 425 node_id=entry[ENTRY_NODE_ID],
426 426 sidedata_offset=new_sidedata_offset,
427 427 sidedata_compressed_length=sidedata_size,
428 428 data_compression_mode=d_comp_mode,
429 429 sidedata_compression_mode=sd_com_mode,
430 430 )
431 431 revlog.index.append(new_entry)
432 432 entry_bin = revlog.index.entry_binary(rev)
433 433 new_index_file.write(entry_bin)
434 434
435 435 revlog._docket.index_end = new_index_file.tell()
436 436 revlog._docket.data_end = new_data_file.tell()
437 437 revlog._docket.sidedata_end = new_sidedata_file.tell()
438 438
439 439
440 440 def _rewrite_censor(
441 441 revlog,
442 442 old_index,
443 443 all_files,
444 444 rev,
445 445 tombstone,
446 446 ):
447 447 """rewrite and append a censored revision"""
448 448 (
449 449 old_data_file,
450 450 old_sidedata_file,
451 451 new_index_file,
452 452 new_data_file,
453 453 new_sidedata_file,
454 454 ) = all_files
455 455 entry = old_index[rev]
456 456
457 457 # XXX consider trying the default compression too
458 458 new_data_size = len(tombstone)
459 459 new_data_offset = new_data_file.tell()
460 460 new_data_file.write(tombstone)
461 461
462 462 # we are not adding any sidedata as they might leak info about the censored version
463 463
464 464 link_rev = entry[ENTRY_LINK_REV]
465 465
466 466 p1 = entry[ENTRY_PARENT_1]
467 467 p2 = entry[ENTRY_PARENT_2]
468 468
469 469 new_entry = revlogutils.entry(
470 470 flags=constants.REVIDX_ISCENSORED,
471 471 data_offset=new_data_offset,
472 472 data_compressed_length=new_data_size,
473 473 data_uncompressed_length=new_data_size,
474 474 data_delta_base=rev,
475 475 link_rev=link_rev,
476 476 parent_rev_1=p1,
477 477 parent_rev_2=p2,
478 478 node_id=entry[ENTRY_NODE_ID],
479 479 sidedata_offset=0,
480 480 sidedata_compressed_length=0,
481 481 data_compression_mode=COMP_MODE_PLAIN,
482 482 sidedata_compression_mode=COMP_MODE_PLAIN,
483 483 )
484 484 revlog.index.append(new_entry)
485 485 entry_bin = revlog.index.entry_binary(rev)
486 486 new_index_file.write(entry_bin)
487 487 revlog._docket.index_end = new_index_file.tell()
488 488 revlog._docket.data_end = new_data_file.tell()
489 489
490 490
491 491 def _get_filename_from_filelog_index(path):
492 492 # Drop the extension and the `data/` prefix
493 493 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
494 494 if len(path_part) < 2:
495 495 msg = _(b"cannot recognize filelog from filename: '%s'")
496 496 msg %= path
497 497 raise error.Abort(msg)
498 498
499 499 return path_part[1]
500 500
501 501
502 502 def _filelog_from_filename(repo, path):
503 503 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
504 504
505 505 from .. import filelog # avoid cycle
506 506
507 507 fl = filelog.filelog(repo.svfs, path)
508 508 return fl
509 509
510 510
511 511 def _write_swapped_parents(repo, rl, rev, offset, fp):
512 512 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
513 513 from ..pure import parsers # avoid cycle
514 514
515 515 if repo._currentlock(repo._lockref) is None:
516 516 # Let's be paranoid about it
517 517 msg = "repo needs to be locked to rewrite parents"
518 518 raise error.ProgrammingError(msg)
519 519
520 520 index_format = parsers.IndexObject.index_format
521 521 entry = rl.index[rev]
522 522 new_entry = list(entry)
523 523 new_entry[5], new_entry[6] = entry[6], entry[5]
524 524 packed = index_format.pack(*new_entry[:8])
525 525 fp.seek(offset)
526 526 fp.write(packed)
527 527
528 528
529 529 def _reorder_filelog_parents(repo, fl, to_fix):
530 530 """
531 531 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
532 532 new version to disk, overwriting the old one with a rename.
533 533 """
534 534 from ..pure import parsers # avoid cycle
535 535
536 536 ui = repo.ui
537 537 assert len(to_fix) > 0
538 538 rl = fl._revlog
539 539 if rl._format_version != constants.REVLOGV1:
540 540 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
541 541 raise error.ProgrammingError(msg)
542 542
543 543 index_file = rl._indexfile
544 544 new_file_path = index_file + b'.tmp-parents-fix'
545 545 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
546 546
547 547 with ui.uninterruptible():
548 548 try:
549 549 util.copyfile(
550 550 rl.opener.join(index_file),
551 551 rl.opener.join(new_file_path),
552 552 checkambig=rl.data_config.check_ambig,
553 553 )
554 554
555 555 with rl.opener(new_file_path, mode=b"r+") as fp:
556 556 if rl._inline:
557 557 index = parsers.InlinedIndexObject(fp.read())
558 558 for rev in fl.revs():
559 559 if rev in to_fix:
560 560 offset = index._calculate_index(rev)
561 561 _write_swapped_parents(repo, rl, rev, offset, fp)
562 562 ui.write(repaired_msg % (rev, index_file))
563 563 else:
564 564 index_format = parsers.IndexObject.index_format
565 565 for rev in to_fix:
566 566 offset = rev * index_format.size
567 567 _write_swapped_parents(repo, rl, rev, offset, fp)
568 568 ui.write(repaired_msg % (rev, index_file))
569 569
570 570 rl.opener.rename(new_file_path, index_file)
571 571 rl.clearcaches()
572 572 rl._loadindex()
573 573 finally:
574 574 util.tryunlink(new_file_path)
575 575
576 576
577 577 def _is_revision_affected(fl, filerev, metadata_cache=None):
578 578 full_text = lambda: fl._revlog.rawdata(filerev)
579 579 parent_revs = lambda: fl._revlog.parentrevs(filerev)
580 580 return _is_revision_affected_inner(
581 581 full_text, parent_revs, filerev, metadata_cache
582 582 )
583 583
584 584
585 585 def _is_revision_affected_inner(
586 586 full_text,
587 587 parents_revs,
588 588 filerev,
589 589 metadata_cache=None,
590 590 ):
591 591 """Mercurial currently (5.9rc0) treats `p1 == nullrev and p2 != nullrev`
592 592 as having a special meaning compared to the reverse in the context of
593 593 filelog-based copytracing. issue6528 exists because new code assumed that
594 594 parent ordering didn't matter, so this detects whether the revision
595 595 contains metadata (since it's only used for filelog-based copytracing)
596 596 and whether its parents are in the "wrong" order."""
597 597 try:
598 598 raw_text = full_text()
599 599 except error.CensoredNodeError:
600 600 # We don't care about censored nodes as they never carry metadata
601 601 return False
602 602
603 603 # raw text can be a `memoryview`, which doesn't implement `startswith`
604 604 has_meta = bytes(raw_text[:2]) == b'\x01\n'
605 605 if metadata_cache is not None:
606 606 metadata_cache[filerev] = has_meta
607 607 if has_meta:
608 608 (p1, p2) = parents_revs()
609 609 if p1 != nullrev and p2 == nullrev:
610 610 return True
611 611 return False
612 612
613 613
614 614 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
615 615 rl = fl._revlog
616 616 is_censored = lambda: rl.iscensored(filerev)
617 617 delta_base = lambda: rl.deltaparent(filerev)
618 delta = lambda: rl._chunk(filerev)
618 delta = lambda: rl._inner._chunk(filerev)
619 619 full_text = lambda: rl.rawdata(filerev)
620 620 parent_revs = lambda: rl.parentrevs(filerev)
621 621 return _is_revision_affected_fast_inner(
622 622 is_censored,
623 623 delta_base,
624 624 delta,
625 625 full_text,
626 626 parent_revs,
627 627 filerev,
628 628 metadata_cache,
629 629 )
630 630
631 631
632 632 def _is_revision_affected_fast_inner(
633 633 is_censored,
634 634 delta_base,
635 635 delta,
636 636 full_text,
637 637 parent_revs,
638 638 filerev,
639 639 metadata_cache,
640 640 ):
641 641 """Optimization fast-path for `_is_revision_affected`.
642 642
643 643 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
644 644 revision to check whether its base has metadata, avoiding computation
645 645 of the full text by looking at the current delta instead.
646 646
647 647 This optimization only works if the revisions are looked at in order."""
648 648
649 649 if is_censored():
650 650 # Censored revisions don't contain metadata, so they cannot be affected
651 651 metadata_cache[filerev] = False
652 652 return False
653 653
654 654 p1, p2 = parent_revs()
655 655 if p1 == nullrev or p2 != nullrev:
656 656 return False
657 657
658 658 delta_parent = delta_base()
659 659 parent_has_metadata = metadata_cache.get(delta_parent)
660 660 if parent_has_metadata is None:
661 661 return _is_revision_affected_inner(
662 662 full_text,
663 663 parent_revs,
664 664 filerev,
665 665 metadata_cache,
666 666 )
667 667
668 668 chunk = delta()
669 669 if not len(chunk):
670 670 # No diff for this revision
671 671 return parent_has_metadata
672 672
673 673 header_length = 12
674 674 if len(chunk) < header_length:
675 675 raise error.Abort(_(b"patch cannot be decoded"))
676 676
677 677 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
678 678
679 679 if start < 2: # len(b'\x01\n') == 2
680 680 # This delta does *something* to the metadata marker (if any).
681 681 # Check it the slow way
682 682 is_affected = _is_revision_affected_inner(
683 683 full_text,
684 684 parent_revs,
685 685 filerev,
686 686 metadata_cache,
687 687 )
688 688 return is_affected
689 689
690 690 # The diff did not remove or add the metadata header, so it is in the
691 691 # same situation as its parent
692 692 metadata_cache[filerev] = parent_has_metadata
693 693 return parent_has_metadata
694 694
695 695
696 696 def _from_report(ui, repo, context, from_report, dry_run):
697 697 """
698 698 Fix the revisions given in the `from_report` file, but still check
699 699 whether the revisions are indeed affected, to prevent an unfortunate
700 700 cyclic situation where we'd swap well-ordered parents again.
701 701
702 702 See the doc for `debug_fix_issue6528` for the format documentation.
703 703 """
704 704 ui.write(_(b"loading report file '%s'\n") % from_report)
705 705
706 706 with context(), open(from_report, mode='rb') as f:
707 707 for line in f.read().split(b'\n'):
708 708 if not line:
709 709 continue
710 710 filenodes, filename = line.split(b' ', 1)
711 711 fl = _filelog_from_filename(repo, filename)
712 712 to_fix = set(
713 713 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
714 714 )
715 715 excluded = set()
716 716
717 717 for filerev in to_fix:
718 718 if _is_revision_affected(fl, filerev):
719 719 msg = b"found affected revision %d for filelog '%s'\n"
720 720 ui.warn(msg % (filerev, filename))
721 721 else:
722 722 msg = _(b"revision %s of file '%s' is not affected\n")
723 723 msg %= (binascii.hexlify(fl.node(filerev)), filename)
724 724 ui.warn(msg)
725 725 excluded.add(filerev)
726 726
727 727 to_fix = to_fix - excluded
728 728 if not to_fix:
729 729 msg = _(b"no affected revisions were found for '%s'\n")
730 730 ui.write(msg % filename)
731 731 continue
732 732 if not dry_run:
733 733 _reorder_filelog_parents(repo, fl, sorted(to_fix))
734 734
735 735
736 736 def filter_delta_issue6528(revlog, deltas_iter):
737 737 """filter incoming deltas to repair issue6528 on the fly"""
738 738 metadata_cache = {}
739 739
740 740 deltacomputer = deltas.deltacomputer(revlog)
741 741
742 742 for rev, d in enumerate(deltas_iter, len(revlog)):
743 743 (
744 744 node,
745 745 p1_node,
746 746 p2_node,
747 747 linknode,
748 748 deltabase,
749 749 delta,
750 750 flags,
751 751 sidedata,
752 752 ) = d
753 753
754 754 if not revlog.index.has_node(deltabase):
755 755 raise error.LookupError(
756 756 deltabase, revlog.radix, _(b'unknown parent')
757 757 )
758 758 base_rev = revlog.rev(deltabase)
759 759 if not revlog.index.has_node(p1_node):
760 760 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
761 761 p1_rev = revlog.rev(p1_node)
762 762 if not revlog.index.has_node(p2_node):
763 763 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
764 764 p2_rev = revlog.rev(p2_node)
765 765
766 766 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
767 767 # `base_rev` was already resolved from `deltabase` above
768 768 delta_base = lambda: base_rev
769 769 parent_revs = lambda: (p1_rev, p2_rev)
770 770
771 771 def full_text():
772 772 # note: being able to reuse the full text computation in the
773 773 # underlying addrevision would be useful; however, this is a bit
774 774 # too intrusive for the "quick" issue6528 fix we are writing
775 775 # before the 5.8 release
776 776 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
777 777
778 778 revinfo = revlogutils.revisioninfo(
779 779 node,
780 780 p1_node,
781 781 p2_node,
782 782 [None],
783 783 textlen,
784 784 (base_rev, delta),
785 785 flags,
786 786 )
787 787 return deltacomputer.buildtext(revinfo)
788 788
789 789 is_affected = _is_revision_affected_fast_inner(
790 790 is_censored,
791 791 delta_base,
792 792 lambda: delta,
793 793 full_text,
794 794 parent_revs,
795 795 rev,
796 796 metadata_cache,
797 797 )
798 798 if is_affected:
799 799 d = (
800 800 node,
801 801 p2_node,
802 802 p1_node,
803 803 linknode,
804 804 deltabase,
805 805 delta,
806 806 flags,
807 807 sidedata,
808 808 )
809 809 yield d
810 810
811 811
812 812 def repair_issue6528(
813 813 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
814 814 ):
815 815 @contextlib.contextmanager
816 816 def context():
817 817 if dry_run or to_report: # No need for locking
818 818 yield
819 819 else:
820 820 with repo.wlock(), repo.lock():
821 821 yield
822 822
823 823 if from_report:
824 824 return _from_report(ui, repo, context, from_report, dry_run)
825 825
826 826 report_entries = []
827 827
828 828 with context():
829 829 files = list(
830 830 entry
831 831 for entry in repo.store.data_entries()
832 832 if entry.is_revlog and entry.is_filelog
833 833 )
834 834
835 835 progress = ui.makeprogress(
836 836 _(b"looking for affected revisions"),
837 837 unit=_(b"filelogs"),
838 838 total=len(files),
839 839 )
840 840 found_nothing = True
841 841
842 842 for entry in files:
843 843 progress.increment()
844 844 filename = entry.target_id
845 845 fl = _filelog_from_filename(repo, entry.target_id)
846 846
847 847 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
848 848 to_fix = set()
849 849 metadata_cache = {}
850 850 for filerev in fl.revs():
851 851 affected = _is_revision_affected_fast(
852 852 repo, fl, filerev, metadata_cache
853 853 )
854 854 if paranoid:
855 855 slow = _is_revision_affected(fl, filerev)
856 856 if slow != affected:
857 857 msg = _(b"paranoid check failed for '%s' at node %s")
858 858 node = binascii.hexlify(fl.node(filerev))
859 859 raise error.Abort(msg % (filename, node))
860 860 if affected:
861 861 msg = b"found affected revision %d for file '%s'\n"
862 862 ui.warn(msg % (filerev, filename))
863 863 found_nothing = False
864 864 if not dry_run:
865 865 if to_report:
866 866 to_fix.add(binascii.hexlify(fl.node(filerev)))
867 867 else:
868 868 to_fix.add(filerev)
869 869
870 870 if to_fix:
871 871 to_fix = sorted(to_fix)
872 872 if to_report:
873 873 report_entries.append((filename, to_fix))
874 874 else:
875 875 _reorder_filelog_parents(repo, fl, to_fix)
876 876
877 877 if found_nothing:
878 878 ui.write(_(b"no affected revisions were found\n"))
879 879
880 880 if to_report and report_entries:
881 881 with open(to_report, mode="wb") as f:
882 882 for path, to_fix in report_entries:
883 883 f.write(b"%s %s\n" % (b",".join(to_fix), path))
884 884
885 885 progress.complete()