censor: migrate the logic to a set of `censor_revs`...
marmoute
r48264:c81a5297 default
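At its core, the change replaces the single `censor_rev` variable with a `censor_revs` set and drives one rewrite loop off that set, so the same code path can later censor several revisions in a single pass. A minimal sketch of the new loop shape, condensed from the diff below (the setup of `old_index`, `open_files`, `rewritten_entries` and `tmp_storage` is as defined in this module):

    censor_revs = {revlog.rev(censornode)}   # still built from a single node for now
    first_excl_rev = min(censor_revs)        # everything before this rev is copied verbatim
    for rev in range(first_excl_rev, len(old_index)):
        if rev in censor_revs:
            # replace the censored revision's data with the tombstone
            _rewrite_censor(revlog, old_index, open_files, rev, tombstone)
        else:
            # copy (or re-delta) every other revision into the new files
            _rewrite_simple(
                revlog, old_index, open_files, rev, rewritten_entries, tmp_storage
            )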
@@ -1,461 +1,465 @@
1 1 # censor code related to censoring revisions
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import contextlib
11 11 import os
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 )
16 16 from .constants import (
17 17 COMP_MODE_PLAIN,
18 18 ENTRY_DATA_COMPRESSED_LENGTH,
19 19 ENTRY_DATA_COMPRESSION_MODE,
20 20 ENTRY_DATA_OFFSET,
21 21 ENTRY_DATA_UNCOMPRESSED_LENGTH,
22 22 ENTRY_DELTA_BASE,
23 23 ENTRY_LINK_REV,
24 24 ENTRY_NODE_ID,
25 25 ENTRY_PARENT_1,
26 26 ENTRY_PARENT_2,
27 27 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
28 28 ENTRY_SIDEDATA_COMPRESSION_MODE,
29 29 ENTRY_SIDEDATA_OFFSET,
30 30 REVLOGV0,
31 31 REVLOGV1,
32 32 )
33 33 from ..i18n import _
34 34
35 35 from .. import (
36 36 error,
37 37 pycompat,
38 38 revlogutils,
39 39 util,
40 40 )
41 41 from ..utils import (
42 42 storageutil,
43 43 )
44 44 from . import (
45 45 constants,
46 46 deltas,
47 47 )
48 48
49 49
50 50 def v1_censor(rl, tr, censornode, tombstone=b''):
51 51 """censors a revision in a "version 1" revlog"""
52 52 assert rl._format_version == constants.REVLOGV1, rl._format_version
53 53
54 54 # avoid cycle
55 55 from .. import revlog
56 56
57 57 censorrev = rl.rev(censornode)
58 58 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
59 59
60 60 # Rewriting the revlog in place is hard. Our strategy for censoring is
61 61 # to create a new revlog, copy all revisions to it, then replace the
62 62 # revlogs on transaction close.
63 63 #
64 64 # This is a bit dangerous. We could easily have a mismatch of state.
65 65 newrl = revlog.revlog(
66 66 rl.opener,
67 67 target=rl.target,
68 68 radix=rl.radix,
69 69 postfix=b'tmpcensored',
70 70 censorable=True,
71 71 )
72 72 newrl._format_version = rl._format_version
73 73 newrl._format_flags = rl._format_flags
74 74 newrl._generaldelta = rl._generaldelta
75 75 newrl._parse_index = rl._parse_index
76 76
77 77 for rev in rl.revs():
78 78 node = rl.node(rev)
79 79 p1, p2 = rl.parents(node)
80 80
81 81 if rev == censorrev:
82 82 newrl.addrawrevision(
83 83 tombstone,
84 84 tr,
85 85 rl.linkrev(censorrev),
86 86 p1,
87 87 p2,
88 88 censornode,
89 89 constants.REVIDX_ISCENSORED,
90 90 )
91 91
92 92 if newrl.deltaparent(rev) != nullrev:
93 93 m = _(b'censored revision stored as delta; cannot censor')
94 94 h = _(
95 95 b'censoring of revlogs is not fully implemented;'
96 96 b' please report this bug'
97 97 )
98 98 raise error.Abort(m, hint=h)
99 99 continue
100 100
101 101 if rl.iscensored(rev):
102 102 if rl.deltaparent(rev) != nullrev:
103 103 m = _(
104 104 b'cannot censor due to censored '
105 105 b'revision having delta stored'
106 106 )
107 107 raise error.Abort(m)
108 108 rawtext = rl._chunk(rev)
109 109 else:
110 110 rawtext = rl.rawdata(rev)
111 111
112 112 newrl.addrawrevision(
113 113 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
114 114 )
115 115
116 116 tr.addbackup(rl._indexfile, location=b'store')
117 117 if not rl._inline:
118 118 tr.addbackup(rl._datafile, location=b'store')
119 119
120 120 rl.opener.rename(newrl._indexfile, rl._indexfile)
121 121 if not rl._inline:
122 122 rl.opener.rename(newrl._datafile, rl._datafile)
123 123
124 124 rl.clearcaches()
125 125 rl._loadindex()
126 126
127 127
128 128 def v2_censor(revlog, tr, censornode, tombstone=b''):
129 129 """censors a revision in a "version 2" revlog"""
130 130 # General principle
131 131 #
132 132 # We create new revlog files (index/data/sidedata) to copy the content of
133 133 # the existing data without the censored data.
134 134 #
135 135 # We need to recompute a new delta for any revision that used the censored
136 136 # revision as its delta base. As the cumulative size of the new deltas may be
137 137 # large, we store them in a temporary file until they are written to their
138 138 # final destination.
139 139 #
140 140 # All data before the censored data can be blindly copied. The rest needs
141 141 # to be copied as we go and the associated index entry needs adjustment.
142 142
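For concreteness, a hedged illustration of those copy boundaries (the revision number 5 below is made up for the example, not taken from the change):

    # If the smallest censored revision is rev 5, the cutoffs computed below become:
    #   index_cutoff    = 5 * revlog.index.entry_size              # entries 0..4 copied verbatim
    #   data_cutoff     = revlog.index[5][ENTRY_DATA_OFFSET] >> 16 # start of rev 5's data chunk
    #   sidedata_cutoff = revlog.sidedata_cut_off(5)               # start of rev 5's sidedata
    # Everything before those offsets is copied blindly into the new files;
    # rev 5 and every later revision is rewritten entry by entry.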
143 143 assert revlog._format_version != REVLOGV0, revlog._format_version
144 144 assert revlog._format_version != REVLOGV1, revlog._format_version
145 145
146 146 old_index = revlog.index
147 147 docket = revlog._docket
148 148
149 censor_rev = revlog.rev(censornode)
149 censor_revs = {revlog.rev(censornode)}
150 150 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
151 151
152 censored_entry = revlog.index[censor_rev]
153 index_cutoff = revlog.index.entry_size * censor_rev
154 data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
155 sidedata_cutoff = revlog.sidedata_cut_off(censor_rev)
152 first_excl_rev = min(censor_revs)
153
154 first_excl_entry = revlog.index[first_excl_rev]
155 index_cutoff = revlog.index.entry_size * first_excl_rev
156 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
157 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
156 158
157 159 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
158 160 # rev → (new_base, data_start, data_end, compression_mode)
159 161 rewritten_entries = _precompute_rewritten_delta(
160 162 revlog,
161 163 old_index,
162 {censor_rev},
164 censor_revs,
163 165 tmp_storage,
164 166 )
165 167
166 168 all_files = _setup_new_files(
167 169 revlog,
168 170 index_cutoff,
169 171 data_cutoff,
170 172 sidedata_cutoff,
171 173 )
172 174
173 175 # we don't need to open the old index file since its content already
174 176 # exists in a usable form in `old_index`.
175 177 with all_files() as open_files:
176 178 (
177 179 old_data_file,
178 180 old_sidedata_file,
179 181 new_index_file,
180 182 new_data_file,
181 183 new_sidedata_file,
182 184 ) = open_files
183 185
184 186 # writing the censored revision
187
188 # Writing all subsequent revisions
189 for rev in range(first_excl_rev, len(old_index)):
190 if rev in censor_revs:
185 191 _rewrite_censor(
186 192 revlog,
187 193 old_index,
188 194 open_files,
189 censor_rev,
195 rev,
190 196 tombstone,
191 197 )
192
193 # Writing all subsequent revisions
194 for rev in range(censor_rev + 1, len(old_index)):
198 else:
195 199 _rewrite_simple(
196 200 revlog,
197 201 old_index,
198 202 open_files,
199 203 rev,
200 204 rewritten_entries,
201 205 tmp_storage,
202 206 )
203 207 docket.write(transaction=None, stripping=True)
204 208
205 209
206 210 def _precompute_rewritten_delta(
207 211 revlog,
208 212 old_index,
209 213 excluded_revs,
210 214 tmp_storage,
211 215 ):
212 216 """Compute new delta for revisions whose delta is based on revision that
213 217 will not survive as is.
214 218
215 219 Return a mapping: {rev β†’ (new_base, data_start, data_end, compression_mode)}
216 220 """
217 221 dc = deltas.deltacomputer(revlog)
218 222 rewritten_entries = {}
219 223 first_excl_rev = min(excluded_revs)
220 224 with revlog._segmentfile._open_read() as dfh:
221 225 for rev in range(first_excl_rev, len(old_index)):
222 226 if rev in excluded_revs:
223 227 # this revision is itself being rewritten (censored), so there is
224 228 # no need to compute a replacement delta for it.
225 229 continue
226 230 entry = old_index[rev]
227 231 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
228 232 continue
229 233 # This is a revision that uses the censored revision as the base
230 234 # for its delta. We need to compute a new delta for it.
231 235 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
232 236 # this revision is empty, we can delta against nullrev
233 237 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
234 238 else:
235 239
236 240 text = revlog.rawdata(rev, _df=dfh)
237 241 info = revlogutils.revisioninfo(
238 242 node=entry[ENTRY_NODE_ID],
239 243 p1=revlog.node(entry[ENTRY_PARENT_1]),
240 244 p2=revlog.node(entry[ENTRY_PARENT_2]),
241 245 btext=[text],
242 246 textlen=len(text),
243 247 cachedelta=None,
244 248 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
245 249 )
246 250 d = dc.finddeltainfo(
247 251 info, dfh, excluded_bases=excluded_revs, target_rev=rev
248 252 )
249 253 default_comp = revlog._docket.default_compression_header
250 254 comp_mode, d = deltas.delta_compression(default_comp, d)
251 255 # using `tell` is a bit lazy, but we are not here for speed
252 256 start = tmp_storage.tell()
253 257 tmp_storage.write(d.data[1])
254 258 end = tmp_storage.tell()
255 259 rewritten_entries[rev] = (d.base, start, end, comp_mode)
256 260 return rewritten_entries
257 261
258 262
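For illustration, the mapping returned above has the following shape; the revision numbers, offsets and compression modes in this sketch are made up, not taken from the change:

    # rev 7 used a censored revision as its delta base; its rebuilt delta now lives
    # at bytes [0, 1432) of the temporary file. rev 9 was empty, so it is recorded
    # as a plain delta against nullrev without touching the temporary file.
    rewritten_entries = {
        7: (4, 0, 1432, COMP_MODE_PLAIN),    # (new_base, data_start, data_end, compression_mode)
        9: (nullrev, 0, 0, COMP_MODE_PLAIN),
    }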
259 263 def _setup_new_files(
260 264 revlog,
261 265 index_cutoff,
262 266 data_cutoff,
263 267 sidedata_cutoff,
264 268 ):
265 269 """
266 270
267 271 return a context manager to open all the relevant files:
268 272 - old_data_file,
269 273 - old_sidedata_file,
270 274 - new_index_file,
271 275 - new_data_file,
272 276 - new_sidedata_file,
273 277
274 278 The old_index_file is not here because it is accessed through the
275 279 `old_index` object if the caller function.
276 280 """
277 281 docket = revlog._docket
278 282 old_index_filepath = revlog.opener.join(docket.index_filepath())
279 283 old_data_filepath = revlog.opener.join(docket.data_filepath())
280 284 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
281 285
282 286 new_index_filepath = revlog.opener.join(docket.new_index_file())
283 287 new_data_filepath = revlog.opener.join(docket.new_data_file())
284 288 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
285 289
286 290 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
287 291 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
288 292 util.copyfile(
289 293 old_sidedata_filepath,
290 294 new_sidedata_filepath,
291 295 nb_bytes=sidedata_cutoff,
292 296 )
293 297 revlog.opener.register_file(docket.index_filepath())
294 298 revlog.opener.register_file(docket.data_filepath())
295 299 revlog.opener.register_file(docket.sidedata_filepath())
296 300
297 301 docket.index_end = index_cutoff
298 302 docket.data_end = data_cutoff
299 303 docket.sidedata_end = sidedata_cutoff
300 304
301 305 # reload the revlog internal information
302 306 revlog.clearcaches()
303 307 revlog._loadindex(docket=docket)
304 308
305 309 @contextlib.contextmanager
306 310 def all_files_opener():
307 311 # hide the opening in a helper function to please check-code, black
308 312 # and various python versions at the same time
309 313 with open(old_data_filepath, 'rb') as old_data_file:
310 314 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
311 315 with open(new_index_filepath, 'r+b') as new_index_file:
312 316 with open(new_data_filepath, 'r+b') as new_data_file:
313 317 with open(
314 318 new_sidedata_filepath, 'r+b'
315 319 ) as new_sidedata_file:
316 320 new_index_file.seek(0, os.SEEK_END)
317 321 assert new_index_file.tell() == index_cutoff
318 322 new_data_file.seek(0, os.SEEK_END)
319 323 assert new_data_file.tell() == data_cutoff
320 324 new_sidedata_file.seek(0, os.SEEK_END)
321 325 assert new_sidedata_file.tell() == sidedata_cutoff
322 326 yield (
323 327 old_data_file,
324 328 old_sidedata_file,
325 329 new_index_file,
326 330 new_data_file,
327 331 new_sidedata_file,
328 332 )
329 333
330 334 return all_files_opener
331 335
332 336
333 337 def _rewrite_simple(
334 338 revlog,
335 339 old_index,
336 340 all_files,
337 341 rev,
338 342 rewritten_entries,
339 343 tmp_storage,
340 344 ):
341 345 """append a normal revision to the index after the rewritten one(s)"""
342 346 (
343 347 old_data_file,
344 348 old_sidedata_file,
345 349 new_index_file,
346 350 new_data_file,
347 351 new_sidedata_file,
348 352 ) = all_files
349 353 entry = old_index[rev]
350 354 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
351 355 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
352 356
353 357 if rev not in rewritten_entries:
354 358 old_data_file.seek(old_data_offset)
355 359 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
356 360 new_data = old_data_file.read(new_data_size)
357 361 data_delta_base = entry[ENTRY_DELTA_BASE]
358 362 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
359 363 else:
360 364 (
361 365 data_delta_base,
362 366 start,
363 367 end,
364 368 d_comp_mode,
365 369 ) = rewritten_entries[rev]
366 370 new_data_size = end - start
367 371 tmp_storage.seek(start)
368 372 new_data = tmp_storage.read(new_data_size)
369 373
370 374 # It might be faster to group contiguous read/write operations;
371 375 # however, this is censor, an operation that is not focused
372 376 # on stellar performance. So I have not written this
373 377 # optimisation yet.
374 378 new_data_offset = new_data_file.tell()
375 379 new_data_file.write(new_data)
376 380
377 381 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
378 382 new_sidedata_offset = new_sidedata_file.tell()
379 383 if 0 < sidedata_size:
380 384 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
381 385 old_sidedata_file.seek(old_sidedata_offset)
382 386 new_sidedata = old_sidedata_file.read(sidedata_size)
383 387 new_sidedata_file.write(new_sidedata)
384 388
385 389 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
386 390 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
387 391 assert data_delta_base <= rev, (data_delta_base, rev)
388 392
389 393 new_entry = revlogutils.entry(
390 394 flags=flags,
391 395 data_offset=new_data_offset,
392 396 data_compressed_length=new_data_size,
393 397 data_uncompressed_length=data_uncompressed_length,
394 398 data_delta_base=data_delta_base,
395 399 link_rev=entry[ENTRY_LINK_REV],
396 400 parent_rev_1=entry[ENTRY_PARENT_1],
397 401 parent_rev_2=entry[ENTRY_PARENT_2],
398 402 node_id=entry[ENTRY_NODE_ID],
399 403 sidedata_offset=new_sidedata_offset,
400 404 sidedata_compressed_length=sidedata_size,
401 405 data_compression_mode=d_comp_mode,
402 406 sidedata_compression_mode=sd_com_mode,
403 407 )
404 408 revlog.index.append(new_entry)
405 409 entry_bin = revlog.index.entry_binary(rev)
406 410 new_index_file.write(entry_bin)
407 411
408 412 revlog._docket.index_end = new_index_file.tell()
409 413 revlog._docket.data_end = new_data_file.tell()
410 414 revlog._docket.sidedata_end = new_sidedata_file.tell()
411 415
412 416
413 417 def _rewrite_censor(
414 418 revlog,
415 419 old_index,
416 420 all_files,
417 421 rev,
418 422 tombstone,
419 423 ):
420 424 """rewrite and append a censored revision"""
421 425 (
422 426 old_data_file,
423 427 old_sidedata_file,
424 428 new_index_file,
425 429 new_data_file,
426 430 new_sidedata_file,
427 431 ) = all_files
428 432 entry = old_index[rev]
429 433
430 434 # XXX consider trying the default compression too
431 435 new_data_size = len(tombstone)
432 436 new_data_offset = new_data_file.tell()
433 437 new_data_file.write(tombstone)
434 438
435 439 # we are not adding any sidedata as they might leak info about the censored version
436 440
437 441 link_rev = entry[ENTRY_LINK_REV]
438 442
439 443 p1 = entry[ENTRY_PARENT_1]
440 444 p2 = entry[ENTRY_PARENT_2]
441 445
442 446 new_entry = revlogutils.entry(
443 447 flags=constants.REVIDX_ISCENSORED,
444 448 data_offset=new_data_offset,
445 449 data_compressed_length=new_data_size,
446 450 data_uncompressed_length=new_data_size,
447 451 data_delta_base=rev,
448 452 link_rev=link_rev,
449 453 parent_rev_1=p1,
450 454 parent_rev_2=p2,
451 455 node_id=entry[ENTRY_NODE_ID],
452 456 sidedata_offset=0,
453 457 sidedata_compressed_length=0,
454 458 data_compression_mode=COMP_MODE_PLAIN,
455 459 sidedata_compression_mode=COMP_MODE_PLAIN,
456 460 )
457 461 revlog.index.append(new_entry)
458 462 entry_bin = revlog.index.entry_binary(rev)
459 463 new_index_file.write(entry_bin)
460 464 revlog._docket.index_end = new_index_file.tell()
461 465 revlog._docket.data_end = new_data_file.tell()
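Note that the diff does not expose a multi-node entry point yet: `censor_revs` is still built from a single `censornode`. Until the set-based internals are plumbed through to callers, censoring several nodes would have to loop over the existing single-node function, rewriting the revlog files once per node, which is exactly the cost the `censor_revs` set is meant to eliminate. A hypothetical wrapper (the name `v2_censor_many` is not part of this change):

    def v2_censor_many(revlog, tr, censornodes, tombstone=b''):
        """Hypothetical helper: censor several nodes with today's API by
        running the single-node rewrite once per node."""
        for censornode in censornodes:
            v2_censor(revlog, tr, censornode, tombstone=tombstone)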