Show More
@@ -3070,11 +3070,7 b' class revlog(object):' | |||
|
3070 | 3070 | elif self._format_version == REVLOGV1: |
|
3071 | 3071 | censor.v1_censor(self, tr, censornode, tombstone) |
|
3072 | 3072 | else: |
|
3073 | # revlog v2 | |
|
3074 | raise error.RevlogError( | |
|
3075 | _(b'cannot censor with version %d revlogs') | |
|
3076 | % self._format_version | |
|
3077 | ) | |
|
3073 | censor.v2_censor(self, tr, censornode, tombstone) | |
|
3078 | 3074 | |
|
3079 | 3075 | def verifyintegrity(self, state): |
|
3080 | 3076 | """Verifies the integrity of the revlog. |
@@ -1,4 +1,5 b'' | |||
|
1 | 1 | # censor code related to censoring revision |
|
2 | # coding: utf8 | |
|
2 | 3 | # |
|
3 | 4 | # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> |
|
4 | 5 | # Copyright 2015 Google, Inc <martinvonz@google.com> |
@@ -6,17 +7,44 b'' | |||
|
6 | 7 | # This software may be used and distributed according to the terms of the |
|
7 | 8 | # GNU General Public License version 2 or any later version. |
|
8 | 9 | |
|
10 | import contextlib | |
|
11 | import os | |
|
12 | ||
|
9 | 13 | from ..node import ( |
|
10 | 14 | nullrev, |
|
11 | 15 | ) |
|
16 | from .constants import ( | |
|
17 | COMP_MODE_PLAIN, | |
|
18 | ENTRY_DATA_COMPRESSED_LENGTH, | |
|
19 | ENTRY_DATA_COMPRESSION_MODE, | |
|
20 | ENTRY_DATA_OFFSET, | |
|
21 | ENTRY_DATA_UNCOMPRESSED_LENGTH, | |
|
22 | ENTRY_DELTA_BASE, | |
|
23 | ENTRY_LINK_REV, | |
|
24 | ENTRY_NODE_ID, | |
|
25 | ENTRY_PARENT_1, | |
|
26 | ENTRY_PARENT_2, | |
|
27 | ENTRY_SIDEDATA_COMPRESSED_LENGTH, | |
|
28 | ENTRY_SIDEDATA_COMPRESSION_MODE, | |
|
29 | ENTRY_SIDEDATA_OFFSET, | |
|
30 | REVLOGV0, | |
|
31 | REVLOGV1, | |
|
32 | ) | |
|
12 | 33 | from ..i18n import _ |
|
34 | ||
|
13 | 35 | from .. import ( |
|
14 | 36 | error, |
|
37 | pycompat, | |
|
38 | revlogutils, | |
|
39 | util, | |
|
15 | 40 | ) |
|
16 | 41 | from ..utils import ( |
|
17 | 42 | storageutil, |
|
18 | 43 | ) |
|
19 |
from . import |
|
|
44 | from . import ( | |
|
45 | constants, | |
|
46 | deltas, | |
|
47 | ) | |
|
20 | 48 | |
|
21 | 49 | |
|
22 | 50 | def v1_censor(rl, tr, censornode, tombstone=b''): |
@@ -95,3 +123,237 b' def v1_censor(rl, tr, censornode, tombst' | |||
|
95 | 123 | |
|
96 | 124 | rl.clearcaches() |
|
97 | 125 | rl._loadindex() |
|
126 | ||
|
127 | ||
|
128 | def v2_censor(rl, tr, censornode, tombstone=b''): | |
|
129 | """censors a revision in a "version 2" revlog""" | |
|
130 | # General principle | |
|
131 | # | |
|
132 | # We create new revlog files (index/data/sidedata) to copy the content of | |
|
133 | # the existing data without the censored data. | |
|
134 | # | |
|
135 | # We need to recompute new delta for any revision that used the censored | |
|
136 | # revision as delta base. As the cumulative size of the new delta may be | |
|
137 | # large, we store them in a temporary file until they are stored in their | |
|
138 | # final destination. | |
|
139 | # | |
|
140 | # All data before the censored data can be blindly copied. The rest needs | |
|
141 | # to be copied as we go and the associated index entry needs adjustment. | |
|
142 | ||
|
143 | assert rl._format_version != REVLOGV0, rl._format_version | |
|
144 | assert rl._format_version != REVLOGV1, rl._format_version | |
|
145 | ||
|
146 | old_index = rl.index | |
|
147 | docket = rl._docket | |
|
148 | ||
|
149 | censor_rev = rl.rev(censornode) | |
|
150 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') | |
|
151 | ||
|
152 | censored_entry = rl.index[censor_rev] | |
|
153 | index_cutoff = rl.index.entry_size * censor_rev | |
|
154 | data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16 | |
|
155 | sidedata_cutoff = rl.sidedata_cut_off(censor_rev) | |
|
156 | ||
|
157 | # rev → (new_base, data_start, data_end) | |
|
158 | rewritten_entries = {} | |
|
159 | ||
|
160 | dc = deltas.deltacomputer(rl) | |
|
161 | excl = [censor_rev] | |
|
162 | ||
|
163 | with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage: | |
|
164 | with rl._segmentfile._open_read() as dfh: | |
|
165 | for rev in range(censor_rev + 1, len(old_index)): | |
|
166 | entry = old_index[rev] | |
|
167 | if censor_rev != entry[ENTRY_DELTA_BASE]: | |
|
168 | continue | |
|
169 | # This is a revision that uses the censored revision as the base | |
|
170 | # for its delta. We need new deltas | |
|
171 | if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0: | |
|
172 | # this revision is empty, we can delta against nullrev | |
|
173 | rewritten_entries[rev] = (nullrev, 0, 0) | |
|
174 | else: | |
|
175 | ||
|
176 | text = rl.rawdata(rev, _df=dfh) | |
|
177 | info = revlogutils.revisioninfo( | |
|
178 | node=entry[ENTRY_NODE_ID], | |
|
179 | p1=rl.node(entry[ENTRY_PARENT_1]), | |
|
180 | p2=rl.node(entry[ENTRY_PARENT_2]), | |
|
181 | btext=[text], | |
|
182 | textlen=len(text), | |
|
183 | cachedelta=None, | |
|
184 | flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF, | |
|
185 | ) | |
|
186 | d = dc.finddeltainfo( | |
|
187 | info, dfh, excluded_bases=excl, target_rev=rev | |
|
188 | ) | |
|
189 | default_comp = rl._docket.default_compression_header | |
|
190 | comp_mode, d = deltas.delta_compression(default_comp, d) | |
|
191 | # using `tell` is a bit lazy, but we are not here for speed | |
|
192 | start = tmp_storage.tell() | |
|
193 | tmp_storage.write(d.data[1]) | |
|
194 | end = tmp_storage.tell() | |
|
195 | rewritten_entries[rev] = (d.base, start, end, comp_mode) | |
|
196 | ||
|
197 | old_index_filepath = rl.opener.join(docket.index_filepath()) | |
|
198 | old_data_filepath = rl.opener.join(docket.data_filepath()) | |
|
199 | old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath()) | |
|
200 | ||
|
201 | new_index_filepath = rl.opener.join(docket.new_index_file()) | |
|
202 | new_data_filepath = rl.opener.join(docket.new_data_file()) | |
|
203 | new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file()) | |
|
204 | ||
|
205 | util.copyfile( | |
|
206 | old_index_filepath, new_index_filepath, nb_bytes=index_cutoff | |
|
207 | ) | |
|
208 | util.copyfile( | |
|
209 | old_data_filepath, new_data_filepath, nb_bytes=data_cutoff | |
|
210 | ) | |
|
211 | util.copyfile( | |
|
212 | old_sidedata_filepath, | |
|
213 | new_sidedata_filepath, | |
|
214 | nb_bytes=sidedata_cutoff, | |
|
215 | ) | |
|
216 | rl.opener.register_file(docket.index_filepath()) | |
|
217 | rl.opener.register_file(docket.data_filepath()) | |
|
218 | rl.opener.register_file(docket.sidedata_filepath()) | |
|
219 | ||
|
220 | docket.index_end = index_cutoff | |
|
221 | docket.data_end = data_cutoff | |
|
222 | docket.sidedata_end = sidedata_cutoff | |
|
223 | ||
|
224 | # reload the revlog internal information | |
|
225 | rl.clearcaches() | |
|
226 | rl._loadindex(docket=docket) | |
|
227 | ||
|
228 | @contextlib.contextmanager | |
|
229 | def all_files(): | |
|
230 | # hide opening in a helper function to please check-code, black | |
|
231 | # and various Python versions at the same time | |
|
232 | with open(old_data_filepath, 'rb') as old_data_file: | |
|
233 | with open(old_sidedata_filepath, 'rb') as old_sidedata_file: | |
|
234 | with open(new_index_filepath, 'r+b') as new_index_file: | |
|
235 | with open(new_data_filepath, 'r+b') as new_data_file: | |
|
236 | with open( | |
|
237 | new_sidedata_filepath, 'r+b' | |
|
238 | ) as new_sidedata_file: | |
|
239 | yield ( | |
|
240 | old_data_file, | |
|
241 | old_sidedata_file, | |
|
242 | new_index_file, | |
|
243 | new_data_file, | |
|
244 | new_sidedata_file, | |
|
245 | ) | |
|
246 | ||
|
247 | # we don't need to open the old index file since its content already | |
|
248 | # exists in a usable form in `old_index`. | |
|
249 | with all_files() as ( | |
|
250 | old_data_file, | |
|
251 | old_sidedata_file, | |
|
252 | new_index_file, | |
|
253 | new_data_file, | |
|
254 | new_sidedata_file, | |
|
255 | ): | |
|
256 | new_index_file.seek(0, os.SEEK_END) | |
|
257 | assert new_index_file.tell() == index_cutoff | |
|
258 | new_data_file.seek(0, os.SEEK_END) | |
|
259 | assert new_data_file.tell() == data_cutoff | |
|
260 | new_sidedata_file.seek(0, os.SEEK_END) | |
|
261 | assert new_sidedata_file.tell() == sidedata_cutoff | |
|
262 | ||
|
263 | ### writing the censored revision | |
|
264 | entry = old_index[censor_rev] | |
|
265 | ||
|
266 | # XXX consider trying the default compression too | |
|
267 | new_data_size = len(tombstone) | |
|
268 | new_data_offset = new_data_file.tell() | |
|
269 | new_data_file.write(tombstone) | |
|
270 | ||
|
271 | # we are not adding any sidedata as they might leak info about the censored version | |
|
272 | ||
|
273 | new_entry = revlogutils.entry( | |
|
274 | flags=constants.REVIDX_ISCENSORED, | |
|
275 | data_offset=new_data_offset, | |
|
276 | data_compressed_length=new_data_size, | |
|
277 | data_uncompressed_length=new_data_size, | |
|
278 | data_delta_base=censor_rev, | |
|
279 | link_rev=entry[ENTRY_LINK_REV], | |
|
280 | parent_rev_1=entry[ENTRY_PARENT_1], | |
|
281 | parent_rev_2=entry[ENTRY_PARENT_2], | |
|
282 | node_id=entry[ENTRY_NODE_ID], | |
|
283 | sidedata_offset=0, | |
|
284 | sidedata_compressed_length=0, | |
|
285 | data_compression_mode=COMP_MODE_PLAIN, | |
|
286 | sidedata_compression_mode=COMP_MODE_PLAIN, | |
|
287 | ) | |
|
288 | rl.index.append(new_entry) | |
|
289 | entry_bin = rl.index.entry_binary(censor_rev) | |
|
290 | new_index_file.write(entry_bin) | |
|
291 | docket.index_end = new_index_file.tell() | |
|
292 | docket.data_end = new_data_file.tell() | |
|
293 | ||
|
294 | #### Writing all subsequent revisions | |
|
295 | for rev in range(censor_rev + 1, len(old_index)): | |
|
296 | entry = old_index[rev] | |
|
297 | flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF | |
|
298 | old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16 | |
|
299 | ||
|
300 | if rev not in rewritten_entries: | |
|
301 | old_data_file.seek(old_data_offset) | |
|
302 | new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH] | |
|
303 | new_data = old_data_file.read(new_data_size) | |
|
304 | data_delta_base = entry[ENTRY_DELTA_BASE] | |
|
305 | d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE] | |
|
306 | else: | |
|
307 | ( | |
|
308 | data_delta_base, | |
|
309 | start, | |
|
310 | end, | |
|
311 | d_comp_mode, | |
|
312 | ) = rewritten_entries[rev] | |
|
313 | new_data_size = end - start | |
|
314 | tmp_storage.seek(start) | |
|
315 | new_data = tmp_storage.read(new_data_size) | |
|
316 | ||
|
317 | # It might be faster to group continuous read/write operation, | |
|
318 | # however, this is censor, an operation that is not focussed | |
|
319 | # around stellar performance. So I have not written this | |
|
320 | # optimisation yet. | |
|
321 | new_data_offset = new_data_file.tell() | |
|
322 | new_data_file.write(new_data) | |
|
323 | ||
|
324 | sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH] | |
|
325 | new_sidedata_offset = new_sidedata_file.tell() | |
|
326 | if 0 < sidedata_size: | |
|
327 | old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET] | |
|
328 | old_sidedata_file.seek(old_sidedata_offset) | |
|
329 | new_sidedata = old_sidedata_file.read(sidedata_size) | |
|
330 | new_sidedata_file.write(new_sidedata) | |
|
331 | ||
|
332 | data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] | |
|
333 | sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE] | |
|
334 | assert data_delta_base <= rev, (data_delta_base, rev) | |
|
335 | ||
|
336 | new_entry = revlogutils.entry( | |
|
337 | flags=flags, | |
|
338 | data_offset=new_data_offset, | |
|
339 | data_compressed_length=new_data_size, | |
|
340 | data_uncompressed_length=data_uncompressed_length, | |
|
341 | data_delta_base=data_delta_base, | |
|
342 | link_rev=entry[ENTRY_LINK_REV], | |
|
343 | parent_rev_1=entry[ENTRY_PARENT_1], | |
|
344 | parent_rev_2=entry[ENTRY_PARENT_2], | |
|
345 | node_id=entry[ENTRY_NODE_ID], | |
|
346 | sidedata_offset=new_sidedata_offset, | |
|
347 | sidedata_compressed_length=sidedata_size, | |
|
348 | data_compression_mode=d_comp_mode, | |
|
349 | sidedata_compression_mode=sd_com_mode, | |
|
350 | ) | |
|
351 | rl.index.append(new_entry) | |
|
352 | entry_bin = rl.index.entry_binary(rev) | |
|
353 | new_index_file.write(entry_bin) | |
|
354 | ||
|
355 | docket.index_end = new_index_file.tell() | |
|
356 | docket.data_end = new_data_file.tell() | |
|
357 | docket.sidedata_end = new_sidedata_file.tell() | |
|
358 | ||
|
359 | docket.write(transaction=None, stripping=True) |
@@ -1070,7 +1070,7 b' class deltacomputer(object):' | |||
|
1070 | 1070 | context. |
|
1071 | 1071 | """ |
|
1072 | 1072 | if target_rev is None: |
|
1073 |
|
|
|
1073 | target_rev = len(self.revlog) | |
|
1074 | 1074 | |
|
1075 | 1075 | if not revinfo.textlen: |
|
1076 | 1076 | return self._fullsnapshotinfo(fh, revinfo, target_rev) |
@@ -1,4 +1,14 b'' | |||
|
1 | 1 | #require no-reposimplestore |
|
2 | #testcases revlogv1 revlogv2 | |
|
3 | ||
|
4 | #if revlogv2 | |
|
5 | ||
|
6 | $ cat >> $HGRCPATH <<EOF | |
|
7 | > [experimental] | |
|
8 | > revlogv2=enable-unstable-format-and-corrupt-my-data | |
|
9 | > EOF | |
|
10 | ||
|
11 | #endif | |
|
2 | 12 | |
|
3 | 13 | $ cat >> $HGRCPATH <<EOF |
|
4 | 14 | > [extensions] |
@@ -505,3 +515,51 b' Can import bundle where first revision o' | |||
|
505 | 515 | new changesets e97f55b2665a (1 drafts) |
|
506 | 516 | (run 'hg update' to get a working copy) |
|
507 | 517 | $ hg cat -r 0 target | head -n 10 |
|
518 | ||
|
519 | #if revlogv2 | |
|
520 | ||
|
521 | Testing feature that does not work in revlog v1 | |
|
522 | =============================================== | |
|
523 | ||
|
524 | Censoring a revision that is used as delta base | |
|
525 | ----------------------------------------------- | |
|
526 | ||
|
527 | $ cd .. | |
|
528 | $ hg init censor-with-delta | |
|
529 | $ cd censor-with-delta | |
|
530 | $ echo root > target | |
|
531 | $ hg add target | |
|
532 | $ hg commit -m root | |
|
533 | $ B0=`hg id --debug -i` | |
|
534 | $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000` | |
|
535 | > do | |
|
536 | > echo "Password: hunter$x" >> target | |
|
537 | > done | |
|
538 | $ hg ci -m 'write a long file' | |
|
539 | $ B1=`hg id --debug -i` | |
|
540 | $ echo 'small change (should create a delta)' >> target | |
|
541 | $ hg ci -m 'create a delta over the password' | |
|
542 | (should show that the last revision is a delta, not a snapshot) | |
|
543 | $ B2=`hg id --debug -i` | |
|
544 | ||
|
545 | Make sure the last revision is a delta against the revision we will censor | |
|
546 | ||
|
547 | $ hg debugdeltachain target -T '{rev} {chainid} {chainlen} {prevrev}\n' | |
|
548 | 0 1 1 -1 | |
|
549 | 1 2 1 -1 | |
|
550 | 2 2 2 1 | |
|
551 | ||
|
552 | Censor the file | |
|
553 | ||
|
554 | $ hg cat -r $B1 target | wc -l | |
|
555 | 50002 (re) | |
|
556 | $ hg censor -r $B1 target | |
|
557 | $ hg cat -r $B1 target | wc -l | |
|
558 | 0 (re) | |
|
559 | ||
|
560 | Check the children is fine | |
|
561 | ||
|
562 | $ hg cat -r $B2 target | wc -l | |
|
563 | 50003 (re) | |
|
564 | ||
|
565 | #endif |
General Comments 0
You need to be logged in to leave comments.
Login now