diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -3070,11 +3070,7 @@ class revlog(object):
         elif self._format_version == REVLOGV1:
             censor.v1_censor(self, tr, censornode, tombstone)
         else:
-            # revlog v2
-            raise error.RevlogError(
-                _(b'cannot censor with version %d revlogs')
-                % self._format_version
-            )
+            censor.v2_censor(self, tr, censornode, tombstone)
 
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
diff --git a/mercurial/revlogutils/censor.py b/mercurial/revlogutils/censor.py
--- a/mercurial/revlogutils/censor.py
+++ b/mercurial/revlogutils/censor.py
@@ -1,4 +1,5 @@
 # censor code related to censoring revision
+# coding: utf8
 #
 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
 # Copyright 2015 Google, Inc <martinvonz@google.com>
@@ -6,17 +7,44 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
+import contextlib
+import os
+
 from ..node import (
     nullrev,
 )
+from .constants import (
+    COMP_MODE_PLAIN,
+    ENTRY_DATA_COMPRESSED_LENGTH,
+    ENTRY_DATA_COMPRESSION_MODE,
+    ENTRY_DATA_OFFSET,
+    ENTRY_DATA_UNCOMPRESSED_LENGTH,
+    ENTRY_DELTA_BASE,
+    ENTRY_LINK_REV,
+    ENTRY_NODE_ID,
+    ENTRY_PARENT_1,
+    ENTRY_PARENT_2,
+    ENTRY_SIDEDATA_COMPRESSED_LENGTH,
+    ENTRY_SIDEDATA_COMPRESSION_MODE,
+    ENTRY_SIDEDATA_OFFSET,
+    REVLOGV0,
+    REVLOGV1,
+)
 from ..i18n import _
+
 from .. import (
     error,
+    pycompat,
+    revlogutils,
+    util,
 )
 from ..utils import (
     storageutil,
 )
-from . import constants
+from . import (
+    constants,
+    deltas,
+)
 
 
 def v1_censor(rl, tr, censornode, tombstone=b''):
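The new `.constants` imports deserve a note: the `ENTRY_*` names are plain integer indexes into the tuples stored in `rl.index`, and `v2_censor` below reads index entries field by field through them. The first field packs the data offset and the per-revision flags into one integer, which is why the code uses `>> 16` and `& 0xFFFF`. A rough sketch of that unpacking, with index positions assumed for illustration (the authoritative values are in `mercurial/revlogutils/constants.py`):

```python
# Assumed positions, for illustration only; see revlogutils/constants.py for
# the real definitions.
ENTRY_DATA_OFFSET = 0               # packed as (offset << 16) | flags
ENTRY_DATA_COMPRESSED_LENGTH = 1
ENTRY_DATA_UNCOMPRESSED_LENGTH = 2
ENTRY_DELTA_BASE = 3
ENTRY_LINK_REV = 4


def describe_entry(entry):
    """Unpack the fields v2_censor cares about from a single index entry."""
    offset_flags = entry[ENTRY_DATA_OFFSET]
    return {
        'data_offset': offset_flags >> 16,  # byte offset into the data file
        'flags': offset_flags & 0xFFFF,     # e.g. the "censored" flag bit
        'data_compressed_length': entry[ENTRY_DATA_COMPRESSED_LENGTH],
        'data_uncompressed_length': entry[ENTRY_DATA_UNCOMPRESSED_LENGTH],
        'delta_base': entry[ENTRY_DELTA_BASE],
        'link_rev': entry[ENTRY_LINK_REV],
    }


# Toy entry: data starts at byte 4096, no flags set, stored as a full snapshot.
print(describe_entry(((4096 << 16) | 0, 120, 300, -1, 7)))
```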
@@ -95,3 +123,237 @@ def v1_censor(rl, tr, censornode, tombst
 
     rl.clearcaches()
     rl._loadindex()
+
+
+def v2_censor(rl, tr, censornode, tombstone=b''):
+    """censors a revision in a "version 2" revlog"""
+    # General principle
+    #
+    # We create new revlog files (index/data/sidedata) to copy the content of
+    # the existing data without the censored data.
+    #
+    # We need to recompute new deltas for any revision that used the censored
+    # revision as delta base. As the cumulative size of the new deltas may be
+    # large, we store them in a temporary file until they are stored in their
+    # final destination.
+    #
+    # All data before the censored data can be blindly copied. The rest needs
+    # to be copied as we go and the associated index entry needs adjustment.
+
+    assert rl._format_version != REVLOGV0, rl._format_version
+    assert rl._format_version != REVLOGV1, rl._format_version
+
+    old_index = rl.index
+    docket = rl._docket
+
+    censor_rev = rl.rev(censornode)
+    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
+
+    censored_entry = rl.index[censor_rev]
+    index_cutoff = rl.index.entry_size * censor_rev
+    data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
+    sidedata_cutoff = rl.sidedata_cut_off(censor_rev)
+
+    # rev → (new_base, data_start, data_end)
+    rewritten_entries = {}
+
+    dc = deltas.deltacomputer(rl)
+    excl = [censor_rev]
+
+    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
+        with rl._segmentfile._open_read() as dfh:
+            for rev in range(censor_rev + 1, len(old_index)):
+                entry = old_index[rev]
+                if censor_rev != entry[ENTRY_DELTA_BASE]:
+                    continue
+                # This is a revision that uses the censored revision as the
+                # base for its delta. We need to compute a new delta for it.
+                if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
+                    # this revision is empty, we can delta against nullrev
+                    rewritten_entries[rev] = (nullrev, 0, 0)
+                else:
+
+                    text = rl.rawdata(rev, _df=dfh)
+                    info = revlogutils.revisioninfo(
+                        node=entry[ENTRY_NODE_ID],
+                        p1=rl.node(entry[ENTRY_PARENT_1]),
+                        p2=rl.node(entry[ENTRY_PARENT_2]),
+                        btext=[text],
+                        textlen=len(text),
+                        cachedelta=None,
+                        flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
+                    )
+                    d = dc.finddeltainfo(
+                        info, dfh, excluded_bases=excl, target_rev=rev
+                    )
+                    default_comp = rl._docket.default_compression_header
+                    comp_mode, d = deltas.delta_compression(default_comp, d)
+                    # using `tell` is a bit lazy, but we are not here for speed
+                    start = tmp_storage.tell()
+                    tmp_storage.write(d.data[1])
+                    end = tmp_storage.tell()
+                    rewritten_entries[rev] = (d.base, start, end, comp_mode)
+
+        old_index_filepath = rl.opener.join(docket.index_filepath())
+        old_data_filepath = rl.opener.join(docket.data_filepath())
+        old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())
+
+        new_index_filepath = rl.opener.join(docket.new_index_file())
+        new_data_filepath = rl.opener.join(docket.new_data_file())
+        new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file())
+
+        util.copyfile(
+            old_index_filepath, new_index_filepath, nb_bytes=index_cutoff
+        )
+        util.copyfile(
+            old_data_filepath, new_data_filepath, nb_bytes=data_cutoff
+        )
+        util.copyfile(
+            old_sidedata_filepath,
+            new_sidedata_filepath,
+            nb_bytes=sidedata_cutoff,
+        )
+        rl.opener.register_file(docket.index_filepath())
+        rl.opener.register_file(docket.data_filepath())
+        rl.opener.register_file(docket.sidedata_filepath())
+
+        docket.index_end = index_cutoff
+        docket.data_end = data_cutoff
+        docket.sidedata_end = sidedata_cutoff
+
+        # reload the revlog internal information
+        rl.clearcaches()
+        rl._loadindex(docket=docket)
+
+        @contextlib.contextmanager
+        def all_files():
+            # hide the file opening in a helper function to please check-code,
+            # black and various Python versions at the same time
+            with open(old_data_filepath, 'rb') as old_data_file:
+                with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
+                    with open(new_index_filepath, 'r+b') as new_index_file:
+                        with open(new_data_filepath, 'r+b') as new_data_file:
+                            with open(
+                                new_sidedata_filepath, 'r+b'
+                            ) as new_sidedata_file:
+                                yield (
+                                    old_data_file,
+                                    old_sidedata_file,
+                                    new_index_file,
+                                    new_data_file,
+                                    new_sidedata_file,
+                                )
+
+        # we don't need to open the old index file since its content already
+        # exists in a usable form in `old_index`.
+        with all_files() as (
+            old_data_file,
+            old_sidedata_file,
+            new_index_file,
+            new_data_file,
+            new_sidedata_file,
+        ):
+            new_index_file.seek(0, os.SEEK_END)
+            assert new_index_file.tell() == index_cutoff
+            new_data_file.seek(0, os.SEEK_END)
+            assert new_data_file.tell() == data_cutoff
+            new_sidedata_file.seek(0, os.SEEK_END)
+            assert new_sidedata_file.tell() == sidedata_cutoff
+
+            ### writing the censored revision
+            entry = old_index[censor_rev]
+
+            # XXX consider trying the default compression too
+            new_data_size = len(tombstone)
+            new_data_offset = new_data_file.tell()
+            new_data_file.write(tombstone)
+
+            # we are not adding any sidedata as they might leak info about the censored version
+
+            new_entry = revlogutils.entry(
+                flags=constants.REVIDX_ISCENSORED,
+                data_offset=new_data_offset,
+                data_compressed_length=new_data_size,
+                data_uncompressed_length=new_data_size,
+                data_delta_base=censor_rev,
+                link_rev=entry[ENTRY_LINK_REV],
+                parent_rev_1=entry[ENTRY_PARENT_1],
+                parent_rev_2=entry[ENTRY_PARENT_2],
+                node_id=entry[ENTRY_NODE_ID],
+                sidedata_offset=0,
+                sidedata_compressed_length=0,
+                data_compression_mode=COMP_MODE_PLAIN,
+                sidedata_compression_mode=COMP_MODE_PLAIN,
+            )
+            rl.index.append(new_entry)
+            entry_bin = rl.index.entry_binary(censor_rev)
+            new_index_file.write(entry_bin)
+            docket.index_end = new_index_file.tell()
+            docket.data_end = new_data_file.tell()
+
+            #### Writing all subsequent revisions
+            for rev in range(censor_rev + 1, len(old_index)):
+                entry = old_index[rev]
+                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
+                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
+
+                if rev not in rewritten_entries:
+                    old_data_file.seek(old_data_offset)
+                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
+                    new_data = old_data_file.read(new_data_size)
+                    data_delta_base = entry[ENTRY_DELTA_BASE]
+                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
+                else:
+                    (
+                        data_delta_base,
+                        start,
+                        end,
+                        d_comp_mode,
+                    ) = rewritten_entries[rev]
+                    new_data_size = end - start
+                    tmp_storage.seek(start)
+                    new_data = tmp_storage.read(new_data_size)
+
+                # It might be faster to group contiguous read/write
+                # operations; however, censor is not an operation focused on
+                # stellar performance, so this optimisation has not been
+                # written yet.
+                new_data_offset = new_data_file.tell()
+                new_data_file.write(new_data)
+
+                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
+                new_sidedata_offset = new_sidedata_file.tell()
+                if 0 < sidedata_size:
+                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
+                    old_sidedata_file.seek(old_sidedata_offset)
+                    new_sidedata = old_sidedata_file.read(sidedata_size)
+                    new_sidedata_file.write(new_sidedata)
+
+                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
+                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
+                assert data_delta_base <= rev, (data_delta_base, rev)
+
+                new_entry = revlogutils.entry(
+                    flags=flags,
+                    data_offset=new_data_offset,
+                    data_compressed_length=new_data_size,
+                    data_uncompressed_length=data_uncompressed_length,
+                    data_delta_base=data_delta_base,
+                    link_rev=entry[ENTRY_LINK_REV],
+                    parent_rev_1=entry[ENTRY_PARENT_1],
+                    parent_rev_2=entry[ENTRY_PARENT_2],
+                    node_id=entry[ENTRY_NODE_ID],
+                    sidedata_offset=new_sidedata_offset,
+                    sidedata_compressed_length=sidedata_size,
+                    data_compression_mode=d_comp_mode,
+                    sidedata_compression_mode=sd_com_mode,
+                )
+                rl.index.append(new_entry)
+                entry_bin = rl.index.entry_binary(rev)
+                new_index_file.write(entry_bin)
+
+            docket.index_end = new_index_file.tell()
+            docket.data_end = new_data_file.tell()
+            docket.sidedata_end = new_sidedata_file.tell()
+
+    docket.write(transaction=None, stripping=True)
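A pattern in `v2_censor` that is easy to miss on first read: the recomputed deltas are not written straight into the new data file. They are staged sequentially in an anonymous temporary file during the first pass, and `rewritten_entries` only records `(new_base, data_start, data_end, comp_mode)` so each blob can be re-read by range during the rewrite pass. A small self-contained sketch of that staging idea, using the standard `tempfile` module instead of `pycompat.unnamedtempfile` and toy byte strings instead of real deltas:

```python
import tempfile

# rev -> (start, end) positions of the staged blob inside the temporary file
rewritten = {}

with tempfile.TemporaryFile(mode='w+b') as tmp_storage:
    # First pass: write each recomputed blob and remember where it landed.
    for rev, blob in [(5, b'delta-for-5'), (9, b'delta-for-9')]:
        start = tmp_storage.tell()
        tmp_storage.write(blob)
        end = tmp_storage.tell()
        rewritten[rev] = (start, end)

    # Second pass: read each staged blob back by (start, end) range, the same
    # way v2_censor does with tmp_storage.seek()/read().
    for rev, (start, end) in sorted(rewritten.items()):
        tmp_storage.seek(start)
        assert tmp_storage.read(end - start) == b'delta-for-%d' % rev
```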
diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -1070,7 +1070,7 @@ class deltacomputer(object):
         context.
         """
         if target_rev is None:
+            target_rev = len(self.revlog)
 
         if not revinfo.textlen:
             return self._fullsnapshotinfo(fh, revinfo, target_rev)
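With this change, `finddeltainfo` falls back to `len(self.revlog)`, effectively "the revision about to be appended", whenever the caller does not pass a `target_rev`. The censor path above never relies on that default: it always passes the revision being rewritten explicitly, together with the censored revision as an excluded delta base. A toy sketch of the defaulting behaviour, using a plain list as a stand-in for a revlog (not the mercurial API):

```python
# Toy stand-in: None means "the next revision to be appended", i.e. len(revlog).
def resolve_target_rev(revlog, target_rev=None):
    if target_rev is None:
        target_rev = len(revlog)
    return target_rev


toy_revlog = [b'rev0', b'rev1', b'rev2']
assert resolve_target_rev(toy_revlog) == 3      # appending a new revision
assert resolve_target_rev(toy_revlog, 1) == 1   # rewriting rev 1, as censor does
```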
diff --git a/tests/test-censor.t b/tests/test-censor.t
--- a/tests/test-censor.t
+++ b/tests/test-censor.t
@@ -1,4 +1,14 @@
 #require no-reposimplestore
+#testcases revlogv1 revlogv2
+
+#if revlogv2
+
+  $ cat >> $HGRCPATH <<EOF
+  > [experimental]
+  > revlogv2=enable-unstable-format-and-corrupt-my-data
+  > EOF
+
+#endif
 
   $ cat >> $HGRCPATH <<EOF
   > [extensions]
@@ -505,3 +515,51 @@ Can import bundle where first revision o
   new changesets e97f55b2665a (1 drafts)
   (run 'hg update' to get a working copy)
   $ hg cat -r 0 target | head -n 10
+
+#if revlogv2
+
+Testing feature that does not work in revlog v1
+===============================================
+
+Censoring a revision that is used as delta base
+-----------------------------------------------
+
+  $ cd ..
+  $ hg init censor-with-delta
+  $ cd censor-with-delta
+  $ echo root > target
+  $ hg add target
+  $ hg commit -m root
+  $ B0=`hg id --debug -i`
+  $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
+  > do
+  > echo "Password: hunter$x" >> target
+  > done
+  $ hg ci -m 'write a long file'
+  $ B1=`hg id --debug -i`
+  $ echo 'small change (should create a delta)' >> target
+  $ hg ci -m 'create a delta over the password'
+(should show that the last revision is a delta, not a snapshot)
+  $ B2=`hg id --debug -i`
+
+Make sure the last revision is a delta against the revision we will censor
+
+  $ hg debugdeltachain target -T '{rev} {chainid} {chainlen} {prevrev}\n'
+  0 1 1 -1
+  1 2 1 -1
+  2 2 2 1
+
+Censor the file
+
+  $ hg cat -r $B1 target | wc -l
+  50002 (re)
+  $ hg censor -r $B1 target
+  $ hg cat -r $B1 target | wc -l
+  0 (re)
+
+Check that the child revision is fine
+
+  $ hg cat -r $B2 target | wc -l
+  50003 (re)
+
+#endif
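The `debugdeltachain` table in the new test is what guarantees the interesting case is actually exercised: rev 2 sits in chain 2 with a chain length of 2 and `prevrev` 1, i.e. it is stored as a delta whose base is the very revision being censored. A small sketch of how those columns can be recomputed from the delta bases alone, reproducing the expected output above (toy data, not read from a real revlog):

```python
NULLREV = -1  # a delta base of nullrev means "stored as a full snapshot"


def delta_chains(delta_bases):
    """Return (rev, chainid, chainlen, prevrev) rows from per-rev delta bases."""
    rows, chain_of, next_chain = [], {}, 0
    for rev, base in enumerate(delta_bases):
        if base == NULLREV:
            next_chain += 1                    # a snapshot starts a new chain
            chain_of[rev] = (next_chain, 1)
        else:
            chain_id, base_len = chain_of[base]
            chain_of[rev] = (chain_id, base_len + 1)
        chain_id, length = chain_of[rev]
        rows.append((rev, chain_id, length, base))
    return rows


# rev 0: snapshot, rev 1: snapshot (the password file), rev 2: delta on rev 1.
assert delta_chains([NULLREV, NULLREV, 1]) == [
    (0, 1, 1, -1),
    (1, 2, 1, -1),
    (2, 2, 2, 1),
]
```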