##// END OF EJS Templates
censor: put the tuple of open files in an explicit variable...
marmoute -
r48258:f7a94e2d default
parent child Browse files
Show More
@@ -1,359 +1,360 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import contextlib
11 11 import os
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 )
16 16 from .constants import (
17 17 COMP_MODE_PLAIN,
18 18 ENTRY_DATA_COMPRESSED_LENGTH,
19 19 ENTRY_DATA_COMPRESSION_MODE,
20 20 ENTRY_DATA_OFFSET,
21 21 ENTRY_DATA_UNCOMPRESSED_LENGTH,
22 22 ENTRY_DELTA_BASE,
23 23 ENTRY_LINK_REV,
24 24 ENTRY_NODE_ID,
25 25 ENTRY_PARENT_1,
26 26 ENTRY_PARENT_2,
27 27 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
28 28 ENTRY_SIDEDATA_COMPRESSION_MODE,
29 29 ENTRY_SIDEDATA_OFFSET,
30 30 REVLOGV0,
31 31 REVLOGV1,
32 32 )
33 33 from ..i18n import _
34 34
35 35 from .. import (
36 36 error,
37 37 pycompat,
38 38 revlogutils,
39 39 util,
40 40 )
41 41 from ..utils import (
42 42 storageutil,
43 43 )
44 44 from . import (
45 45 constants,
46 46 deltas,
47 47 )
48 48
49 49
def v1_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 1" revlog"""
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported here to avoid an import cycle with ..revlog
    from .. import revlog

    censored_rev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting a revlog in place is hard, so the censoring strategy is to
    # build a brand new revlog, copy every revision into it (substituting
    # the tombstone for the censored one), and swap the files in on
    # transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    target = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # mirror the low-level format knobs of the source revlog
    for attr in (
        '_format_version',
        '_format_flags',
        '_generaldelta',
        '_parse_index',
    ):
        setattr(target, attr, getattr(rl, attr))

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev == censored_rev:
            # write the tombstone in place of the real data
            target.addrawrevision(
                tombstone,
                tr,
                rl.linkrev(censored_rev),
                p1,
                p2,
                censornode,
                constants.REVIDX_ISCENSORED,
            )

            # a censored revision must be stored as a full snapshot, not
            # as a delta, otherwise the tombstone would not stand alone
            if target.deltaparent(rev) != nullrev:
                msg = _(b'censored revision stored as delta; cannot censor')
                hint = _(
                    b'censoring of revlogs is not fully implemented;'
                    b' please report this bug'
                )
                raise error.Abort(msg, hint=hint)
            continue

        if rl.iscensored(rev):
            if rl.deltaparent(rev) != nullrev:
                msg = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(msg)
            # previously-censored revisions are copied raw, chunk by chunk
            rawtext = rl._chunk(rev)
        else:
            rawtext = rl.rawdata(rev)

        target.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
        )

    # arrange for the originals to be backed up, then slide the rewritten
    # files into place
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(target._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(target._datafile, rl._datafile)

    # drop any cached state derived from the pre-censor content
    rl.clearcaches()
    rl._loadindex()
127 127
def v2_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 2" revlog

    :rl: the revlog to censor (must be a v2-format revlog)
    :tr: the active transaction
    :censornode: node id of the revision to censor
    :tombstone: replacement message stored in place of the censored data

    General principle:

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute a new delta for any revision that used the censored
    revision as delta base. As the cumulative size of the new deltas may be
    large, we store them in a temporary file until they are stored in their
    final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.
    """
    assert rl._format_version != REVLOGV0, rl._format_version
    assert rl._format_version != REVLOGV1, rl._format_version

    old_index = rl.index
    docket = rl._docket

    censor_rev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before these offsets is unaffected by the censoring
    # and can be copied verbatim
    censored_entry = rl.index[censor_rev]
    index_cutoff = rl.index.entry_size * censor_rev
    data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = rl.sidedata_cut_off(censor_rev)

    # rev -> (new_base, data_start, data_end, comp_mode); data_start/data_end
    # are offsets of the recomputed delta inside `tmp_storage`
    rewritten_entries = {}

    dc = deltas.deltacomputer(rl)
    excl = [censor_rev]

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        with rl._segmentfile._open_read() as dfh:
            for rev in range(censor_rev + 1, len(old_index)):
                entry = old_index[rev]
                if censor_rev != entry[ENTRY_DELTA_BASE]:
                    continue
                # This is a revision that uses the censored revision as the
                # base for its delta. We need a new delta for it.
                if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                    # this revision is empty, we can delta against nullrev.
                    # The entry must be a 4-tuple like the general case below
                    # (it is unpacked as such in the rewrite loop); an empty
                    # payload is stored uncompressed.
                    rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                else:

                    text = rl.rawdata(rev, _df=dfh)
                    info = revlogutils.revisioninfo(
                        node=entry[ENTRY_NODE_ID],
                        p1=rl.node(entry[ENTRY_PARENT_1]),
                        p2=rl.node(entry[ENTRY_PARENT_2]),
                        btext=[text],
                        textlen=len(text),
                        cachedelta=None,
                        flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
                    )
                    # exclude the censored revision from candidate bases so
                    # the new delta never references it
                    d = dc.finddeltainfo(
                        info, dfh, excluded_bases=excl, target_rev=rev
                    )
                    default_comp = rl._docket.default_compression_header
                    comp_mode, d = deltas.delta_compression(default_comp, d)
                    # using `tell` is a bit lazy, but we are not here for speed
                    start = tmp_storage.tell()
                    tmp_storage.write(d.data[1])
                    end = tmp_storage.tell()
                    rewritten_entries[rev] = (d.base, start, end, comp_mode)

        old_index_filepath = rl.opener.join(docket.index_filepath())
        old_data_filepath = rl.opener.join(docket.data_filepath())
        old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())

        new_index_filepath = rl.opener.join(docket.new_index_file())
        new_data_filepath = rl.opener.join(docket.new_data_file())
        new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file())

        # blind copy of everything located before the censored revision
        util.copyfile(
            old_index_filepath, new_index_filepath, nb_bytes=index_cutoff
        )
        util.copyfile(
            old_data_filepath, new_data_filepath, nb_bytes=data_cutoff
        )
        util.copyfile(
            old_sidedata_filepath,
            new_sidedata_filepath,
            nb_bytes=sidedata_cutoff,
        )
        rl.opener.register_file(docket.index_filepath())
        rl.opener.register_file(docket.data_filepath())
        rl.opener.register_file(docket.sidedata_filepath())

        docket.index_end = index_cutoff
        docket.data_end = data_cutoff
        docket.sidedata_end = sidedata_cutoff

        # reload the revlog internal information
        rl.clearcaches()
        rl._loadindex(docket=docket)

        @contextlib.contextmanager
        def all_files():
            # hide opening in a helper function to please check-code, black
            # and various python versions at the same time
            with open(old_data_filepath, 'rb') as old_data_file:
                with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                    with open(new_index_filepath, 'r+b') as new_index_file:
                        with open(new_data_filepath, 'r+b') as new_data_file:
                            with open(
                                new_sidedata_filepath, 'r+b'
                            ) as new_sidedata_file:
                                yield (
                                    old_data_file,
                                    old_sidedata_file,
                                    new_index_file,
                                    new_data_file,
                                    new_sidedata_file,
                                )

        # we don't need to open the old index file since its content already
        # exists in a usable form in `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files
            # the copied prefixes must end exactly at the cutoffs computed
            # above, otherwise the rewrite would corrupt the revlog
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff

            ### writing the censored revision
            entry = old_index[censor_rev]

            # XXX consider trying the default compression too
            new_data_size = len(tombstone)
            new_data_offset = new_data_file.tell()
            new_data_file.write(tombstone)

            # we are not adding any sidedata as they might leak info about the censored version

            new_entry = revlogutils.entry(
                flags=constants.REVIDX_ISCENSORED,
                data_offset=new_data_offset,
                data_compressed_length=new_data_size,
                data_uncompressed_length=new_data_size,
                data_delta_base=censor_rev,
                link_rev=entry[ENTRY_LINK_REV],
                parent_rev_1=entry[ENTRY_PARENT_1],
                parent_rev_2=entry[ENTRY_PARENT_2],
                node_id=entry[ENTRY_NODE_ID],
                sidedata_offset=0,
                sidedata_compressed_length=0,
                data_compression_mode=COMP_MODE_PLAIN,
                sidedata_compression_mode=COMP_MODE_PLAIN,
            )
            rl.index.append(new_entry)
            entry_bin = rl.index.entry_binary(censor_rev)
            new_index_file.write(entry_bin)
            docket.index_end = new_index_file.tell()
            docket.data_end = new_data_file.tell()

            #### Writing all subsequent revisions
            for rev in range(censor_rev + 1, len(old_index)):
                entry = old_index[rev]
                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

                if rev not in rewritten_entries:
                    # untouched revision: copy its data segment verbatim
                    old_data_file.seek(old_data_offset)
                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
                    new_data = old_data_file.read(new_data_size)
                    data_delta_base = entry[ENTRY_DELTA_BASE]
                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
                else:
                    # revision re-deltified earlier: fetch the new delta
                    # from the temporary storage
                    (
                        data_delta_base,
                        start,
                        end,
                        d_comp_mode,
                    ) = rewritten_entries[rev]
                    new_data_size = end - start
                    tmp_storage.seek(start)
                    new_data = tmp_storage.read(new_data_size)

                # It might be faster to group continuous read/write operation,
                # however, this is censor, an operation that is not focussed
                # around stellar performance. So I have not written this
                # optimisation yet.
                new_data_offset = new_data_file.tell()
                new_data_file.write(new_data)

                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
                new_sidedata_offset = new_sidedata_file.tell()
                if 0 < sidedata_size:
                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
                    old_sidedata_file.seek(old_sidedata_offset)
                    new_sidedata = old_sidedata_file.read(sidedata_size)
                    new_sidedata_file.write(new_sidedata)

                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
                # a delta base after its revision would be unresolvable
                assert data_delta_base <= rev, (data_delta_base, rev)

                new_entry = revlogutils.entry(
                    flags=flags,
                    data_offset=new_data_offset,
                    data_compressed_length=new_data_size,
                    data_uncompressed_length=data_uncompressed_length,
                    data_delta_base=data_delta_base,
                    link_rev=entry[ENTRY_LINK_REV],
                    parent_rev_1=entry[ENTRY_PARENT_1],
                    parent_rev_2=entry[ENTRY_PARENT_2],
                    node_id=entry[ENTRY_NODE_ID],
                    sidedata_offset=new_sidedata_offset,
                    sidedata_compressed_length=sidedata_size,
                    data_compression_mode=d_comp_mode,
                    sidedata_compression_mode=sd_com_mode,
                )
                rl.index.append(new_entry)
                entry_bin = rl.index.entry_binary(rev)
                new_index_file.write(entry_bin)

            docket.index_end = new_index_file.tell()
            docket.data_end = new_data_file.tell()
            docket.sidedata_end = new_sidedata_file.tell()

    docket.write(transaction=None, stripping=True)
General Comments 0
You need to be logged in to leave comments. Login now