##// END OF EJS Templates
censor: drop size limitation on the tombstone...
marmoute -
r48184:8089d0fa default
parent child Browse files
Show More
@@ -1,102 +1,97 b''
1 1 # censor: code related to censoring revisions
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from ..node import (
10 10 nullrev,
11 11 )
12 12 from ..i18n import _
13 13 from .. import (
14 14 error,
15 15 )
16 16 from ..utils import (
17 17 storageutil,
18 18 )
19 19 from . import constants
20 20
21 21
22 22 def v1_censor(rl, tr, censornode, tombstone=b''):
23 23 """censors a revision in a "version 1" revlog"""
24 24 assert rl._format_version == constants.REVLOGV1, rl._format_version
25 25
26 26 # avoid cycle
27 27 from .. import revlog
28 28
29 29 censorrev = rl.rev(censornode)
30 30 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
31 31
32 if len(tombstone) > rl.rawsize(censorrev):
33 raise error.Abort(
34 _(b'censor tombstone must be no longer than censored data')
35 )
36
37 32 # Rewriting the revlog in place is hard. Our strategy for censoring is
38 33 # to create a new revlog, copy all revisions to it, then replace the
39 34 # revlogs on transaction close.
40 35 #
41 36 # This is a bit dangerous. We could easily have a mismatch of state.
42 37 newrl = revlog.revlog(
43 38 rl.opener,
44 39 target=rl.target,
45 40 radix=rl.radix,
46 41 postfix=b'tmpcensored',
47 42 censorable=True,
48 43 )
49 44 newrl._format_version = rl._format_version
50 45 newrl._format_flags = rl._format_flags
51 46 newrl._generaldelta = rl._generaldelta
52 47 newrl._parse_index = rl._parse_index
53 48
54 49 for rev in rl.revs():
55 50 node = rl.node(rev)
56 51 p1, p2 = rl.parents(node)
57 52
58 53 if rev == censorrev:
59 54 newrl.addrawrevision(
60 55 tombstone,
61 56 tr,
62 57 rl.linkrev(censorrev),
63 58 p1,
64 59 p2,
65 60 censornode,
66 61 constants.REVIDX_ISCENSORED,
67 62 )
68 63
69 64 if newrl.deltaparent(rev) != nullrev:
70 65 m = _(b'censored revision stored as delta; cannot censor')
71 66 h = _(
72 67 b'censoring of revlogs is not fully implemented;'
73 68 b' please report this bug'
74 69 )
75 70 raise error.Abort(m, hint=h)
76 71 continue
77 72
78 73 if rl.iscensored(rev):
79 74 if rl.deltaparent(rev) != nullrev:
80 75 m = _(
81 76 b'cannot censor due to censored '
82 77 b'revision having delta stored'
83 78 )
84 79 raise error.Abort(m)
85 80 rawtext = rl._chunk(rev)
86 81 else:
87 82 rawtext = rl.rawdata(rev)
88 83
89 84 newrl.addrawrevision(
90 85 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
91 86 )
92 87
93 88 tr.addbackup(rl._indexfile, location=b'store')
94 89 if not rl._inline:
95 90 tr.addbackup(rl._datafile, location=b'store')
96 91
97 92 rl.opener.rename(newrl._indexfile, rl._indexfile)
98 93 if not rl._inline:
99 94 rl.opener.rename(newrl._datafile, rl._datafile)
100 95
101 96 rl.clearcaches()
102 97 rl._loadindex()
@@ -1,513 +1,507 b''
1 1 #require no-reposimplestore
2 2
3 3 $ cat >> $HGRCPATH <<EOF
4 4 > [extensions]
5 5 > censor=
6 6 > EOF
7 7 $ cp $HGRCPATH $HGRCPATH.orig
8 8
9 9 Create repo with unimpeachable content
10 10
11 11 $ hg init r
12 12 $ cd r
13 13 $ echo 'Initially untainted file' > target
14 14 $ echo 'Normal file here' > bystander
15 15 $ hg add target bystander
16 16 $ hg ci -m init
17 17
18 18 Clone repo so we can test pull later
19 19
20 20 $ cd ..
21 21 $ hg clone r rpull
22 22 updating to branch default
23 23 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
24 24 $ cd r
25 25
26 26 Introduce content which will ultimately require censorship. Name the first
27 27 censored node C1, second C2, and so on
28 28
29 29 $ echo 'Tainted file' > target
30 30 $ echo 'Passwords: hunter2' >> target
31 31 $ hg ci -m taint target
32 32 $ C1=`hg id --debug -i`
33 33
34 34 $ echo 'hunter3' >> target
35 35 $ echo 'Normal file v2' > bystander
36 36 $ hg ci -m moretaint target bystander
37 37 $ C2=`hg id --debug -i`
38 38
39 39 Add new sanitized versions to correct our mistake. Name the first head H1,
40 40 the second head H2, and so on
41 41
42 42 $ echo 'Tainted file is now sanitized' > target
43 43 $ hg ci -m sanitized target
44 44 $ H1=`hg id --debug -i`
45 45
46 46 $ hg update -r $C2
47 47 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
48 48 $ echo 'Tainted file now super sanitized' > target
49 49 $ hg ci -m 'super sanitized' target
50 50 created new head
51 51 $ H2=`hg id --debug -i`
52 52
53 53 Verify target contents before censorship at each revision
54 54
55 55 $ hg cat -r $H1 target | head -n 10
56 56 Tainted file is now sanitized
57 57 $ hg cat -r $H2 target | head -n 10
58 58 Tainted file now super sanitized
59 59 $ hg cat -r $C2 target | head -n 10
60 60 Tainted file
61 61 Passwords: hunter2
62 62 hunter3
63 63 $ hg cat -r $C1 target | head -n 10
64 64 Tainted file
65 65 Passwords: hunter2
66 66 $ hg cat -r 0 target | head -n 10
67 67 Initially untainted file
68 68
69 Try to censor revision with too large of a tombstone message
70
71 $ hg censor -r $C1 -t 'blah blah blah blah blah blah blah blah bla' target
72 abort: censor tombstone must be no longer than censored data
73 [255]
74
75 69 Censor revision with 2 offenses
76 70
77 71 (this also tests file pattern matching: path relative to cwd case)
78 72
79 73 $ mkdir -p foo/bar/baz
80 74 $ hg --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
81 75 $ hg cat -r $H1 target | head -n 10
82 76 Tainted file is now sanitized
83 77 $ hg cat -r $H2 target | head -n 10
84 78 Tainted file now super sanitized
85 79 $ hg cat -r $C2 target | head -n 10
86 80 abort: censored node: 1e0247a9a4b7
87 81 (set censor.policy to ignore errors)
88 82 $ hg cat -r $C1 target | head -n 10
89 83 Tainted file
90 84 Passwords: hunter2
91 85 $ hg cat -r 0 target | head -n 10
92 86 Initially untainted file
93 87
94 88 Censor revision with 1 offense
95 89
96 90 (this also tests file pattern matching: with 'path:' scheme)
97 91
98 92 $ hg --cwd foo/bar/baz censor -r $C1 path:target
99 93 $ hg cat -r $H1 target | head -n 10
100 94 Tainted file is now sanitized
101 95 $ hg cat -r $H2 target | head -n 10
102 96 Tainted file now super sanitized
103 97 $ hg cat -r $C2 target | head -n 10
104 98 abort: censored node: 1e0247a9a4b7
105 99 (set censor.policy to ignore errors)
106 100 $ hg cat -r $C1 target | head -n 10
107 101 abort: censored node: 613bc869fceb
108 102 (set censor.policy to ignore errors)
109 103 $ hg cat -r 0 target | head -n 10
110 104 Initially untainted file
111 105
112 106 Can only check out target at uncensored revisions; -X is a workaround for --all
113 107
114 108 $ hg revert -r $C2 target | head -n 10
115 109 abort: censored node: 1e0247a9a4b7
116 110 (set censor.policy to ignore errors)
117 111 $ hg revert -r $C1 target | head -n 10
118 112 abort: censored node: 613bc869fceb
119 113 (set censor.policy to ignore errors)
120 114 $ hg revert -r $C1 --all
121 115 reverting bystander
122 116 reverting target
123 117 abort: censored node: 613bc869fceb
124 118 (set censor.policy to ignore errors)
125 119 [255]
126 120 $ hg revert -r $C1 --all -X target
127 121 $ cat target | head -n 10
128 122 Tainted file now super sanitized
129 123 $ hg revert -r 0 --all
130 124 reverting target
131 125 $ cat target | head -n 10
132 126 Initially untainted file
133 127 $ hg revert -r $H2 --all
134 128 reverting bystander
135 129 reverting target
136 130 $ cat target | head -n 10
137 131 Tainted file now super sanitized
138 132
139 133 Uncensored file can be viewed at any revision
140 134
141 135 $ hg cat -r $H1 bystander | head -n 10
142 136 Normal file v2
143 137 $ hg cat -r $C2 bystander | head -n 10
144 138 Normal file v2
145 139 $ hg cat -r $C1 bystander | head -n 10
146 140 Normal file here
147 141 $ hg cat -r 0 bystander | head -n 10
148 142 Normal file here
149 143
150 144 Can update to children of censored revision
151 145
152 146 $ hg update -r $H1
153 147 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
154 148 $ cat target | head -n 10
155 149 Tainted file is now sanitized
156 150 $ hg update -r $H2
157 151 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
158 152 $ cat target | head -n 10
159 153 Tainted file now super sanitized
160 154
161 155 Set censor policy to abort in trusted $HGRC so hg verify fails
162 156
163 157 $ cp $HGRCPATH.orig $HGRCPATH
164 158 $ cat >> $HGRCPATH <<EOF
165 159 > [censor]
166 160 > policy = abort
167 161 > EOF
168 162
169 163 Repo fails verification due to censorship
170 164
171 165 $ hg verify
172 166 checking changesets
173 167 checking manifests
174 168 crosschecking files in changesets and manifests
175 169 checking files
176 170 target@1: censored file data
177 171 target@2: censored file data
178 172 checked 5 changesets with 7 changes to 2 files
179 173 2 integrity errors encountered!
180 174 (first damaged changeset appears to be 1)
181 175 [1]
182 176
183 177 Cannot update to revision with censored data
184 178
185 179 $ hg update -r $C2
186 180 abort: censored node: 1e0247a9a4b7
187 181 (set censor.policy to ignore errors)
188 182 [255]
189 183 $ hg update -r $C1
190 184 abort: censored node: 613bc869fceb
191 185 (set censor.policy to ignore errors)
192 186 [255]
193 187 $ hg update -r 0
194 188 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
195 189 $ hg update -r $H2
196 190 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
197 191
198 192 Set censor policy to ignore in trusted $HGRC so hg verify passes
199 193
200 194 $ cp $HGRCPATH.orig $HGRCPATH
201 195 $ cat >> $HGRCPATH <<EOF
202 196 > [censor]
203 197 > policy = ignore
204 198 > EOF
205 199
206 200 Repo passes verification with warnings with explicit config
207 201
208 202 $ hg verify
209 203 checking changesets
210 204 checking manifests
211 205 crosschecking files in changesets and manifests
212 206 checking files
213 207 checked 5 changesets with 7 changes to 2 files
214 208
215 209 May update to revision with censored data with explicit config
216 210
217 211 $ hg update -r $C2
218 212 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
219 213 $ cat target | head -n 10
220 214 $ hg update -r $C1
221 215 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
222 216 $ cat target | head -n 10
223 217 $ hg update -r 0
224 218 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
225 219 $ cat target | head -n 10
226 220 Initially untainted file
227 221 $ hg update -r $H2
228 222 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
229 223 $ cat target | head -n 10
230 224 Tainted file now super sanitized
231 225
232 226 Can merge in revision with censored data. Test requires one branch of history
233 227 with the file censored, but we can't censor at a head, so advance H1.
234 228
235 229 $ hg update -r $H1
236 230 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
237 231 $ C3=$H1
238 232 $ echo 'advanced head H1' > target
239 233 $ hg ci -m 'advance head H1' target
240 234 $ H1=`hg id --debug -i`
241 235 $ hg censor -r $C3 target
242 236 $ hg update -r $H2
243 237 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
244 238 $ hg merge -r $C3
245 239 merging target
246 240 0 files updated, 1 files merged, 0 files removed, 0 files unresolved
247 241 (branch merge, don't forget to commit)
248 242
249 243 Revisions present in repository heads may not be censored
250 244
251 245 $ hg update -C -r $H2
252 246 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
253 247 $ hg censor -r $H2 target
254 248 abort: cannot censor file in heads (78a8fc215e79)
255 249 (clean/delete and commit first)
256 250 [255]
257 251 $ echo 'twiddling thumbs' > bystander
258 252 $ hg ci -m 'bystander commit'
259 253 $ H2=`hg id --debug -i`
260 254 $ hg censor -r "$H2^" target
261 255 abort: cannot censor file in heads (efbe78065929)
262 256 (clean/delete and commit first)
263 257 [255]
264 258
265 259 Cannot censor working directory
266 260
267 261 $ echo 'seriously no passwords' > target
268 262 $ hg ci -m 'extend second head arbitrarily' target
269 263 $ H2=`hg id --debug -i`
270 264 $ hg update -r "$H2^"
271 265 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
272 266 $ hg censor -r . target
273 267 abort: cannot censor working directory
274 268 (clean/delete/update first)
275 269 [255]
276 270 $ hg update -r $H2
277 271 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
278 272
279 273 Can re-add file after being deleted + censored
280 274
281 275 $ C4=$H2
282 276 $ hg rm target
283 277 $ hg ci -m 'delete target so it may be censored'
284 278 $ H2=`hg id --debug -i`
285 279 $ hg censor -r $C4 target
286 280 $ hg cat -r $C4 target | head -n 10
287 281 $ hg cat -r "$H2^^" target | head -n 10
288 282 Tainted file now super sanitized
289 283 $ echo 'fresh start' > target
290 284 $ hg add target
291 285 $ hg ci -m reincarnated target
292 286 $ H2=`hg id --debug -i`
293 287 $ hg cat -r $H2 target | head -n 10
294 288 fresh start
295 289 $ hg cat -r "$H2^" target | head -n 10
296 290 target: no such file in rev 452ec1762369
297 291 $ hg cat -r $C4 target | head -n 10
298 292 $ hg cat -r "$H2^^^" target | head -n 10
299 293 Tainted file now super sanitized
300 294
301 295 Can censor after revlog has expanded to no longer permit inline storage
302 296
303 297 $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
304 298 > do
305 299 > echo "Password: hunter$x" >> target
306 300 > done
307 301 $ hg ci -m 'add 100k passwords'
308 302 $ H2=`hg id --debug -i`
309 303 $ C5=$H2
310 304 $ hg revert -r "$H2^" target
311 305 $ hg ci -m 'cleaned 100k passwords'
312 306 $ H2=`hg id --debug -i`
313 307 $ hg censor -r $C5 target
314 308 $ hg cat -r $C5 target | head -n 10
315 309 $ hg cat -r $H2 target | head -n 10
316 310 fresh start
317 311
318 312 Repo with censored nodes can be cloned and cloned nodes are censored
319 313
320 314 $ cd ..
321 315 $ hg clone r rclone
322 316 updating to branch default
323 317 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
324 318 $ cd rclone
325 319 $ hg cat -r $H1 target | head -n 10
326 320 advanced head H1
327 321 $ hg cat -r $H2~5 target | head -n 10
328 322 Tainted file now super sanitized
329 323 $ hg cat -r $C2 target | head -n 10
330 324 $ hg cat -r $C1 target | head -n 10
331 325 $ hg cat -r 0 target | head -n 10
332 326 Initially untainted file
333 327 $ hg verify
334 328 checking changesets
335 329 checking manifests
336 330 crosschecking files in changesets and manifests
337 331 checking files
338 332 checked 12 changesets with 13 changes to 2 files
339 333
340 334 Repo cloned before tainted content introduced can pull censored nodes
341 335
342 336 $ cd ../rpull
343 337 $ hg cat -r tip target | head -n 10
344 338 Initially untainted file
345 339 $ hg verify
346 340 checking changesets
347 341 checking manifests
348 342 crosschecking files in changesets and manifests
349 343 checking files
350 344 checked 1 changesets with 2 changes to 2 files
351 345 $ hg pull -r $H1 -r $H2
352 346 pulling from $TESTTMP/r
353 347 searching for changes
354 348 adding changesets
355 349 adding manifests
356 350 adding file changes
357 351 added 11 changesets with 11 changes to 2 files (+1 heads)
358 352 new changesets 186fb27560c3:683e4645fded
359 353 (run 'hg heads' to see heads, 'hg merge' to merge)
360 354 $ hg update 4
361 355 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
362 356 $ cat target | head -n 10
363 357 Tainted file now super sanitized
364 358 $ hg cat -r $H1 target | head -n 10
365 359 advanced head H1
366 360 $ hg cat -r $H2~5 target | head -n 10
367 361 Tainted file now super sanitized
368 362 $ hg cat -r $C2 target | head -n 10
369 363 $ hg cat -r $C1 target | head -n 10
370 364 $ hg cat -r 0 target | head -n 10
371 365 Initially untainted file
372 366 $ hg verify
373 367 checking changesets
374 368 checking manifests
375 369 crosschecking files in changesets and manifests
376 370 checking files
377 371 checked 12 changesets with 13 changes to 2 files
378 372
379 373 Censored nodes can be pushed if they censor previously unexchanged nodes
380 374
381 375 $ echo 'Passwords: hunter2hunter2' > target
382 376 $ hg ci -m 're-add password from clone' target
383 377 created new head
384 378 $ H3=`hg id --debug -i`
385 379 $ REV=$H3
386 380 $ echo 'Re-sanitized; nothing to see here' > target
387 381 $ hg ci -m 're-sanitized' target
388 382 $ H2=`hg id --debug -i`
389 383 $ CLEANREV=$H2
390 384 $ hg cat -r $REV target | head -n 10
391 385 Passwords: hunter2hunter2
392 386 $ hg censor -r $REV target
393 387 $ hg cat -r $REV target | head -n 10
394 388 $ hg cat -r $CLEANREV target | head -n 10
395 389 Re-sanitized; nothing to see here
396 390 $ hg push -f -r $H2
397 391 pushing to $TESTTMP/r
398 392 searching for changes
399 393 adding changesets
400 394 adding manifests
401 395 adding file changes
402 396 added 2 changesets with 2 changes to 1 files (+1 heads)
403 397
404 398 $ cd ../r
405 399 $ hg cat -r $REV target | head -n 10
406 400 $ hg cat -r $CLEANREV target | head -n 10
407 401 Re-sanitized; nothing to see here
408 402 $ hg update $CLEANREV
409 403 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
410 404 $ cat target | head -n 10
411 405 Re-sanitized; nothing to see here
412 406
413 407 Censored nodes can be bundled up and unbundled in another repo
414 408
415 409 $ hg bundle --base 0 ../pwbundle
416 410 13 changesets found
417 411 $ cd ../rclone
418 412 $ hg unbundle ../pwbundle
419 413 adding changesets
420 414 adding manifests
421 415 adding file changes
422 416 added 2 changesets with 2 changes to 2 files (+1 heads)
423 417 new changesets 075be80ac777:dcbaf17bf3a1 (2 drafts)
424 418 (run 'hg heads .' to see heads, 'hg merge' to merge)
425 419 $ hg cat -r $REV target | head -n 10
426 420 $ hg cat -r $CLEANREV target | head -n 10
427 421 Re-sanitized; nothing to see here
428 422 $ hg update $CLEANREV
429 423 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
430 424 $ cat target | head -n 10
431 425 Re-sanitized; nothing to see here
432 426 $ hg verify
433 427 checking changesets
434 428 checking manifests
435 429 crosschecking files in changesets and manifests
436 430 checking files
437 431 checked 14 changesets with 15 changes to 2 files
438 432
439 433 Grepping only warns, doesn't error out
440 434
441 435 $ cd ../rpull
442 436 $ hg grep 'Normal file'
443 437 bystander:Normal file v2
444 438 $ hg grep nothing
445 439 target:Re-sanitized; nothing to see here
446 440 $ hg grep --diff 'Normal file'
447 441 cannot search in censored file: target:7
448 442 cannot search in censored file: target:10
449 443 cannot search in censored file: target:12
450 444 bystander:6:-:Normal file v2
451 445 cannot search in censored file: target:1
452 446 cannot search in censored file: target:2
453 447 cannot search in censored file: target:3
454 448 bystander:2:-:Normal file here
455 449 bystander:2:+:Normal file v2
456 450 bystander:0:+:Normal file here
457 451 $ hg grep --diff nothing
458 452 cannot search in censored file: target:7
459 453 cannot search in censored file: target:10
460 454 cannot search in censored file: target:12
461 455 target:13:+:Re-sanitized; nothing to see here
462 456 cannot search in censored file: target:1
463 457 cannot search in censored file: target:2
464 458 cannot search in censored file: target:3
465 459
466 460 Censored nodes can be imported on top of censored nodes, consecutively
467 461
468 462 $ hg init ../rimport
469 463 $ hg bundle --base 1 ../rimport/splitbundle
470 464 12 changesets found
471 465 $ cd ../rimport
472 466 $ hg pull -r $H1 -r $H2 ../r
473 467 pulling from ../r
474 468 adding changesets
475 469 adding manifests
476 470 adding file changes
477 471 added 8 changesets with 10 changes to 2 files (+1 heads)
478 472 new changesets e97f55b2665a:dcbaf17bf3a1
479 473 (run 'hg heads' to see heads, 'hg merge' to merge)
480 474 $ hg unbundle splitbundle
481 475 adding changesets
482 476 adding manifests
483 477 adding file changes
484 478 added 6 changesets with 5 changes to 2 files (+1 heads)
485 479 new changesets efbe78065929:683e4645fded (6 drafts)
486 480 (run 'hg heads .' to see heads, 'hg merge' to merge)
487 481 $ hg update $H2
488 482 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
489 483 $ cat target | head -n 10
490 484 Re-sanitized; nothing to see here
491 485 $ hg verify
492 486 checking changesets
493 487 checking manifests
494 488 crosschecking files in changesets and manifests
495 489 checking files
496 490 checked 14 changesets with 15 changes to 2 files
497 491 $ cd ../r
498 492
499 493 Can import bundle where first revision of a file is censored
500 494
501 495 $ hg init ../rinit
502 496 $ hg censor -r 0 target
503 497 $ hg bundle -r 0 --base null ../rinit/initbundle
504 498 1 changesets found
505 499 $ cd ../rinit
506 500 $ hg unbundle initbundle
507 501 adding changesets
508 502 adding manifests
509 503 adding file changes
510 504 added 1 changesets with 2 changes to 2 files
511 505 new changesets e97f55b2665a (1 drafts)
512 506 (run 'hg update' to get a working copy)
513 507 $ hg cat -r 0 target | head -n 10
General Comments 0
You need to be logged in to leave comments. Login now