##// END OF EJS Templates
Backport PR #8237: Don't write notebooks to disk when signing them...
Min RK -
Show More
@@ -1,427 +1,424 b''
1 1 """Utilities for signing notebooks"""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 import base64
7 7 from contextlib import contextmanager
8 8 from datetime import datetime
9 9 import hashlib
10 10 from hmac import HMAC
11 11 import io
12 12 import os
13 13
14 14 try:
15 15 import sqlite3
16 16 except ImportError:
17 17 try:
18 18 from pysqlite2 import dbapi2 as sqlite3
19 19 except ImportError:
20 20 sqlite3 = None
21 21
22 from IPython.utils.io import atomic_writing
23 22 from IPython.utils.py3compat import unicode_type, cast_bytes
24 23 from IPython.utils.traitlets import Instance, Bytes, Enum, Any, Unicode, Bool, Integer
25 24 from IPython.config import LoggingConfigurable, MultipleInstanceError
26 25 from IPython.core.application import BaseIPythonApplication, base_flags
27 26
28 27 from . import read, write, NO_CONVERT
29 28
# Names of the hash algorithms that may be selected for signing.
# Python 3 exposes them as ``hashlib.algorithms_guaranteed``; older
# interpreters only provide ``hashlib.algorithms``.
algorithms = getattr(hashlib, 'algorithms_guaranteed', None)
if algorithms is None:
    algorithms = hashlib.algorithms
35 34
36 35
def yield_everything(obj):
    """Flatten a JSON-like object into a stream of byte strings.

    Dictionaries are walked in sorted-key order, emitting each key
    (via ``cast_bytes``) followed by the flattened value.  Lists and tuples
    are walked in order.  Any other value is converted to text with
    ``unicode_type`` when it is not text already and emitted UTF-8 encoded.

    This lets a whole notebook be fed to an HMAC digester piece by piece,
    without serializing it first.
    """
    if isinstance(obj, dict):
        for name in sorted(obj):
            child = obj[name]
            yield cast_bytes(name)
            for chunk in yield_everything(child):
                yield chunk
        return

    if isinstance(obj, (list, tuple)):
        for item in obj:
            for chunk in yield_everything(item):
                yield chunk
        return

    # Scalar leaf: make sure it is text, then encode it.
    if not isinstance(obj, unicode_type):
        obj = unicode_type(obj)
    yield obj.encode('utf8')
57 56
def yield_code_cells(nb):
    """Iterate over the code cells of a notebook.

    Works for nbformat 4 and later (cells live in ``nb['cells']``) and for
    nbformat 3 (cells live in each entry of ``nb['worksheets']``).  Cells
    whose ``cell_type`` is not ``'code'`` are skipped; any other nbformat
    yields nothing.
    """
    version = nb.nbformat
    if version >= 4:
        cell_lists = [nb['cells']]
    elif version == 3:
        cell_lists = (sheet['cells'] for sheet in nb['worksheets'])
    else:
        cell_lists = ()

    for cells in cell_lists:
        for candidate in cells:
            if candidate['cell_type'] == 'code':
                yield candidate
72 71
@contextmanager
def signature_removed(nb):
    """Temporarily strip ``nb['metadata']['signature']`` for the ``with`` body.

    The signature (if there is one) is popped on entry and put back on exit,
    even when the body raises.  Used so that a previously stored signature
    does not take part in computing a new one.
    """
    stashed = nb['metadata'].pop('signature', None)
    try:
        yield
    finally:
        # Nothing to restore when the notebook had no signature.
        if stashed is not None:
            nb['metadata']['signature'] = stashed
85 84
86 85
class NotebookNotary(LoggingConfigurable):
    """A class for computing and verifying notebook signatures."""

    # A signature is the HMAC hex digest of the notebook's content, keyed
    # with ``secret``.  Signatures of trusted notebooks are recorded in the
    # SQLite database at ``db_file``; a notebook counts as signed when its
    # freshly computed signature is found in that database.

    profile_dir = Instance("IPython.core.profiledir.ProfileDir")
    def _profile_dir_default(self):
        from IPython.core.application import BaseIPythonApplication
        app = None
        try:
            if BaseIPythonApplication.initialized():
                app = BaseIPythonApplication.instance()
        except MultipleInstanceError:
            # No usable global instance: fall through and build a private one.
            pass
        if app is None:
            # create an app, without the global instance
            app = BaseIPythonApplication()
            app.initialize(argv=[])
        return app.profile_dir

    db_file = Unicode(config=True,
        help="""The sqlite file in which to store notebook signatures.
        By default, this will be in your IPython profile.
        You can set it to ':memory:' to disable sqlite writing to the filesystem.
        """)
    def _db_file_default(self):
        if self.profile_dir is None:
            return ':memory:'
        return os.path.join(self.profile_dir.security_dir, u'nbsignatures.db')

    # 64k entries ~ 12MB
    cache_size = Integer(65535, config=True,
        help="""The number of notebook signatures to cache.
        When the number of signatures exceeds this value,
        the oldest 25% of signatures will be culled.
        """
    )
    db = Any()
    def _db_default(self):
        """Open (and initialise) the signature database.

        Returns None when no sqlite3 module could be imported; every method
        that touches ``self.db`` must cope with that.
        """
        if sqlite3 is None:
            self.log.warn("Missing SQLite3, all notebooks will be untrusted!")
            return
        kwargs = dict(detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
        db = sqlite3.connect(self.db_file, **kwargs)
        self.init_db(db)
        return db

    def init_db(self, db):
        """Create the ``nbsignatures`` table and its lookup index if missing."""
        db.execute("""
        CREATE TABLE IF NOT EXISTS nbsignatures
        (
            id integer PRIMARY KEY AUTOINCREMENT,
            algorithm text,
            signature text,
            path text,
            last_seen timestamp
        )""")
        db.execute("""
        CREATE INDEX IF NOT EXISTS algosig ON nbsignatures(algorithm, signature)
        """)
        db.commit()

    algorithm = Enum(algorithms, default_value='sha256', config=True,
        help="""The hashing algorithm used to sign notebooks."""
    )
    def _algorithm_changed(self, name, old, new):
        # Keep the digest constructor in step with the configured algorithm.
        self.digestmod = getattr(hashlib, self.algorithm)

    digestmod = Any()
    def _digestmod_default(self):
        return getattr(hashlib, self.algorithm)

    secret_file = Unicode(config=True,
        help="""The file where the secret key is stored."""
    )
    def _secret_file_default(self):
        if self.profile_dir is None:
            return ''
        return os.path.join(self.profile_dir.security_dir, 'notebook_secret')

    secret = Bytes(config=True,
        help="""The secret key with which notebooks are signed."""
    )
    def _secret_default(self):
        """Load the signing key from ``secret_file``, creating it if absent."""
        # note : this assumes an Application is running
        if os.path.exists(self.secret_file):
            with io.open(self.secret_file, 'rb') as f:
                return f.read()
        else:
            # base64.encodestring was removed in Python 3.9; prefer its
            # replacement and only fall back to the old name where the new
            # one does not exist (Python 2).
            encode = getattr(base64, 'encodebytes', None)
            if encode is None:
                encode = base64.encodestring
            secret = encode(os.urandom(1024))
            self._write_secret_file(secret)
            return secret

    def _write_secret_file(self, secret):
        """write my secret to my secret_file

        Returns the secret that was written.
        """
        self.log.info("Writing notebook-signing key to %s", self.secret_file)
        with io.open(self.secret_file, 'wb') as f:
            f.write(secret)
        # Restricting the permissions is best-effort: a failure is logged,
        # not raised.
        try:
            os.chmod(self.secret_file, 0o600)
        except OSError:
            self.log.warn(
                "Could not set permissions on %s",
                self.secret_file
            )
        return secret

    def compute_signature(self, nb):
        """Compute a notebook's signature

        by hashing the entire contents of the notebook via HMAC digest.

        Returns the hex digest as a string.
        """
        hmac = HMAC(self.secret, digestmod=self.digestmod)
        # don't include the previous hash in the content to hash
        with signature_removed(nb):
            # sign the whole thing
            for b in yield_everything(nb):
                hmac.update(b)

        return hmac.hexdigest()

    def check_signature(self, nb):
        """Check whether a notebook's signature is in the trusted database

        The notebook's signature is computed and looked up, together with
        the current algorithm, in the ``nbsignatures`` table.  On a match
        the row's ``last_seen`` timestamp is refreshed.

        Returns True if a matching signature is found, False otherwise.
        False is also returned when the notebook's nbformat is below 3 or
        when no database is available.
        """
        if nb.nbformat < 3:
            return False
        if self.db is None:
            return False
        signature = self.compute_signature(nb)
        r = self.db.execute("""SELECT id FROM nbsignatures WHERE
            algorithm = ? AND
            signature = ?;
            """, (self.algorithm, signature)).fetchone()
        if r is None:
            return False
        self.db.execute("""UPDATE nbsignatures SET last_seen = ? WHERE
            algorithm = ? AND
            signature = ?;
            """,
            (datetime.utcnow(), self.algorithm, signature),
        )
        self.db.commit()
        return True

    def sign(self, nb):
        """Sign a notebook, indicating that its output is trusted on this machine

        Stores hash algorithm and hmac digest in a local database of trusted notebooks.
        Notebooks with nbformat below 3 are left unsigned.
        """
        if nb.nbformat < 3:
            return
        signature = self.compute_signature(nb)
        self.store_signature(signature, nb)

    def store_signature(self, signature, nb):
        """Record ``signature`` as trusted and cull the database if it is full

        Does nothing when no database is available.  ``nb`` is accepted but
        not used here.
        """
        if self.db is None:
            return
        now = datetime.utcnow()
        # NOTE(review): the table declares no UNIQUE constraint on
        # (algorithm, signature) -- the ``algosig`` index is a plain index --
        # so OR IGNORE has no conflict to ignore and re-signing the same
        # notebook adds another row.
        self.db.execute("""INSERT OR IGNORE INTO nbsignatures
            (algorithm, signature, last_seen) VALUES (?, ?, ?)""",
            (self.algorithm, signature, now)
        )
        self.db.execute("""UPDATE nbsignatures SET last_seen = ? WHERE
            algorithm = ? AND
            signature = ?;
            """,
            (now, self.algorithm, signature),
        )
        self.db.commit()
        n, = self.db.execute("SELECT Count(*) FROM nbsignatures").fetchone()
        if n > self.cache_size:
            self.cull_db()

    def unsign(self, nb):
        """Ensure that a notebook is untrusted

        by removing its signature from the trusted database, if present.
        Does nothing when no database is available.
        """
        # Same guard as check_signature/store_signature: without SQLite
        # there is no database, hence nothing to remove.
        if self.db is None:
            return
        signature = self.compute_signature(nb)
        self.db.execute("""DELETE FROM nbsignatures WHERE
            algorithm = ? AND
            signature = ?;
            """,
            (self.algorithm, signature)
        )
        self.db.commit()

    def cull_db(self):
        """Cull oldest 25% of the trusted signatures when the size limit is reached

        Keeps the ``0.75 * cache_size`` most recently seen rows (at least
        one) and deletes the rest.
        """
        self.db.execute("""DELETE FROM nbsignatures WHERE id IN (
            SELECT id FROM nbsignatures ORDER BY last_seen DESC LIMIT -1 OFFSET ?
        );
        """, (max(int(0.75 * self.cache_size), 1),))
        # Commit explicitly so the deletion does not sit in an open
        # transaction until some later, unrelated commit.
        self.db.commit()

    def mark_cells(self, nb, trusted):
        """Set the ``trusted`` flag on every code cell of a notebook

        Sets ``cell.metadata.trusted = trusted`` on all code cells.
        Notebooks with nbformat below 3 are left untouched.

        This function is the inverse of check_cells
        """
        if nb.nbformat < 3:
            return

        for cell in yield_code_cells(nb):
            cell['metadata']['trusted'] = trusted

    def _check_cell(self, cell, nbformat_version):
        """Do we trust an individual cell?

        Return True if:

        - cell is explicitly trusted
        - cell has no potentially unsafe rich output

        If a cell has no output, or only simple print statements,
        it will always be trusted.

        Note that the ``trusted`` flag is removed from the cell's metadata
        as a side effect of being read.
        """
        # explicitly trusted
        if cell['metadata'].pop("trusted", False):
            return True

        # explicitly safe output
        if nbformat_version >= 4:
            unsafe_output_types = ['execute_result', 'display_data']
            safe_keys = {"output_type", "execution_count", "metadata"}
        else: # v3
            unsafe_output_types = ['pyout', 'display_data']
            safe_keys = {"output_type", "prompt_number", "metadata"}

        for output in cell['outputs']:
            output_type = output['output_type']
            if output_type in unsafe_output_types:
                # if there are any data keys not in the safe whitelist
                output_keys = set(output)
                if output_keys.difference(safe_keys):
                    return False

        return True

    def check_cells(self, nb):
        """Return whether all code cells are trusted

        If there are no code cells, return True.
        Notebooks with nbformat below 3 are never trusted.

        This function is the inverse of mark_cells.
        """
        if nb.nbformat < 3:
            return False
        trusted = True
        # Deliberately no early exit: _check_cell pops the ``trusted`` flag
        # from each cell it inspects, so every code cell must be visited.
        for cell in yield_code_cells(nb):
            # only distrust a cell if it actually has some output to distrust
            if not self._check_cell(cell, nb.nbformat):
                trusted = False

        return trusted
352 351
353 352
# Command-line flags of the trust application: its own ``--reset`` flag
# plus the base application flags, minus ``init``.
trust_flags = dict(
    reset=(
        {'TrustNotebookApp': {'reset': True}},
        """Delete the trusted notebook cache.
        All previously signed notebooks will become untrusted.
        """
    ),
)
trust_flags.update(base_flags)
del trust_flags['init']
364 363
365 364
class TrustNotebookApp(BaseIPythonApplication):
    # Command-line application: signs each notebook named on the command
    # line, or, with ``reset`` set, deletes the signature database and
    # writes a new signing key.

    description="""Sign one or more IPython notebooks with your key,
    to trust their dynamic (HTML, Javascript) output.

    Trusting a notebook only applies to the current IPython profile.
    To trust a notebook for use with a profile other than default,
    add `--profile [profile name]`.

    Otherwise, you will have to re-execute the notebook to see output.
    """

    examples = """
    ipython trust mynotebook.ipynb and_this_one.ipynb
    ipython trust --profile myprofile mynotebook.ipynb
    """

    flags = trust_flags

    reset = Bool(False, config=True,
        help="""If True, delete the trusted signature cache.
        After reset, all previously signed notebooks will become untrusted.
        """
    )

    notary = Instance(NotebookNotary)
    def _notary_default(self):
        return NotebookNotary(parent=self, profile_dir=self.profile_dir)

    def sign_notebook(self, notebook_path):
        """Sign the notebook at ``notebook_path`` unless it is already signed

        Logs an error and exits with status 1 when the file does not exist.
        The notebook file itself is only read: the signature is handed to
        the notary, nothing is written back to ``notebook_path``.
        """
        if not os.path.exists(notebook_path):
            self.log.error("Notebook missing: %s", notebook_path)
            self.exit(1)
        with io.open(notebook_path, encoding='utf8') as f:
            nb = read(f, NO_CONVERT)
        if self.notary.check_signature(nb):
            print("Notebook already signed: %s" % notebook_path)
        else:
            print("Signing notebook: %s" % notebook_path)
            self.notary.sign(nb)

    def generate_new_key(self):
        """Generate a new notebook signature key"""
        print("Generating new notebook key: %s" % self.notary.secret_file)
        # The key is written as 1024 raw random bytes.
        self.notary._write_secret_file(os.urandom(1024))

    def start(self):
        """Run the application: reset the trust store, or sign the notebooks

        With ``reset`` set, the signature database file is removed (if it
        exists) and a new key is generated; no notebook is signed.
        Otherwise every path in ``extra_args`` is signed; with no paths a
        critical message is logged and the app exits with status 1.
        """
        if self.reset:
            if os.path.exists(self.notary.db_file):
                print("Removing trusted signature cache: %s" % self.notary.db_file)
                os.remove(self.notary.db_file)
            self.generate_new_key()
            return
        if not self.extra_args:
            self.log.critical("Specify at least one notebook to sign.")
            self.exit(1)

        for notebook_path in self.extra_args:
            self.sign_notebook(notebook_path)
427 424
General Comments 0
You need to be logged in to leave comments. Login now