upstream/ipython Commit - r27906:4b0aed94

Implement guarded evaluation, replace greedy, implement:...

krassowski -

r27906:4b0aed94

parent child

IPython/core/guarded_eval.py

0 created 644 +541 0

This diff has been collapsed as it changes many lines, (541 lines changed) Show them Hide them
	@@ -0,0 +1,541 b''
		1	from typing import Callable, Protocol, Set, Tuple, NamedTuple, Literal, Union
		2	import collections
		3	import sys
		4	import ast
		5	import types
		6	from functools import cached_property
		7	from dataclasses import dataclass, field
		8
		9
		10	class HasGetItem(Protocol):
		11	def __getitem__(self, key) -> None: ...
		12
		13
		14	class InstancesHaveGetItem(Protocol):
		15	def __call__(self) -> HasGetItem: ...
		16
		17
		18	class HasGetAttr(Protocol):
		19	def __getattr__(self, key) -> None: ...
		20
		21
		22	class DoesNotHaveGetAttr(Protocol):
		23	pass
		24
		25	# By default `__getattr__` is not explicitly implemented on most objects
		26	MayHaveGetattr = Union[HasGetAttr, DoesNotHaveGetAttr]
		27
		28
		29	def unbind_method(func: Callable) -> Union[Callable, None]:
		30	"""Get unbound method for given bound method.
		31
		32	Returns None if cannot get unbound method."""
		33	owner = getattr(func, '__self__', None)
		34	owner_class = type(owner)
		35	name = getattr(func, '__name__', None)
		36	instance_dict_overrides = getattr(owner, '__dict__', None)
		37	if (
		38	owner is not None
		39	and
		40	name
		41	and
		42	(
		43	not instance_dict_overrides
		44	or
		45	(
		46	instance_dict_overrides
		47	and name not in instance_dict_overrides
		48	)
		49	)
		50	):
		51	return getattr(owner_class, name)
		52
		53
		54	@dataclass
		55	class EvaluationPolicy:
		56	allow_locals_access: bool = False
		57	allow_globals_access: bool = False
		58	allow_item_access: bool = False
		59	allow_attr_access: bool = False
		60	allow_builtins_access: bool = False
		61	allow_any_calls: bool = False
		62	allowed_calls: Set[Callable] = field(default_factory=set)
		63
		64	def can_get_item(self, value, item):
		65	return self.allow_item_access
		66
		67	def can_get_attr(self, value, attr):
		68	return self.allow_attr_access
		69
		70	def can_call(self, func):
		71	if self.allow_any_calls:
		72	return True
		73
		74	if func in self.allowed_calls:
		75	return True
		76
		77	owner_method = unbind_method(func)
		78	if owner_method and owner_method in self.allowed_calls:
		79	return True
		80
		81	def has_original_dunder_external(value, module_name, access_path, method_name,):
		82	try:
		83	if module_name not in sys.modules:
		84	return False
		85	member_type = sys.modules[module_name]
		86	for attr in access_path:
		87	member_type = getattr(member_type, attr)
		88	value_type = type(value)
		89	if type(value) == member_type:
		90	return True
		91	if isinstance(value, member_type):
		92	method = getattr(value_type, method_name, None)
		93	member_method = getattr(member_type, method_name, None)
		94	if member_method == method:
		95	return True
		96	except (AttributeError, KeyError):
		97	return False
		98
		99
		100	def has_original_dunder(
		101	value,
		102	allowed_types,
		103	allowed_methods,
		104	allowed_external,
		105	method_name
		106	):
		107	# note: Python ignores `__getattr__`/`__getitem__` on instances,
		108	# we only need to check at class level
		109	value_type = type(value)
		110
		111	# strict type check passes → no need to check method
		112	if value_type in allowed_types:
		113	return True
		114
		115	method = getattr(value_type, method_name, None)
		116
		117	if not method:
		118	return None
		119
		120	if method in allowed_methods:
		121	return True
		122
		123	for module_name, *access_path in allowed_external:
		124	if has_original_dunder_external(value, module_name, access_path, method_name):
		125	return True
		126
		127	return False
		128
		129
		130	@dataclass
		131	class SelectivePolicy(EvaluationPolicy):
		132	allowed_getitem: Set[HasGetItem] = field(default_factory=set)
		133	allowed_getitem_external: Set[Tuple[str, ...]] = field(default_factory=set)
		134	allowed_getattr: Set[MayHaveGetattr] = field(default_factory=set)
		135	allowed_getattr_external: Set[Tuple[str, ...]] = field(default_factory=set)
		136
		137	def can_get_attr(self, value, attr):
		138	has_original_attribute = has_original_dunder(
		139	value,
		140	allowed_types=self.allowed_getattr,
		141	allowed_methods=self._getattribute_methods,
		142	allowed_external=self.allowed_getattr_external,
		143	method_name='__getattribute__'
		144	)
		145	has_original_attr = has_original_dunder(
		146	value,
		147	allowed_types=self.allowed_getattr,
		148	allowed_methods=self._getattr_methods,
		149	allowed_external=self.allowed_getattr_external,
		150	method_name='__getattr__'
		151	)
		152	# Many objects do not have `__getattr__`, this is fine
		153	if has_original_attr is None and has_original_attribute:
		154	return True
		155
		156	# Accept objects without modifications to `__getattr__` and `__getattribute__`
		157	return has_original_attr and has_original_attribute
		158
		159	def get_attr(self, value, attr):
		160	if self.can_get_attr(value, attr):
		161	return getattr(value, attr)
		162
		163
		164	def can_get_item(self, value, item):
		165	"""Allow accessing `__getiitem__` of allow-listed instances unless it was not modified."""
		166	return has_original_dunder(
		167	value,
		168	allowed_types=self.allowed_getitem,
		169	allowed_methods=self._getitem_methods,
		170	allowed_external=self.allowed_getitem_external,
		171	method_name='__getitem__'
		172	)
		173
		174	@cached_property
		175	def _getitem_methods(self) -> Set[Callable]:
		176	return self._safe_get_methods(
		177	self.allowed_getitem,
		178	'__getitem__'
		179	)
		180
		181	@cached_property
		182	def _getattr_methods(self) -> Set[Callable]:
		183	return self._safe_get_methods(
		184	self.allowed_getattr,
		185	'__getattr__'
		186	)
		187
		188	@cached_property
		189	def _getattribute_methods(self) -> Set[Callable]:
		190	return self._safe_get_methods(
		191	self.allowed_getattr,
		192	'__getattribute__'
		193	)
		194
		195	def _safe_get_methods(self, classes, name) -> Set[Callable]:
		196	return {
		197	method
		198	for class_ in classes
		199	for method in [getattr(class_, name, None)]
		200	if method
		201	}
		202
		203
		204	class DummyNamedTuple(NamedTuple):
		205	pass
		206
		207
		208	class EvaluationContext(NamedTuple):
		209	locals_: dict
		210	globals_: dict
		211	evaluation: Literal['forbidden', 'minimal', 'limitted', 'unsafe', 'dangerous'] = 'forbidden'
		212	in_subscript: bool = False
		213
		214
		215	class IdentitySubscript:
		216	def __getitem__(self, key):
		217	return key
		218
		219	IDENTITY_SUBSCRIPT = IdentitySubscript()
		220	SUBSCRIPT_MARKER = '__SUBSCRIPT_SENTINEL__'
		221
		222	class GuardRejection(ValueError):
		223	pass
		224
		225
		226	def guarded_eval(
		227	code: str,
		228	context: EvaluationContext
		229	):
		230	locals_ = context.locals_
		231
		232	if context.evaluation == 'forbidden':
		233	raise GuardRejection('Forbidden mode')
		234
		235	# note: not using `ast.literal_eval` as it does not implement
		236	# getitem at all, for example it fails on simple `[0][1]`
		237
		238	if context.in_subscript:
		239	# syntatic sugar for ellipsis (:) is only available in susbcripts
		240	# so we need to trick the ast parser into thinking that we have
		241	# a subscript, but we need to be able to later recognise that we did
		242	# it so we can ignore the actual __getitem__ operation
		243	if not code:
		244	return tuple()
		245	locals_ = locals_.copy()
		246	locals_[SUBSCRIPT_MARKER] = IDENTITY_SUBSCRIPT
		247	code = SUBSCRIPT_MARKER + '[' + code + ']'
		248	context = EvaluationContext(**{
		249	**context._asdict(),
		250	**{'locals_': locals_}
		251	})
		252
		253	if context.evaluation == 'dangerous':
		254	return eval(code, context.globals_, context.locals_)
		255
		256	expression = ast.parse(code, mode='eval')
		257
		258	return eval_node(expression, context)
		259
		260	def eval_node(node: Union[ast.AST, None], context: EvaluationContext):
		261	"""
		262	Evaluate AST node in provided context.
		263
		264	Applies evaluation restrictions defined in the context.
		265
		266	Currently does not support evaluation of functions with arguments.
		267
		268	Does not evaluate actions which always have side effects:
		269	- class definitions (`class sth: ...`)
		270	- function definitions (`def sth: ...`)
		271	- variable assignments (`x = 1`)
		272	- augumented assignments (`x += 1`)
		273	- deletions (`del x`)
		274
		275	Does not evaluate operations which do not return values:
		276	- assertions (`assert x`)
		277	- pass (`pass`)
		278	- imports (`import x`)
		279	- control flow
		280	- conditionals (`if x:`) except for terenary IfExp (`a if x else b`)
		281	- loops (`for` and `while`)
		282	- exception handling
		283	"""
		284	policy = EVALUATION_POLICIES[context.evaluation]
		285	if node is None:
		286	return None
		287	if isinstance(node, ast.Expression):
		288	return eval_node(node.body, context)
		289	if isinstance(node, ast.BinOp):
		290	# TODO: add guards
		291	left = eval_node(node.left, context)
		292	right = eval_node(node.right, context)
		293	if isinstance(node.op, ast.Add):
		294	return left + right
		295	if isinstance(node.op, ast.Sub):
		296	return left - right
		297	if isinstance(node.op, ast.Mult):
		298	return left * right
		299	if isinstance(node.op, ast.Div):
		300	return left / right
		301	if isinstance(node.op, ast.FloorDiv):
		302	return left // right
		303	if isinstance(node.op, ast.Mod):
		304	return left % right
		305	if isinstance(node.op, ast.Pow):
		306	return left ** right
		307	if isinstance(node.op, ast.LShift):
		308	return left << right
		309	if isinstance(node.op, ast.RShift):
		310	return left >> right
		311	if isinstance(node.op, ast.BitOr):
		312	return left \| right
		313	if isinstance(node.op, ast.BitXor):
		314	return left ^ right
		315	if isinstance(node.op, ast.BitAnd):
		316	return left & right
		317	if isinstance(node.op, ast.MatMult):
		318	return left @ right
		319	if isinstance(node, ast.Constant):
		320	return node.value
		321	if isinstance(node, ast.Index):
		322	return eval_node(node.value, context)
		323	if isinstance(node, ast.Tuple):
		324	return tuple(
		325	eval_node(e, context)
		326	for e in node.elts
		327	)
		328	if isinstance(node, ast.List):
		329	return [
		330	eval_node(e, context)
		331	for e in node.elts
		332	]
		333	if isinstance(node, ast.Set):
		334	return {
		335	eval_node(e, context)
		336	for e in node.elts
		337	}
		338	if isinstance(node, ast.Dict):
		339	return dict(zip(
		340	[eval_node(k, context) for k in node.keys],
		341	[eval_node(v, context) for v in node.values]
		342	))
		343	if isinstance(node, ast.Slice):
		344	return slice(
		345	eval_node(node.lower, context),
		346	eval_node(node.upper, context),
		347	eval_node(node.step, context)
		348	)
		349	if isinstance(node, ast.ExtSlice):
		350	return tuple([
		351	eval_node(dim, context)
		352	for dim in node.dims
		353	])
		354	if isinstance(node, ast.UnaryOp):
		355	# TODO: add guards
		356	value = eval_node(node.operand, context)
		357	if isinstance(node.op, ast.USub):
		358	return -value
		359	if isinstance(node.op, ast.UAdd):
		360	return +value
		361	if isinstance(node.op, ast.Invert):
		362	return ~value
		363	if isinstance(node.op, ast.Not):
		364	return not value
		365	raise ValueError('Unhandled unary operation:', node.op)
		366	if isinstance(node, ast.Subscript):
		367	value = eval_node(node.value, context)
		368	slice_ = eval_node(node.slice, context)
		369	if policy.can_get_item(value, slice_):
		370	return value[slice_]
		371	raise GuardRejection(
		372	'Subscript access (`__getitem__`) for',
		373	type(value), # not joined to avoid calling `repr`
		374	f' not allowed in {context.evaluation} mode'
		375	)
		376	if isinstance(node, ast.Name):
		377	if policy.allow_locals_access and node.id in context.locals_:
		378	return context.locals_[node.id]
		379	if policy.allow_globals_access and node.id in context.globals_:
		380	return context.globals_[node.id]
		381	if policy.allow_builtins_access and node.id in __builtins__:
		382	return __builtins__[node.id]
		383	if not policy.allow_globals_access and not policy.allow_locals_access:
		384	raise GuardRejection(
		385	f'Namespace access not allowed in {context.evaluation} mode'
		386	)
		387	else:
		388	raise NameError(f'{node.id} not found in locals nor globals')
		389	if isinstance(node, ast.Attribute):
		390	value = eval_node(node.value, context)
		391	if policy.can_get_attr(value, node.attr):
		392	return getattr(value, node.attr)
		393	raise GuardRejection(
		394	'Attribute access (`__getattr__`) for',
		395	type(value), # not joined to avoid calling `repr`
		396	f'not allowed in {context.evaluation} mode'
		397	)
		398	if isinstance(node, ast.IfExp):
		399	test = eval_node(node.test, context)
		400	if test:
		401	return eval_node(node.body, context)
		402	else:
		403	return eval_node(node.orelse, context)
		404	if isinstance(node, ast.Call):
		405	func = eval_node(node.func, context)
		406	print(node.keywords)
		407	if policy.can_call(func) and not node.keywords:
		408	args = [
		409	eval_node(arg, context)
		410	for arg in node.args
		411	]
		412	return func(*args)
		413	raise GuardRejection(
		414	'Call for',
		415	func, # not joined to avoid calling `repr`
		416	f'not allowed in {context.evaluation} mode'
		417	)
		418	raise ValueError('Unhandled node', node)
		419
		420
		421	SUPPORTED_EXTERNAL_GETITEM = {
		422	('pandas', 'core', 'indexing', '_iLocIndexer'),
		423	('pandas', 'core', 'indexing', '_LocIndexer'),
		424	('pandas', 'DataFrame'),
		425	('pandas', 'Series'),
		426	('numpy', 'ndarray'),
		427	('numpy', 'void')
		428	}
		429
		430	BUILTIN_GETITEM = {
		431	dict,
		432	str,
		433	bytes,
		434	list,
		435	tuple,
		436	collections.defaultdict,
		437	collections.deque,
		438	collections.OrderedDict,
		439	collections.ChainMap,
		440	collections.UserDict,
		441	collections.UserList,
		442	collections.UserString,
		443	DummyNamedTuple,
		444	IdentitySubscript
		445	}
		446
		447
		448	def _list_methods(cls, source=None):
		449	"""For use on immutable objects or with methods returning a copy"""
		450	return [
		451	getattr(cls, k)
		452	for k in (source if source else dir(cls))
		453	]
		454
		455
		456	dict_non_mutating_methods = ('copy', 'keys', 'values', 'items')
		457	list_non_mutating_methods = ('copy', 'index', 'count')
		458	set_non_mutating_methods = set(dir(set)) & set(dir(frozenset))
		459
		460
		461	dict_keys = type({}.keys())
		462	method_descriptor = type(list.copy)
		463
		464	ALLOWED_CALLS = {
		465	bytes,
		466	*_list_methods(bytes),
		467	dict,
		468	*_list_methods(dict, dict_non_mutating_methods),
		469	dict_keys.isdisjoint,
		470	list,
		471	*_list_methods(list, list_non_mutating_methods),
		472	set,
		473	*_list_methods(set, set_non_mutating_methods),
		474	frozenset,
		475	*_list_methods(frozenset),
		476	range,
		477	str,
		478	*_list_methods(str),
		479	tuple,
		480	*_list_methods(tuple),
		481	collections.deque,
		482	*_list_methods(collections.deque, list_non_mutating_methods),
		483	collections.defaultdict,
		484	*_list_methods(collections.defaultdict, dict_non_mutating_methods),
		485	collections.OrderedDict,
		486	*_list_methods(collections.OrderedDict, dict_non_mutating_methods),
		487	collections.UserDict,
		488	*_list_methods(collections.UserDict, dict_non_mutating_methods),
		489	collections.UserList,
		490	*_list_methods(collections.UserList, list_non_mutating_methods),
		491	collections.UserString,
		492	*_list_methods(collections.UserString, dir(str)),
		493	collections.Counter,
		494	*_list_methods(collections.Counter, dict_non_mutating_methods),
		495	collections.Counter.elements,
		496	collections.Counter.most_common
		497	}
		498
		499	EVALUATION_POLICIES = {
		500	'minimal': EvaluationPolicy(
		501	allow_builtins_access=True,
		502	allow_locals_access=False,
		503	allow_globals_access=False,
		504	allow_item_access=False,
		505	allow_attr_access=False,
		506	allowed_calls=set(),
		507	allow_any_calls=False
		508	),
		509	'limitted': SelectivePolicy(
		510	# TODO:
		511	# - should reject binary and unary operations if custom methods would be dispatched
		512	allowed_getitem=BUILTIN_GETITEM,
		513	allowed_getitem_external=SUPPORTED_EXTERNAL_GETITEM,
		514	allowed_getattr={
		515	*BUILTIN_GETITEM,
		516	set,
		517	frozenset,
		518	object,
		519	type, # `type` handles a lot of generic cases, e.g. numbers as in `int.real`.
		520	dict_keys,
		521	method_descriptor
		522	},
		523	allowed_getattr_external={
		524	# pandas Series/Frame implements custom `__getattr__`
		525	('pandas', 'DataFrame'),
		526	('pandas', 'Series')
		527	},
		528	allow_builtins_access=True,
		529	allow_locals_access=True,
		530	allow_globals_access=True,
		531	allowed_calls=ALLOWED_CALLS
		532	),
		533	'unsafe': EvaluationPolicy(
		534	allow_builtins_access=True,
		535	allow_locals_access=True,
		536	allow_globals_access=True,
		537	allow_attr_access=True,
		538	allow_item_access=True,
		539	allow_any_calls=True
		540	)
		541	} No newline at end of file

IPython/core/tests/test_guarded_eval.py

0 created 644 +286 0

@@ -0,0 +1,286 b''
	1	from typing import NamedTuple
	2	from IPython.core.guarded_eval import EvaluationContext, GuardRejection, guarded_eval, unbind_method
	3	from IPython.testing import decorators as dec
	4	import pytest
	5
	6
	7	def limitted(**kwargs):
	8	return EvaluationContext(
	9	locals_=kwargs,
	10	globals_={},
	11	evaluation='limitted'
	12	)
	13
	14
	15	def unsafe(**kwargs):
	16	return EvaluationContext(
	17	locals_=kwargs,
	18	globals_={},
	19	evaluation='unsafe'
	20	)
	21
	22	@dec.skip_without('pandas')
	23	def test_pandas_series_iloc():
	24	import pandas as pd
	25	series = pd.Series([1], index=['a'])
	26	context = limitted(data=series)
	27	assert guarded_eval('data.iloc[0]', context) == 1
	28
	29
	30	@dec.skip_without('pandas')
	31	def test_pandas_series():
	32	import pandas as pd
	33	context = limitted(data=pd.Series([1], index=['a']))
	34	assert guarded_eval('data["a"]', context) == 1
	35	with pytest.raises(KeyError):
	36	guarded_eval('data["c"]', context)
	37
	38
	39	@dec.skip_without('pandas')
	40	def test_pandas_bad_series():
	41	import pandas as pd
	42	class BadItemSeries(pd.Series):
	43	def __getitem__(self, key):
	44	return 'CUSTOM_ITEM'
	45
	46	class BadAttrSeries(pd.Series):
	47	def __getattr__(self, key):
	48	return 'CUSTOM_ATTR'
	49
	50	bad_series = BadItemSeries([1], index=['a'])
	51	context = limitted(data=bad_series)
	52
	53	with pytest.raises(GuardRejection):
	54	guarded_eval('data["a"]', context)
	55	with pytest.raises(GuardRejection):
	56	guarded_eval('data["c"]', context)
	57
	58	# note: here result is a bit unexpected because
	59	# pandas `__getattr__` calls `__getitem__`;
	60	# FIXME - special case to handle it?
	61	assert guarded_eval('data.a', context) == 'CUSTOM_ITEM'
	62
	63	context = unsafe(data=bad_series)
	64	assert guarded_eval('data["a"]', context) == 'CUSTOM_ITEM'
	65
	66	bad_attr_series = BadAttrSeries([1], index=['a'])
	67	context = limitted(data=bad_attr_series)
	68	assert guarded_eval('data["a"]', context) == 1
	69	with pytest.raises(GuardRejection):
	70	guarded_eval('data.a', context)
	71
	72
	73	@dec.skip_without('pandas')
	74	def test_pandas_dataframe_loc():
	75	import pandas as pd
	76	from pandas.testing import assert_series_equal
	77	data = pd.DataFrame([{'a': 1}])
	78	context = limitted(data=data)
	79	assert_series_equal(
	80	guarded_eval('data.loc[:, "a"]', context),
	81	data['a']
	82	)
	83
	84
	85	def test_named_tuple():
	86
	87	class GoodNamedTuple(NamedTuple):
	88	a: str
	89	pass
	90
	91	class BadNamedTuple(NamedTuple):
	92	a: str
	93	def __getitem__(self, key):
	94	return None
	95
	96	good = GoodNamedTuple(a='x')
	97	bad = BadNamedTuple(a='x')
	98
	99	context = limitted(data=good)
	100	assert guarded_eval('data[0]', context) == 'x'
	101
	102	context = limitted(data=bad)
	103	with pytest.raises(GuardRejection):
	104	guarded_eval('data[0]', context)
	105
	106
	107	def test_dict():
	108	context = limitted(
	109	data={'a': 1, 'b': {'x': 2}, ('x', 'y'): 3}
	110	)
	111	assert guarded_eval('data["a"]', context) == 1
	112	assert guarded_eval('data["b"]', context) == {'x': 2}
	113	assert guarded_eval('data["b"]["x"]', context) == 2
	114	assert guarded_eval('data["x", "y"]', context) == 3
	115
	116	assert guarded_eval('data.keys', context)
	117
	118
	119	def test_set():
	120	context = limitted(data={'a', 'b'})
	121	assert guarded_eval('data.difference', context)
	122
	123
	124	def test_list():
	125	context = limitted(data=[1, 2, 3])
	126	assert guarded_eval('data[1]', context) == 2
	127	assert guarded_eval('data.copy', context)
	128
	129
	130	def test_dict_literal():
	131	context = limitted()
	132	assert guarded_eval('{}', context) == {}
	133	assert guarded_eval('{"a": 1}', context) == {"a": 1}
	134
	135
	136	def test_list_literal():
	137	context = limitted()
	138	assert guarded_eval('[]', context) == []
	139	assert guarded_eval('[1, "a"]', context) == [1, "a"]
	140
	141
	142	def test_set_literal():
	143	context = limitted()
	144	assert guarded_eval('set()', context) == set()
	145	assert guarded_eval('{"a"}', context) == {"a"}
	146
	147
	148	def test_if_expression():
	149	context = limitted()
	150	assert guarded_eval('2 if True else 3', context) == 2
	151	assert guarded_eval('4 if False else 5', context) == 5
	152
	153
	154	def test_object():
	155	obj = object()
	156	context = limitted(obj=obj)
	157	assert guarded_eval('obj.__dir__', context) == obj.__dir__
	158
	159
	160	@pytest.mark.parametrize(
	161	"code,expected",
	162	[
	163	[
	164	'int.numerator',
	165	int.numerator
	166	],
	167	[
	168	'float.is_integer',
	169	float.is_integer
	170	],
	171	[
	172	'complex.real',
	173	complex.real
	174	]
	175	]
	176	)
	177	def test_number_attributes(code, expected):
	178	assert guarded_eval(code, limitted()) == expected
	179
	180
	181	def test_method_descriptor():
	182	context = limitted()
	183	assert guarded_eval('list.copy.__name__', context) == 'copy'
	184
	185
	186	@pytest.mark.parametrize(
	187	"data,good,bad,expected",
	188	[
	189	[
	190	[1, 2, 3],
	191	'data.index(2)',
	192	'data.append(4)',
	193	1
	194	],
	195	[
	196	{'a': 1},
	197	'data.keys().isdisjoint({})',
	198	'data.update()',
	199	True
	200	]
	201	]
	202	)
	203	def test_calls(data, good, bad, expected):
	204	context = limitted(data=data)
	205	assert guarded_eval(good, context) == expected
	206
	207	with pytest.raises(GuardRejection):
	208	guarded_eval(bad, context)
	209
	210
	211	@pytest.mark.parametrize(
	212	"code,expected",
	213	[
	214	[
	215	'(1\n+\n1)',
	216	2
	217	],
	218	[
	219	'list(range(10))[-1:]',
	220	[9]
	221	],
	222	[
	223	'list(range(20))[3:-2:3]',
	224	[3, 6, 9, 12, 15]
	225	]
	226	]
	227	)
	228	def test_literals(code, expected):
	229	context = limitted()
	230	assert guarded_eval(code, context) == expected
	231
	232
	233	def test_subscript():
	234	context = EvaluationContext(
	235	locals_={},
	236	globals_={},
	237	evaluation='limitted',
	238	in_subscript=True
	239	)
	240	empty_slice = slice(None, None, None)
	241	assert guarded_eval('', context) == tuple()
	242	assert guarded_eval(':', context) == empty_slice
	243	assert guarded_eval('1:2:3', context) == slice(1, 2, 3)
	244	assert guarded_eval(':, "a"', context) == (empty_slice, "a")
	245
	246
	247	def test_unbind_method():
	248	class X(list):
	249	def index(self, k):
	250	return 'CUSTOM'
	251	x = X()
	252	assert unbind_method(x.index) is X.index
	253	assert unbind_method([].index) is list.index
	254
	255
	256	def test_assumption_instance_attr_do_not_matter():
	257	"""This is semi-specified in Python documentation.
	258
	259	However, since the specification says 'not guaranted
	260	to work' rather than 'is forbidden to work', future
	261	versions could invalidate this assumptions. This test
	262	is meant to catch such a change if it ever comes true.
	263	"""
	264	class T:
	265	def __getitem__(self, k):
	266	return 'a'
	267	def __getattr__(self, k):
	268	return 'a'
	269	t = T()
	270	t.__getitem__ = lambda f: 'b'
	271	t.__getattr__ = lambda f: 'b'
	272	assert t[1] == 'a'
	273	assert t[1] == 'a'
	274
	275
	276	def test_assumption_named_tuples_share_getitem():
	277	"""Check assumption on named tuples sharing __getitem__"""
	278	from typing import NamedTuple
	279
	280	class A(NamedTuple):
	281	pass
	282
	283	class B(NamedTuple):
	284	pass
	285
	286	assert A.__getitem__ == B.__getitem__

IPython/core/completer.py

0 +145 -86

             import unicodedata
             import uuid
             import warnings
+            from ast import literal_eval
             from contextlib import contextmanager
             from dataclasses import dataclass
             from functools import cached_property, partial
                 Literal,
             )
+            from IPython.core.guarded_eval import guarded_eval, EvaluationContext
             from IPython.core.error import TryNext
             from IPython.core.inputtransformer2 import ESC_MAGIC
             from IPython.core.latex_symbols import latex_symbols, reverse_latex_symbol
             # Completion type reported when no type can be inferred.
             _UNKNOWN_TYPE = "<unknown>"
+            # sentinel value to signal lack of a match
+            not_found = object()
             class ProvisionalCompleterWarning(FutureWarning):
                 """
                 Exception raise by an experimental feature in this module.
             class Completer(Configurable):
-                greedy = Bool(False,
+                greedy = Bool(
-                    help="""Activate greedy completion
+                    False,
-                    PENDING DEPRECATION. this is now mostly taken care of with Jedi.
+                    help="""Activate greedy completion.
+                    .. deprecated:: 8.8
+                        Use :any:`evaluation` instead.
+                    As of IPython 8.8 proxy for ``evaluation = 'unsafe'`` when set to ``True``,
+                    and for ``'forbidden'`` when set to ``False``.
+                    """,
+                ).tag(config=True)
-                    This will enable completion on elements of lists, results of function calls, etc.,
+                evaluation = Enum(
-                    but can be unsafe because the code is actually evaluated on TAB.
+                    ('forbidden', 'minimal', 'limitted', 'unsafe', 'dangerous'),
+                    default_value='limitted',
+                    help="""Code evaluation under completion.
+                    Successive options allow to enable more eager evaluation for more accurate completion suggestions,
+                    including for nested dictionaries, nested lists, or even results of function calls. Setting `unsafe`
+                    or higher can lead to evaluation of arbitrary user code on TAB with potentially dangerous side effects.
+                    Allowed values are:
+                      - `forbidden`: no evaluation at all
+                      - `minimal`:  evaluation of literals and access to built-in namespaces; no item/attribute evaluation nor access to locals/globals
+                      - `limitted` (default): access to all namespaces, evaluation of hard-coded methods (``keys()``, ``__getattr__``, ``__getitems__``, etc) on allow-listed objects (e.g. ``dict``, ``list``, ``tuple``, ``pandas.Series``)
+                      - `unsafe`: evaluation of all methods and function calls but not of syntax with side-effects like `del x`,
+                      - `dangerous`: completely arbitrary evaluation
                     """,
                 ).tag(config=True)
                     with a __getattr__ hook is evaluated.
                     """
-                    # Another option, seems to work great. Catches things like ''.<tab>
-                    m = re.match(r"(\S+(\.\w+)*)\.(\w*)$", text)
-                    if m:
-                        expr, attr = m.group(1, 3)
-                    elif self.greedy:
                     m2 = re.match(r"(.+)\.(\w*)$", self.line_buffer)
                     if not m2:
                         return []
                     expr, attr = m2.group(1,2)
-                    else:
-                        return []
-                    try:
+                    obj = self._evaluate_expr(expr)
-                        obj = eval(expr, self.namespace)
-                    except:
+                    if obj is not_found:
-                        try:
-                            obj = eval(expr, self.global_namespace)
-                        except:
                         return []
                     if self.limit_to__all__ and hasattr(obj, '__all__'):
                         pass
                     # Build match list to return
                     n = len(attr)
-                    return [u"%s.%s" % (expr, w) for w in words if w[:n] == attr ]
+                    return ["%s.%s" % (expr, w) for w in words if w[:n] == attr ]
+                def _evaluate_expr(self, expr):
+                    obj = not_found
+                    done = False
+                    while not done and expr:
+                        try:
+                            obj = guarded_eval(
+                                expr,
+                                EvaluationContext(
+                                    globals_=self.global_namespace,
+                                    locals_=self.namespace,
+                                    evaluation=self.evaluation
+                                )
+                            )
+                            done = True
+                        except Exception as e:
+                            if self.debug:
+                                print('Evaluation exception', e)
+                            # trim the expression to remove any invalid prefix
+                            # e.g. user starts `(d[`, so we get `expr = '(d'`,
+                            # where parenthesis is not closed.
+                            # TODO: make this faster by reusing parts of the computation?
+                            expr = expr[1:]
+                    return obj
             def get__all__entries(obj):
                 """returns the strings in the __all__ attribute"""
                 return [w for w in words if isinstance(w, str)]
-            def match_dict_keys(keys: List[Union[str, bytes, Tuple[Union[str, bytes]]]], prefix: str, delims: str,
+            def match_dict_keys(keys: List[Union[str, bytes, Tuple[Union[str, bytes], ...]]], prefix: str, delims: str,
-                                extra_prefix: Optional[Tuple[str, bytes]]=None) -> Tuple[str, int, List[str]]:
+                                extra_prefix: Optional[Tuple[Union[str, bytes], ...]]=None) -> Tuple[str, int, List[str]]:
                 """Used by dict_key_matches, matching the prefix to a list of keys
                 Parameters
                 """
                 prefix_tuple = extra_prefix if extra_prefix else ()
                 Nprefix = len(prefix_tuple)
+                text_serializable_types = (str, bytes, int, float, slice)
                 def filter_prefix_tuple(key):
                     # Reject too short keys
                     if len(key) <= Nprefix:
                         return False
-                    # Reject keys with non str/bytes in it
+                    # Reject keys which cannot be serialised to text
                     for k in key:
-                        if not isinstance(k, (str, bytes)):
+                        if not isinstance(k, text_serializable_types):
                             return False
                     # Reject keys that do not match the prefix
                     for k, pt in zip(key, prefix_tuple):
-                        if k != pt:
+                        if k != pt and not isinstance(pt, slice):
                             return False
                     # All checks passed!
                     return True
-                filtered_keys:List[Union[str,bytes]] = []
+                filtered_keys: List[Union[str, bytes, int, float, slice]] = []
                 def _add_to_filtered_keys(key):
-                    if isinstance(key, (str, bytes)):
+                    if isinstance(key, text_serializable_types):
                         filtered_keys.append(key)
                 for k in keys:
                 assert quote_match is not None # silence mypy
                 quote = quote_match.group()
                 try:
-                    prefix_str = eval(prefix + quote, {})
+                    prefix_str = literal_eval(prefix + quote)
                 except Exception:
                     return '', 0, []
                 matched: List[str] = []
                 for key in filtered_keys:
+                    str_key = key if isinstance(key, (str, bytes)) else str(key)
                     try:
-                        if not key.startswith(prefix_str):
+                        if not str_key.startswith(prefix_str):
                             continue
                     except (AttributeError, TypeError, UnicodeError):
                         # Python 3+ TypeError on b'a'.startswith('a') or vice-versa
                         continue
                     # reformat remainder of key to begin with prefix
-                    rem = key[len(prefix_str):]
+                    rem = str_key[len(prefix_str):]
                     # force repr wrapped in '
                     rem_repr = repr(rem + '"') if isinstance(rem, str) else repr(rem + b'"')
                     rem_repr = rem_repr[1 + rem_repr.index("'"):-2]
                 return line, col
-            def _safe_isinstance(obj, module, class_name):
+            def _safe_isinstance(obj, module, class_name, *attrs):
                 """Checks if obj is an instance of module.class_name if loaded
                 """
-                return (module in sys.modules and
+                if module in sys.modules:
-                        isinstance(obj, getattr(import_module(module), class_name)))
+                    m = sys.modules[module]
+                    for attr in [class_name, *attrs]:
+                        m = getattr(m, attr)
+                    return isinstance(obj, m)
             @context_matcher()
             _CompleteResult = Dict[str, MatcherResult]
+            DICT_MATCHER_REGEX = re.compile(r"""(?x)
+            (  # match dict-referring - or any get item object - expression
+                .+
+            )
+            \[   # open bracket
+            \s*  # and optional whitespace
+            # Capture any number of serializable objects (e.g. "a", "b", 'c')
+            # and slices
+            ((?:[uUbB]?  # string prefix (r not handled)
+                (?:
+                    '(?:[^']|(?<!\\)\\')*'
+                |
+                    "(?:[^"]|(?<!\\)\\")*"
+                |
+                    # capture integers and slices
+                    (?:[-+]?\d+)?(?::(?:[-+]?\d+)?){0,2}
+                )
+                \s*,\s*
+            )*)
+            ([uUbB]?  # string prefix (r not handled)
+                (?:   # unclosed string
+                    '(?:[^']|(?<!\\)\\')*
+                |
+                    "(?:[^"]|(?<!\\)\\")*
+                |
+                    (?:[-+]?\d+)
+                )
+            )?
+            $
+            """)
             def _convert_matcher_v1_result_to_v2(
                 matches: Sequence[str],
                 type: str,
             class IPCompleter(Completer):
                 """Extension of the completer class with IPython-specific features"""
-                __dict_key_regexps: Optional[Dict[bool,Pattern]] = None
                 @observe('greedy')
                 def _greedy_changed(self, change):
                     """update the splitter and readline delims when greedy is changed"""
                     if change['new']:
+                        self.evaluation = 'unsafe'
                         self.splitter.delims = GREEDY_DELIMS
                     else:
+                        self.evaluation = 'limitted'
                         self.splitter.delims = DELIMS
                 dict_keys_only = Bool(
                         return method()
                     # Special case some common in-memory dict-like types
-                    if isinstance(obj, dict) or\
+                    if (isinstance(obj, dict) or
-                       _safe_isinstance(obj, 'pandas', 'DataFrame'):
+                       _safe_isinstance(obj, 'pandas', 'DataFrame')):
                         try:
                             return list(obj.keys())
                         except Exception:
                             return []
+                    elif _safe_isinstance(obj, 'pandas', 'core', 'indexing', '_LocIndexer'):
+                        try:
+                            return list(obj.obj.keys())
+                        except Exception:
+                            return []
                     elif _safe_isinstance(obj, 'numpy', 'ndarray') or\
                          _safe_isinstance(obj, 'numpy', 'void'):
                         return obj.dtype.names or []
                         You can use :meth:`dict_key_matcher` instead.
                     """
-                    if self.__dict_key_regexps is not None:
+                    # Short-circuit on closed dictionary (regular expression would
-                        regexps = self.__dict_key_regexps
+                    # not match anyway, but would take quite a while).
-                    else:
+                    if self.text_until_cursor.strip().endswith(']'):
-                        dict_key_re_fmt = r'''(?x)
+                        return []
-                        (  # match dict-referring expression wrt greedy setting
-                            %s
-                        \[   # open bracket
-                        \s*  # and optional whitespace
-                        # Capture any number of str-like objects (e.g. "a", "b", 'c')
-                        ((?:[uUbB]?  # string prefix (r not handled)
-                            (?:
-                                '(?:[^']|(?<!\\)\\')*'
-                                "(?:[^"]|(?<!\\)\\")*"
-                            \s*,\s*
-                        )*)
-                        ([uUbB]?  # string prefix (r not handled)
-                            (?:   # unclosed string
-                                '(?:[^']|(?<!\\)\\')*
-                                "(?:[^"]|(?<!\\)\\")*
-                        )?
-                        '''
-                        regexps = self.__dict_key_regexps = {
-                            False: re.compile(dict_key_re_fmt % r'''
-                                              # identifiers separated by .
-                                              (?!\d)\w+
-                                              (?:\.(?!\d)\w+)*
-                                              '''),
-                            True: re.compile(dict_key_re_fmt % '''
-                                             .+
-                                             ''')
-                    match = regexps[self.greedy].search(self.text_until_cursor)
+                    match = DICT_MATCHER_REGEX.search(self.text_until_cursor)
                     if match is None:
                         return []
-                    expr, prefix0, prefix = match.groups()
+                    expr, prior_tuple_keys, key_prefix = match.groups()
-                    try:
-                        obj = eval(expr, self.namespace)
+                    obj = self._evaluate_expr(expr)
-                    except Exception:
-                        try:
+                    if obj is not_found:
-                            obj = eval(expr, self.global_namespace)
-                        except Exception:
                         return []
                     keys = self._get_keys(obj)
                     if not keys:
                         return keys
-                    extra_prefix = eval(prefix0) if prefix0 != '' else None
+                    tuple_prefix = guarded_eval(
+                        prior_tuple_keys,
+                        EvaluationContext(
+                            globals_=self.global_namespace,
+                            locals_=self.namespace,
+                            evaluation=self.evaluation,
+                            in_subscript=True
+                        )
+                    )
-                    closing_quote, token_offset, matches = match_dict_keys(keys, prefix, self.splitter.delims, extra_prefix=extra_prefix)
+                    closing_quote, token_offset, matches = match_dict_keys(
+                        keys,
+                        key_prefix,
+                        self.splitter.delims,
+                        extra_prefix=tuple_prefix
+                    )
                     if not matches:
                         return matches
                     # - the start of the key text
                     # - the start of the completion
                     text_start = len(self.text_until_cursor) - len(text)
-                    if prefix:
+                    if key_prefix:
                         key_start = match.start(3)
                         completion_start = key_start + token_offset
                     else:

IPython/core/tests/test_completer.py

0 +46 -4

             @contextmanager
+            def evaluation_level(evaluation: str):
+                ip = get_ipython()
+                evaluation_original = ip.Completer.evaluation
+                try:
+                    ip.Completer.evaluation = evaluation
+                    yield
+                finally:
+                    ip.Completer.evaluation = evaluation_original
+            @contextmanager
             def custom_matchers(matchers):
                 ip = get_ipython()
                 try:
                     assert match_dict_keys(keys, '"', delims=delims) == ('"', 1, ["foo"])
                     assert match_dict_keys(keys, '"f', delims=delims) == ('"', 1, ["foo"])
-                    match_dict_keys
                 def test_match_dict_keys_tuple(self):
                     """
                     Test that match_dict_keys called with extra prefix works on a couple of use case,
                     assert match_dict_keys(keys, "'foo", delims=delims, extra_prefix=('foo1', 'foo2', 'foo3')) == ("'", 1, ["foo4"])
                     assert match_dict_keys(keys, "'foo", delims=delims, extra_prefix=('foo1', 'foo2', 'foo3', 'foo4')) == ("'", 1, [])
+                    keys = [("foo", 1111), ("foo", 2222), (3333, "bar"), (3333, 'test')]
+                    assert match_dict_keys(keys, "'", delims=delims, extra_prefix=("foo",)) == ("'", 1, ["1111", "2222"])
+                    assert match_dict_keys(keys, "'", delims=delims, extra_prefix=(3333,)) == ("'", 1, ["bar", "test"])
+                    assert match_dict_keys(keys, "'", delims=delims, extra_prefix=("3333",)) == ("'", 1, [])
                 def test_dict_key_completion_string(self):
                     """Test dictionary key completion for string keys"""
                     ip = get_ipython()
                     ip.user_ns["C"] = C
                     ip.user_ns["get"] = lambda: d
+                    ip.user_ns["nested"] = {'x': d}
                     def assert_no_completion(**kwargs):
                         _, matches = complete(**kwargs)
                     assert_completion(line_buffer="(d[")
                     assert_completion(line_buffer="C.data[")
+                    # nested dict completion
+                    assert_completion(line_buffer="nested['x'][")
+                    with evaluation_level('minimal'):
+                        with pytest.raises(AssertionError):
+                            assert_completion(line_buffer="nested['x'][")
                     # greedy flag
                     def assert_completion(**kwargs):
                         _, matches = complete(**kwargs)
                     _, matches = complete(line_buffer="d['")
                     self.assertIn("my_head", matches)
                     self.assertIn("my_data", matches)
-                    # complete on a nested level
+                    def completes_on_nested():
-                    with greedy_completion():
                         ip.user_ns["d"] = numpy.zeros(2, dtype=dt)
                         _, matches = complete(line_buffer="d[1]['my_head']['")
                         self.assertTrue(any(["my_dt" in m for m in matches]))
                         self.assertTrue(any(["my_df" in m for m in matches]))
+                    # complete on a nested level
+                    with greedy_completion():
+                        completes_on_nested()
+                    with evaluation_level('limitted'):
+                        completes_on_nested()
+                    with evaluation_level('minimal'):
+                        with pytest.raises(AssertionError):
+                            completes_on_nested()
                 @dec.skip_without("pandas")
                 def test_dataframe_key_completion(self):
                     _, matches = complete(line_buffer="d['")
                     self.assertIn("hello", matches)
                     self.assertIn("world", matches)
+                    _, matches = complete(line_buffer="d.loc[:, '")
+                    self.assertIn("hello", matches)
+                    self.assertIn("world", matches)
+                    _, matches = complete(line_buffer="d.loc[1:, '")
+                    self.assertIn("hello", matches)
+                    _, matches = complete(line_buffer="d.loc[1:1, '")
+                    self.assertIn("hello", matches)
+                    _, matches = complete(line_buffer="d.loc[1:1:-1, '")
+                    self.assertIn("hello", matches)
+                    _, matches = complete(line_buffer="d.loc[::, '")
+                    self.assertIn("hello", matches)
                 def test_dict_key_completion_invalids(self):
                     """Smoke test cases dict key completion can't handle"""

General Comments 0

Write
Preview

You need to be logged in to leave comments. Log in now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Go to home page
g g	Go to my private gists page
g G	Go to my public gists page
g 0-9	Go to bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Go to summary page
g c	Go to changelog page
g f	Go to files page
g F	Go to files page with file search activated
g p	Go to pull requests page
g o	Go to repository settings
g O	Go to repository access permissions settings
t s	Toggle sidebar on some pages