Show More
@@ -0,0 +1,21 b'' | |||||
|
1 | MIT License | |||
|
2 | ||||
|
3 | Copyright (c) 2021 Taneli Hukkinen | |||
|
4 | ||||
|
5 | Permission is hereby granted, free of charge, to any person obtaining a copy | |||
|
6 | of this software and associated documentation files (the "Software"), to deal | |||
|
7 | in the Software without restriction, including without limitation the rights | |||
|
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
|
9 | copies of the Software, and to permit persons to whom the Software is | |||
|
10 | furnished to do so, subject to the following conditions: | |||
|
11 | ||||
|
12 | The above copyright notice and this permission notice shall be included in all | |||
|
13 | copies or substantial portions of the Software. | |||
|
14 | ||||
|
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
|
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
|
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
|
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
|
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
|
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
|
21 | SOFTWARE. |
@@ -0,0 +1,182 b'' | |||||
|
1 | [![Build Status](https://github.com/hukkin/tomli/workflows/Tests/badge.svg?branch=master)](https://github.com/hukkin/tomli/actions?query=workflow%3ATests+branch%3Amaster+event%3Apush) | |||
|
2 | [![codecov.io](https://codecov.io/gh/hukkin/tomli/branch/master/graph/badge.svg)](https://codecov.io/gh/hukkin/tomli) | |||
|
3 | [![PyPI version](https://img.shields.io/pypi/v/tomli)](https://pypi.org/project/tomli) | |||
|
4 | ||||
|
5 | # Tomli | |||
|
6 | ||||
|
7 | > A lil' TOML parser | |||
|
8 | ||||
|
9 | **Table of Contents** *generated with [mdformat-toc](https://github.com/hukkin/mdformat-toc)* | |||
|
10 | ||||
|
11 | <!-- mdformat-toc start --slug=github --maxlevel=6 --minlevel=2 --> | |||
|
12 | ||||
|
13 | - [Intro](#intro) | |||
|
14 | - [Installation](#installation) | |||
|
15 | - [Usage](#usage) | |||
|
16 | - [Parse a TOML string](#parse-a-toml-string) | |||
|
17 | - [Parse a TOML file](#parse-a-toml-file) | |||
|
18 | - [Handle invalid TOML](#handle-invalid-toml) | |||
|
19 | - [Construct `decimal.Decimal`s from TOML floats](#construct-decimaldecimals-from-toml-floats) | |||
|
20 | - [FAQ](#faq) | |||
|
21 | - [Why this parser?](#why-this-parser) | |||
|
22 | - [Is comment preserving round-trip parsing supported?](#is-comment-preserving-round-trip-parsing-supported) | |||
|
23 | - [Is there a `dumps`, `write` or `encode` function?](#is-there-a-dumps-write-or-encode-function) | |||
|
24 | - [How do TOML types map into Python types?](#how-do-toml-types-map-into-python-types) | |||
|
25 | - [Performance](#performance) | |||
|
26 | ||||
|
27 | <!-- mdformat-toc end --> | |||
|
28 | ||||
|
29 | ## Intro<a name="intro"></a> | |||
|
30 | ||||
|
31 | Tomli is a Python library for parsing [TOML](https://toml.io). | |||
|
32 | Tomli is fully compatible with [TOML v1.0.0](https://toml.io/en/v1.0.0). | |||
|
33 | ||||
|
34 | ## Installation<a name="installation"></a> | |||
|
35 | ||||
|
36 | ```bash | |||
|
37 | pip install tomli | |||
|
38 | ``` | |||
|
39 | ||||
|
40 | ## Usage<a name="usage"></a> | |||
|
41 | ||||
|
42 | ### Parse a TOML string<a name="parse-a-toml-string"></a> | |||
|
43 | ||||
|
44 | ```python | |||
|
45 | import tomli | |||
|
46 | ||||
|
47 | toml_str = """ | |||
|
48 | gretzky = 99 | |||
|
49 | ||||
|
50 | [kurri] | |||
|
51 | jari = 17 | |||
|
52 | """ | |||
|
53 | ||||
|
54 | toml_dict = tomli.loads(toml_str) | |||
|
55 | assert toml_dict == {"gretzky": 99, "kurri": {"jari": 17}} | |||
|
56 | ``` | |||
|
57 | ||||
|
58 | ### Parse a TOML file<a name="parse-a-toml-file"></a> | |||
|
59 | ||||
|
60 | ```python | |||
|
61 | import tomli | |||
|
62 | ||||
|
63 | with open("path_to_file/conf.toml", "rb") as f: | |||
|
64 | toml_dict = tomli.load(f) | |||
|
65 | ``` | |||
|
66 | ||||
|
67 | The file must be opened in binary mode (with the `"rb"` flag). | |||
|
68 | Binary mode will enforce decoding the file as UTF-8 with universal newlines disabled, | |||
|
69 | both of which are required to correctly parse TOML. | |||
|
70 | Support for text file objects is deprecated for removal in the next major release. | |||
|
71 | ||||
|
72 | ### Handle invalid TOML<a name="handle-invalid-toml"></a> | |||
|
73 | ||||
|
74 | ```python | |||
|
75 | import tomli | |||
|
76 | ||||
|
77 | try: | |||
|
78 | toml_dict = tomli.loads("]] this is invalid TOML [[") | |||
|
79 | except tomli.TOMLDecodeError: | |||
|
80 | print("Yep, definitely not valid.") | |||
|
81 | ``` | |||
|
82 | ||||
|
83 | Note that while the `TOMLDecodeError` type is public API, error messages of raised instances of it are not. | |||
|
84 | Error messages should not be assumed to stay constant across Tomli versions. | |||
|
85 | ||||
|
86 | ### Construct `decimal.Decimal`s from TOML floats<a name="construct-decimaldecimals-from-toml-floats"></a> | |||
|
87 | ||||
|
88 | ```python | |||
|
89 | from decimal import Decimal | |||
|
90 | import tomli | |||
|
91 | ||||
|
92 | toml_dict = tomli.loads("precision-matters = 0.982492", parse_float=Decimal) | |||
|
93 | assert toml_dict["precision-matters"] == Decimal("0.982492") | |||
|
94 | ``` | |||
|
95 | ||||
|
96 | Note that `decimal.Decimal` can be replaced with another callable that converts a TOML float from string to a Python type. | |||
|
97 | The `decimal.Decimal` is, however, a practical choice for use cases where float inaccuracies can not be tolerated. | |||
|
98 | ||||
|
99 | Illegal types include `dict`, `list`, and anything that has the `append` attribute. | |||
|
100 | Parsing floats into an illegal type results in undefined behavior. | |||
|
101 | ||||
|
102 | ## FAQ<a name="faq"></a> | |||
|
103 | ||||
|
104 | ### Why this parser?<a name="why-this-parser"></a> | |||
|
105 | ||||
|
106 | - it's lil' | |||
|
107 | - pure Python with zero dependencies | |||
|
108 | - the fastest pure Python parser [\*](#performance): | |||
|
109 | 15x as fast as [tomlkit](https://pypi.org/project/tomlkit/), | |||
|
110 | 2.4x as fast as [toml](https://pypi.org/project/toml/) | |||
|
111 | - outputs [basic data types](#how-do-toml-types-map-into-python-types) only | |||
|
112 | - 100% spec compliant: passes all tests in | |||
|
113 | [a test set](https://github.com/toml-lang/compliance/pull/8) | |||
|
114 | soon to be merged to the official | |||
|
115 | [compliance tests for TOML](https://github.com/toml-lang/compliance) | |||
|
116 | repository | |||
|
117 | - thoroughly tested: 100% branch coverage | |||
|
118 | ||||
|
119 | ### Is comment preserving round-trip parsing supported?<a name="is-comment-preserving-round-trip-parsing-supported"></a> | |||
|
120 | ||||
|
121 | No. | |||
|
122 | ||||
|
123 | The `tomli.loads` function returns a plain `dict` that is populated with builtin types and types from the standard library only. | |||
|
124 | Preserving comments requires a custom type to be returned so will not be supported, | |||
|
125 | at least not by the `tomli.loads` and `tomli.load` functions. | |||
|
126 | ||||
|
127 | Look into [TOML Kit](https://github.com/sdispater/tomlkit) if preservation of style is what you need. | |||
|
128 | ||||
|
129 | ### Is there a `dumps`, `write` or `encode` function?<a name="is-there-a-dumps-write-or-encode-function"></a> | |||
|
130 | ||||
|
131 | [Tomli-W](https://github.com/hukkin/tomli-w) is the write-only counterpart of Tomli, providing `dump` and `dumps` functions. | |||
|
132 | ||||
|
133 | The core library does not include write capability, as most TOML use cases are read-only, and Tomli intends to be minimal. | |||
|
134 | ||||
|
135 | ### How do TOML types map into Python types?<a name="how-do-toml-types-map-into-python-types"></a> | |||
|
136 | ||||
|
137 | | TOML type | Python type | Details | | |||
|
138 | | ---------------- | ------------------- | ------------------------------------------------------------ | | |||
|
139 | | Document Root | `dict` | | | |||
|
140 | | Key | `str` | | | |||
|
141 | | String | `str` | | | |||
|
142 | | Integer | `int` | | | |||
|
143 | | Float | `float` | | | |||
|
144 | | Boolean | `bool` | | | |||
|
145 | | Offset Date-Time | `datetime.datetime` | `tzinfo` attribute set to an instance of `datetime.timezone` | | |||
|
146 | | Local Date-Time | `datetime.datetime` | `tzinfo` attribute set to `None` | | |||
|
147 | | Local Date | `datetime.date` | | | |||
|
148 | | Local Time | `datetime.time` | | | |||
|
149 | | Array | `list` | | | |||
|
150 | | Table | `dict` | | | |||
|
151 | | Inline Table | `dict` | | | |||
|
152 | ||||
|
153 | ## Performance<a name="performance"></a> | |||
|
154 | ||||
|
155 | The `benchmark/` folder in this repository contains a performance benchmark for comparing the various Python TOML parsers. | |||
|
156 | The benchmark can be run with `tox -e benchmark-pypi`. | |||
|
157 | Running the benchmark on my personal computer output the following: | |||
|
158 | ||||
|
159 | ```console | |||
|
160 | foo@bar:~/dev/tomli$ tox -e benchmark-pypi | |||
|
161 | benchmark-pypi installed: attrs==19.3.0,click==7.1.2,pytomlpp==1.0.2,qtoml==0.3.0,rtoml==0.7.0,toml==0.10.2,tomli==1.1.0,tomlkit==0.7.2 | |||
|
162 | benchmark-pypi run-test-pre: PYTHONHASHSEED='2658546909' | |||
|
163 | benchmark-pypi run-test: commands[0] | python -c 'import datetime; print(datetime.date.today())' | |||
|
164 | 2021-07-23 | |||
|
165 | benchmark-pypi run-test: commands[1] | python --version | |||
|
166 | Python 3.8.10 | |||
|
167 | benchmark-pypi run-test: commands[2] | python benchmark/run.py | |||
|
168 | Parsing data.toml 5000 times: | |||
|
169 | ------------------------------------------------------ | |||
|
170 | parser | exec time | performance (more is better) | |||
|
171 | -----------+------------+----------------------------- | |||
|
172 | rtoml | 0.901 s | baseline (100%) | |||
|
173 | pytomlpp | 1.08 s | 83.15% | |||
|
174 | tomli | 3.89 s | 23.15% | |||
|
175 | toml | 9.36 s | 9.63% | |||
|
176 | qtoml | 11.5 s | 7.82% | |||
|
177 | tomlkit | 56.8 s | 1.59% | |||
|
178 | ``` | |||
|
179 | ||||
|
180 | The parsers are ordered from fastest to slowest, using the fastest parser as baseline. | |||
|
181 | Tomli performed the best out of all pure Python TOML parsers, | |||
|
182 | losing only to pytomlpp (wraps C++) and rtoml (wraps Rust). |
@@ -0,0 +1,9 b'' | |||||
|
1 | """A lil' TOML parser.""" | |||
|
2 | ||||
|
3 | __all__ = ("loads", "load", "TOMLDecodeError") | |||
|
4 | __version__ = "1.2.3" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT | |||
|
5 | ||||
|
6 | from ._parser import TOMLDecodeError, load, loads | |||
|
7 | ||||
|
8 | # Pretend this exception was created here. | |||
|
9 | TOMLDecodeError.__module__ = "tomli" |
This diff has been collapsed as it changes many lines, (663 lines changed) Show them Hide them | |||||
@@ -0,0 +1,663 b'' | |||||
|
1 | import string | |||
|
2 | from types import MappingProxyType | |||
|
3 | from typing import Any, BinaryIO, Dict, FrozenSet, Iterable, NamedTuple, Optional, Tuple | |||
|
4 | import warnings | |||
|
5 | ||||
|
6 | from ._re import ( | |||
|
7 | RE_DATETIME, | |||
|
8 | RE_LOCALTIME, | |||
|
9 | RE_NUMBER, | |||
|
10 | match_to_datetime, | |||
|
11 | match_to_localtime, | |||
|
12 | match_to_number, | |||
|
13 | ) | |||
|
14 | from ._types import Key, ParseFloat, Pos | |||
|
15 | ||||
|
# ASCII control characters (0x00-0x1F) plus DEL (0x7F); these are never
# allowed to appear raw in TOML strings or comments.
ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))

# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")

ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS

# TOML whitespace is space and tab only; newline acts as a statement separator.
TOML_WS = frozenset(" \t")
TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
# Characters allowed in an unquoted (bare) key.
BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
# A key starts with a bare-key character or an opening quote/apostrophe.
KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS = frozenset(string.hexdigits)

# Two-character escape sequences of basic strings and their replacements.
# \u and \U hex escapes are handled separately in parse_basic_str_escape.
BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000A",  # linefeed
        "\\f": "\u000C",  # form feed
        "\\r": "\u000D",  # carriage return
        '\\"': "\u0022",  # quote
        "\\\\": "\u005C",  # backslash
    }
)
|
45 | ||||
|
46 | ||||
|
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML.

    The exception type is public API; the message text of raised
    instances is not and may change between versions.
    """
|
49 | ||||
|
50 | ||||
|
def load(fp: BinaryIO, *, parse_float: ParseFloat = float) -> Dict[str, Any]:
    """Parse TOML from a binary file object.

    The file should be opened in binary mode ("rb") so that it is decoded
    as UTF-8. Text-mode file objects still work but are deprecated.
    """
    raw = fp.read()
    try:
        # Binary files yield bytes, which decode as UTF-8 by default.
        doc = raw.decode()
    except AttributeError:
        # `str` has no `.decode`, so the file was opened in text mode.
        warnings.warn(
            "Text file object support is deprecated in favor of binary file objects."
            ' Use `open("foo.toml", "rb")` to open the file in binary mode.',
            DeprecationWarning,
            stacklevel=2,
        )
        doc = raw  # type: ignore[assignment]
    return loads(doc, parse_float=parse_float)
|
65 | ||||
|
66 | ||||
|
def loads(s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]:  # noqa: C901
    """Parse TOML from a string.

    *parse_float* converts the source text of a TOML float to a Python
    object (default: `float`). Returns the document as a plain dict.
    Raises TOMLDecodeError if the document is not valid TOML.
    """

    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    src = s.replace("\r\n", "\n")
    pos = 0
    out = Output(NestedDict(), Flags())
    # Absolute key of the current `[table]` / `[[array]]` namespace.
    header: Key = ()

    # Parse one statement at a time
    # (typically means one line in TOML source)
    while True:
        # 1. Skip line leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Parse rules. Expect one of the following:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        #    Skip trailing whitespace when applicable.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            try:
                second_char: Optional[str] = src[pos + 1]
            except IndexError:
                second_char = None
            if second_char == "[":  # "[[" opens an array-of-tables item
                pos, header = create_list_rule(src, pos, out)
            else:  # a single "[" opens a table
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise suffixed_err(src, pos, "Invalid statement")

        # 3. Skip comment
        pos = skip_comment(src, pos)

        # 4. Expect end of line or end of file
        try:
            char = src[pos]
        except IndexError:
            break
        if char != "\n":
            raise suffixed_err(
                src, pos, "Expected newline or end of document after a statement"
            )
        pos += 1

    return out.data.dict
|
129 | ||||
|
130 | ||||
|
class Flags:
    """Flags that map to parsed keys/namespaces.

    Stored as a trie mirroring the key hierarchy: each node records flags
    set directly on that key ("flags"), flags that also apply to every
    descendant ("recursive_flags"), and child nodes ("nested").
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST = 1

    def __init__(self) -> None:
        self._flags: Dict[str, dict] = {}

    def unset_all(self, key: Key) -> None:
        # Drop all flags of *key* and, with it, its entire subtree.
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return  # Nothing recorded along this path; nothing to unset
            cont = cont[k]["nested"]
        cont.pop(key[-1], None)

    def set_for_relative_key(self, head_key: Key, rel_key: Key, flag: int) -> None:
        # Set *flag* on every prefix of *rel_key*, resolved under *head_key*.
        cont = self._flags
        for k in head_key:
            if k not in cont:
                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]
        for k in rel_key:
            if k in cont:
                cont[k]["flags"].add(flag)
            else:
                cont[k] = {"flags": {flag}, "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        # Set *flag* on *key*; with recursive=True the flag also applies
        # to every descendant of *key*.
        cont = self._flags
        key_parent, key_stem = key[:-1], key[-1]
        for k in key_parent:
            if k not in cont:
                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]
        if key_stem not in cont:
            cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
        cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        # True if *key* carries *flag*, either directly or inherited from
        # a recursive flag on any ancestor.
        if not key:
            return False  # document root has no flags
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return False
            inner_cont = cont[k]
            if flag in inner_cont["recursive_flags"]:
                return True
            cont = inner_cont["nested"]
        key_stem = key[-1]
        if key_stem in cont:
            cont = cont[key_stem]
            return flag in cont["flags"] or flag in cont["recursive_flags"]
        return False
|
191 | ||||
|
192 | ||||
|
class NestedDict:
    """A plain-dict builder that supports creating and descending nested tables."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: Dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict:
        """Walk *key*, creating empty dicts as needed, and return the final dict.

        When *access_lists* is true, descending into a list means descending
        into its last element (arrays of tables grow at the end).

        Raises KeyError if a non-dict value blocks the path.
        """
        container: Any = self.dict
        for part in key:
            if part not in container:
                container[part] = {}
            container = container[part]
            if access_lists and isinstance(container, list):
                container = container[-1]
            if not isinstance(container, dict):
                raise KeyError("There is no nest behind this key")
        return container

    def append_nest_to_list(self, key: Key) -> None:
        """Append a fresh empty dict to the list at *key*, creating the list if absent.

        Raises KeyError if *key* already holds something that is not a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        last_key = key[-1]
        if last_key not in parent:
            parent[last_key] = [{}]
            return
        target = parent[last_key]
        try:
            target.append({})
        except AttributeError:
            raise KeyError("An object other than list found behind this key")
|
226 | ||||
|
227 | ||||
|
class Output(NamedTuple):
    """Pairs the document being built with the flags recorded for its keys."""

    data: NestedDict  # The parsed document under construction
    flags: Flags  # FROZEN / EXPLICIT_NEST flags for keys in `data`
|
231 | ||||
|
232 | ||||
|
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Advance *pos* past any run of characters found in *chars*."""
    end = len(src)
    while pos < end and src[pos] in chars:
        pos += 1
    return pos
|
240 | ||||
|
241 | ||||
|
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: FrozenSet[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of *expect* at or after *pos*.

    If *expect* is not found, return len(src), or raise if *error_on_eof*.
    Raises if any character from *error_on* appears before *expect*.
    """
    new_pos = src.find(expect, pos)
    if new_pos == -1:
        new_pos = len(src)
        if error_on_eof:
            raise suffixed_err(src, new_pos, f"Expected {expect!r}")

    if not error_on.isdisjoint(src[pos:new_pos]):
        # Walk to the first offending character so the error points at it.
        while src[pos] not in error_on:
            pos += 1
        raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}")
    return new_pos
|
262 | ||||
|
263 | ||||
|
def skip_comment(src: str, pos: Pos) -> Pos:
    """If a comment starts at *pos*, skip to the terminating newline (or EOF)."""
    if src.startswith("#", pos):
        return skip_until(
            src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
        )
    return pos
|
274 | ||||
|
275 | ||||
|
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip whitespace, newlines and comments — everything allowed between array items."""
    while True:
        prev_pos = pos
        pos = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
        if pos == prev_pos:  # Fixed point reached: nothing more to skip
            return pos
|
283 | ||||
|
284 | ||||
|
def create_dict_rule(src: str, pos: Pos, out: Output) -> Tuple[Pos, Key]:
    """Parse a `[table]` header and create/open its namespace.

    Returns the position after the closing "]" and the table's absolute key.
    """
    pos += 1  # Skip "["
    pos = skip_chars(src, pos, TOML_WS)
    pos, key = parse_key(src, pos)

    # A table may be declared only once, and may not reopen a frozen value
    if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN):
        raise suffixed_err(src, pos, f"Can not declare {key} twice")
    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise suffixed_err(src, pos, "Can not overwrite a value") from None

    if not src.startswith("]", pos):
        raise suffixed_err(src, pos, 'Expected "]" at the end of a table declaration')
    return pos + 1, key
|
301 | ||||
|
302 | ||||
|
def create_list_rule(src: str, pos: Pos, out: Output) -> Tuple[Pos, Key]:
    """Parse a `[[array-of-tables]]` header and append a new table to the array.

    Returns the position after the closing "]]" and the array's absolute key.
    """
    pos += 2  # Skip "[["
    pos = skip_chars(src, pos, TOML_WS)
    pos, key = parse_key(src, pos)

    if out.flags.is_(key, Flags.FROZEN):
        raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
    # Free the namespace now that it points to another empty list item...
    out.flags.unset_all(key)
    # ...but this key precisely is still prohibited from table declaration
    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise suffixed_err(src, pos, "Can not overwrite a value") from None

    if not src.startswith("]]", pos):
        raise suffixed_err(src, pos, 'Expected "]]" at the end of an array declaration')
    return pos + 2, key
|
322 | ||||
|
323 | ||||
|
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one `key = value` statement inside the *header* namespace.

    Stores the value into *out* and returns the position after the value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float)
    key_parent, key_stem = key[:-1], key[-1]
    # Prefixing with the current table header makes the key absolute
    abs_key_parent = header + key_parent

    if out.flags.is_(abs_key_parent, Flags.FROZEN):
        raise suffixed_err(
            src, pos, f"Can not mutate immutable namespace {abs_key_parent}"
        )
    # Containers in the relative path can't be opened with the table syntax after this
    out.flags.set_for_relative_key(header, key, Flags.EXPLICIT_NEST)
    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise suffixed_err(src, pos, "Can not overwrite a value") from None
    if key_stem in nest:
        raise suffixed_err(src, pos, "Can not overwrite a value")
    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        out.flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
|
348 | ||||
|
349 | ||||
|
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat
) -> Tuple[Pos, Key, Any]:
    """Parse a `key = value` pair; return (position after value, key, value)."""
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise suffixed_err(src, pos, 'Expected "=" after a key in a key/value pair')
    pos = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, pos, parse_float)
    return pos, key, value
|
364 | ||||
|
365 | ||||
|
def parse_key(src: str, pos: Pos) -> Tuple[Pos, Key]:
    """Parse a (possibly dotted) key; return the position after it and the key tuple."""
    pos, part = parse_key_part(src, pos)
    parts = [part]
    pos = skip_chars(src, pos, TOML_WS)
    # Each "." continues the dotted key with another part.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, part = parse_key_part(src, pos)
        parts.append(part)
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)
|
382 | ||||
|
383 | ||||
|
def parse_key_part(src: str, pos: Pos) -> Tuple[Pos, str]:
    """Parse one key component: bare, literal-quoted, or basic-quoted."""
    char = src[pos] if pos < len(src) else None
    if char in BARE_KEY_CHARS:
        begin = pos
        pos = skip_chars(src, pos, BARE_KEY_CHARS)
        return pos, src[begin:pos]
    if char == "'":
        return parse_literal_str(src, pos)
    if char == '"':
        return parse_one_line_basic_str(src, pos)
    raise suffixed_err(src, pos, "Invalid initial character for a key part")
|
398 | ||||
|
399 | ||||
|
def parse_one_line_basic_str(src: str, pos: Pos) -> Tuple[Pos, str]:
    """Parse a single-line basic string; *pos* is at the opening quote."""
    return parse_basic_str(src, pos + 1, multiline=False)
|
403 | ||||
|
404 | ||||
|
def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, list]:
    """Parse an array value; *pos* is at the opening "[".

    Returns the position after the closing "]" and the parsed list.
    """
    pos += 1  # Skip "["
    array: list = []

    pos = skip_comments_and_array_ws(src, pos)
    if src.startswith("]", pos):
        return pos + 1, array  # Empty array
    while True:
        pos, val = parse_value(src, pos, parse_float)
        array.append(val)
        pos = skip_comments_and_array_ws(src, pos)

        c = src[pos : pos + 1]  # Empty string if at end of document
        if c == "]":
            return pos + 1, array
        if c != ",":
            raise suffixed_err(src, pos, "Unclosed array")
        pos += 1

        # A trailing comma before the closing bracket is allowed
        pos = skip_comments_and_array_ws(src, pos)
        if src.startswith("]", pos):
            return pos + 1, array
|
427 | ||||
|
428 | ||||
|
def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, dict]:
    """Parse an inline table value; *pos* is at the opening "{".

    Returns the position after the closing "}" and the parsed dict.
    """
    pos += 1  # Skip "{"
    nested_dict = NestedDict()
    flags = Flags()  # Inline tables track flags locally, not in the Output

    pos = skip_chars(src, pos, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, nested_dict.dict  # Empty inline table
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float)
        key_parent, key_stem = key[:-1], key[-1]
        if flags.is_(key, Flags.FROZEN):
            raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
        try:
            nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
        except KeyError:
            raise suffixed_err(src, pos, "Can not overwrite a value") from None
        if key_stem in nest:
            raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}")
        nest[key_stem] = value
        pos = skip_chars(src, pos, TOML_WS)
        c = src[pos : pos + 1]  # Empty string if at end of document
        if c == "}":
            return pos + 1, nested_dict.dict
        if c != ",":
            raise suffixed_err(src, pos, "Unclosed inline table")
        # Freeze nested containers so later dotted keys can not extend them
        if isinstance(value, (dict, list)):
            flags.set(key, Flags.FROZEN, recursive=True)
        pos += 1
        pos = skip_chars(src, pos, TOML_WS)
|
459 | ||||
|
460 | ||||
|
def parse_basic_str_escape(  # noqa: C901
    src: str, pos: Pos, *, multiline: bool = False
) -> Tuple[Pos, str]:
    """Parse one backslash escape sequence; *pos* is at the backslash.

    Returns the position after the escape and the replacement text.
    """
    escape_id = src[pos : pos + 2]
    pos += 2
    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            try:
                char = src[pos]
            except IndexError:
                return pos, ""
            if char != "\n":
                raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
            pos += 1
        pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
        return pos, ""
    if escape_id == "\\u":
        return parse_hex_char(src, pos, 4)
    if escape_id == "\\U":
        return parse_hex_char(src, pos, 8)
    try:
        return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        if len(escape_id) != 2:
            # The backslash was the last character of the document
            raise suffixed_err(src, pos, "Unterminated string") from None
        raise suffixed_err(src, pos, 'Unescaped "\\" in a string') from None
|
490 | ||||
|
491 | ||||
|
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> Tuple[Pos, str]:
    """Parse an escape sequence inside a multiline basic string."""
    return parse_basic_str_escape(src, pos, multiline=True)
|
494 | ||||
|
495 | ||||
|
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> Tuple[Pos, str]:
    """Decode *hex_len* hex digits starting at *pos* into one character."""
    hex_str = src[pos : pos + hex_len]
    # Short slices (near EOF) fail the length check; bad digits fail the scan.
    if len(hex_str) != hex_len or any(c not in HEXDIGIT_CHARS for c in hex_str):
        raise suffixed_err(src, pos, "Invalid hex value")
    pos += hex_len
    code_point = int(hex_str, 16)
    if not is_unicode_scalar_value(code_point):
        raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value")
    return pos, chr(code_point)
|
505 | ||||
|
506 | ||||
|
def parse_literal_str(src: str, pos: Pos) -> Tuple[Pos, str]:
    """Parse a single-line literal string; *pos* is at the opening apostrophe."""
    start_pos = pos + 1  # Move past the opening apostrophe
    end_pos = skip_until(
        src, start_pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    return end_pos + 1, src[start_pos:end_pos]  # +1 skips the closing apostrophe
|
514 | ||||
|
515 | ||||
|
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> Tuple[Pos, str]:
    """Parse a triple-quoted string, basic (\"\"\"...\"\"\") or literal ('''...''').

    `pos` points at the first character of the opening delimiter.  A
    newline immediately after the opening delimiter is trimmed and is
    not part of the content.  Returns the position just past the
    closing delimiter and the string content.
    """
    pos += 3  # step over the opening delimiter
    if src.startswith("\n", pos):
        pos += 1  # leading newline right after the delimiter is trimmed

    if literal:
        delim = "'"
        end_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:end_pos]
        pos = end_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # The closing sequence may be 4 or 5 delimiter chars long instead of
    # 3; in that case up to two of them belong to the string content.
    for _ in range(2):
        if not src.startswith(delim, pos):
            break
        result += delim
        pos += 1
    return pos, result
|
545 | ||||
|
546 | ||||
|
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]:
    """Parse the content of a basic (double-quoted) string.

    `pos` points at the first content character; the opening quote(s)
    have already been consumed by the caller.  Backslash escapes are
    resolved with the single-line or multiline escape parser as
    appropriate.  Returns the position just past the closing quote(s)
    and the unescaped content.

    Raises TOMLDecodeError on end-of-input before the closing quote(s)
    or on an illegal (control) character.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
    segments = []  # literal chunks and unescaped pieces, joined on return
    chunk_start = pos  # start of the current verbatim chunk
    src_len = len(src)
    while True:
        if pos >= src_len:
            raise suffixed_err(src, pos, "Unterminated string")
        char = src[pos]
        if char == '"':
            if not multiline:
                segments.append(src[chunk_start:pos])
                return pos + 1, "".join(segments)
            if src.startswith('"""', pos):
                segments.append(src[chunk_start:pos])
                return pos + 3, "".join(segments)
            # A lone quote inside a multiline string is plain content.
            pos += 1
            continue
        if char == "\\":
            segments.append(src[chunk_start:pos])
            pos, unescaped = parse_escapes(src, pos)
            segments.append(unescaped)
            chunk_start = pos
            continue
        if char in error_on:
            raise suffixed_err(src, pos, f"Illegal character {char!r}")
        pos += 1
|
577 | ||||
|
578 | ||||
|
def parse_value(  # noqa: C901
    src: str, pos: Pos, parse_float: ParseFloat
) -> Tuple[Pos, Any]:
    """Parse a single TOML value starting at `pos`.

    Dispatches on the first character (plus regex lookahead) to the
    specialized parsers.  Returns the position just past the value and
    the parsed Python object.  The dispatch order matters: see the
    date/time-before-number comment below.

    Raises TOMLDecodeError if no valid value starts at `pos`.
    """
    try:
        char: Optional[str] = src[pos]
    except IndexError:
        char = None  # end of document; fall through to "Invalid value"

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t":
        if src.startswith("true", pos):
            return pos + 4, True
    if char == "f":
        if src.startswith("false", pos):
            return pos + 5, False

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            # May raise ValueError for calendar-invalid dates (e.g. Feb 30).
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise suffixed_err(src, pos, "Invalid date or datetime") from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float)

    # Special floats (checked last; RE_NUMBER does not match them)
    first_three = src[pos : pos + 3]
    if first_three in {"inf", "nan"}:
        return pos + 3, parse_float(first_three)
    first_four = src[pos : pos + 4]
    if first_four in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(first_four)

    raise suffixed_err(src, pos, "Invalid value")
|
643 | ||||
|
644 | ||||
|
def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
    """Return a `TOMLDecodeError` whose message is suffixed with the
    position's coordinates in the source.

    Note this *returns* the exception rather than raising it; callers
    are expected to ``raise suffixed_err(...)`` themselves.
    """
    if pos >= len(src):
        coord = "end of document"
    else:
        # Lines and columns are 1-based.
        line = src.count("\n", 0, pos) + 1
        if line == 1:
            column = pos + 1
        else:
            column = pos - src.rindex("\n", 0, pos)
        coord = f"line {line}, column {column}"
    return TOMLDecodeError(f"{msg} (at {coord})")
|
660 | ||||
|
661 | ||||
|
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return True if `codepoint` is a Unicode scalar value.

    Scalar values are all code points in U+0000..U+10FFFF except the
    surrogate range U+D800..U+DFFF (which `chr` would accept but TOML
    escapes must not produce).
    """
    return 0 <= codepoint <= 0xD7FF or 0xE000 <= codepoint <= 0x10FFFF
@@ -0,0 +1,101 b'' | |||||
|
1 | from datetime import date, datetime, time, timedelta, timezone, tzinfo | |||
|
2 | from functools import lru_cache | |||
|
3 | import re | |||
|
4 | from typing import Any, Optional, Union | |||
|
5 | ||||
|
6 | from ._types import ParseFloat | |||
|
7 | ||||
|
8 | # E.g. | |||
|
9 | # - 00:32:00.999999 | |||
|
10 | # - 00:32:00 | |||
|
11 | _TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" | |||
|
12 | ||||
|
13 | RE_NUMBER = re.compile( | |||
|
14 | r""" | |||
|
15 | 0 | |||
|
16 | (?: | |||
|
17 | x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex | |||
|
18 | | | |||
|
19 | b[01](?:_?[01])* # bin | |||
|
20 | | | |||
|
21 | o[0-7](?:_?[0-7])* # oct | |||
|
22 | ) | |||
|
23 | | | |||
|
24 | [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part | |||
|
25 | (?P<floatpart> | |||
|
26 | (?:\.[0-9](?:_?[0-9])*)? # optional fractional part | |||
|
27 | (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part | |||
|
28 | ) | |||
|
29 | """, | |||
|
30 | flags=re.VERBOSE, | |||
|
31 | ) | |||
|
32 | RE_LOCALTIME = re.compile(_TIME_RE_STR) | |||
|
33 | RE_DATETIME = re.compile( | |||
|
34 | fr""" | |||
|
35 | ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 | |||
|
36 | (?: | |||
|
37 | [Tt ] | |||
|
38 | {_TIME_RE_STR} | |||
|
39 | (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset | |||
|
40 | )? | |||
|
41 | """, | |||
|
42 | flags=re.VERBOSE, | |||
|
43 | ) | |||
|
44 | ||||
|
45 | ||||
|
def match_to_datetime(match: "re.Match") -> Union[datetime, date]:
    """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.

    A match without a time component yields a plain `date`; otherwise a
    `datetime`, timezone-aware when a Zulu marker or numeric offset is
    present and naive for a local date-time.

    Raises ValueError if the match does not correspond to a valid date
    or datetime (e.g. February 30).
    """
    # Group numbering follows RE_DATETIME: 1-3 date, 4-7 time,
    # 8 Zulu marker, 9-11 numeric offset.
    year = int(match.group(1))
    month = int(match.group(2))
    day = int(match.group(3))
    hour_str = match.group(4)
    if hour_str is None:
        # The whole time part is optional -> date-only value.
        return date(year, month, day)
    hour = int(hour_str)
    minute = int(match.group(5))
    sec = int(match.group(6))
    micros_str = match.group(7)
    # The regex captures 1-6 fractional digits; right-pad to microseconds.
    micros = int(micros_str.ljust(6, "0")) if micros_str else 0
    offset_sign_str = match.group(9)
    if offset_sign_str:
        tz: Optional[tzinfo] = cached_tz(
            match.group(10), match.group(11), offset_sign_str
        )
    elif match.group(8):  # Zulu time
        tz = timezone.utc
    else:  # local date-time
        tz = None
    return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
|
79 | ||||
|
80 | ||||
|
@lru_cache(maxsize=None)
def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
    """Build (and memoize) a fixed-offset timezone from TOML offset parts.

    `sign_str` is "+" or "-"; `hour_str`/`minute_str` are zero-padded
    digit strings from the datetime regex.  The cache is unbounded, but
    the key space is limited to the distinct offsets in a document.
    """
    sign = 1 if sign_str == "+" else -1
    offset = timedelta(
        hours=sign * int(hour_str),
        minutes=sign * int(minute_str),
    )
    return timezone(offset)
|
90 | ||||
|
91 | ||||
|
def match_to_localtime(match: "re.Match") -> time:
    """Convert a `RE_LOCALTIME` match to a naive `datetime.time`."""
    hour_str, minute_str, sec_str, micros_str = match.groups()
    if micros_str:
        # Right-pad the 1-6 captured fractional digits to microseconds.
        micros = int(micros_str.ljust(6, "0"))
    else:
        micros = 0
    return time(int(hour_str), int(minute_str), int(sec_str), micros)
|
96 | ||||
|
97 | ||||
|
def match_to_number(match: "re.Match", parse_float: "ParseFloat") -> Any:
    """Convert a `RE_NUMBER` match to an int or a float.

    The `floatpart` named group is non-empty exactly when a fractional
    part or exponent was matched; only then is `parse_float` used.
    Integers are parsed with base 0 so `0x`/`0o`/`0b` prefixes work.
    """
    text = match.group()
    if match.group("floatpart"):
        return parse_float(text)
    return int(text, 0)
@@ -0,0 +1,6 b'' | |||||
|
1 | from typing import Any, Callable, Tuple | |||
|
2 | ||||
|
# Type annotations shared across the package.
ParseFloat = Callable[[str], Any]  # converts a TOML float literal, e.g. `float`
Key = Tuple[str, ...]  # a (possibly dotted) key, one str per part
Pos = int  # 0-based index into the source string
@@ -0,0 +1,1 b'' | |||||
|
1 | # Marker file for PEP 561 |
@@ -45,6 +45,7 b' allowsymbolimports = (' | |||||
45 | 'mercurial.thirdparty', |
|
45 | 'mercurial.thirdparty', | |
46 | 'mercurial.thirdparty.attr', |
|
46 | 'mercurial.thirdparty.attr', | |
47 | 'mercurial.thirdparty.jaraco.collections', |
|
47 | 'mercurial.thirdparty.jaraco.collections', | |
|
48 | 'mercurial.thirdparty.tomli', | |||
48 | 'mercurial.thirdparty.zope', |
|
49 | 'mercurial.thirdparty.zope', | |
49 | 'mercurial.thirdparty.zope.interface', |
|
50 | 'mercurial.thirdparty.zope.interface', | |
50 | 'typing', |
|
51 | 'typing', |
@@ -1306,6 +1306,7 b' packages = [' | |||||
1306 | 'mercurial.templates', |
|
1306 | 'mercurial.templates', | |
1307 | 'mercurial.thirdparty', |
|
1307 | 'mercurial.thirdparty', | |
1308 | 'mercurial.thirdparty.attr', |
|
1308 | 'mercurial.thirdparty.attr', | |
|
1309 | 'mercurial.thirdparty.tomli', | |||
1309 | 'mercurial.thirdparty.zope', |
|
1310 | 'mercurial.thirdparty.zope', | |
1310 | 'mercurial.thirdparty.zope.interface', |
|
1311 | 'mercurial.thirdparty.zope.interface', | |
1311 | 'mercurial.upgrade_utils', |
|
1312 | 'mercurial.upgrade_utils', |
General Comments 0
You need to be logged in to leave comments.
Login now