##// END OF EJS Templates
rust-nodemap: use proper Index API instead of using the C API...
Raphaël Gomès -
r44994:857cc792 default
parent child Browse files
Show More
@@ -1,278 +1,376 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 use crate::cindex;
8 use crate::{
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
11 };
9 12 use cpython::{
10 exc::ValueError, ObjectProtocol, PyClone, PyDict, PyErr, PyModule,
11 PyObject, PyResult, PyTuple, Python, PythonObject, ToPyObject,
13 exc::{IndexError, ValueError},
14 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
15 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
12 16 };
13 use hg::{nodemap::NodeMapError, NodeError, Revision};
17 use hg::{
18 nodemap::{NodeMapError, NodeTree},
19 revlog::{nodemap::NodeMap, RevlogIndex},
20 NodeError, Revision,
21 };
14 22 use std::cell::RefCell;
15 23
16 24 /// Return a Struct implementing the Graph trait
17 25 pub(crate) fn pyindex_to_graph(
18 26 py: Python,
19 27 index: PyObject,
20 28 ) -> PyResult<cindex::Index> {
21 29 match index.extract::<MixedIndex>(py) {
22 30 Ok(midx) => Ok(midx.clone_cindex(py)),
23 31 Err(_) => cindex::Index::new(py, index),
24 32 }
25 33 }
26 34
27 35 py_class!(pub class MixedIndex |py| {
28 36 data cindex: RefCell<cindex::Index>;
37 data nt: RefCell<Option<NodeTree>>;
29 38
30 39 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
31 40 Self::new(py, cindex)
32 41 }
33 42
34 43 /// Compatibility layer used for Python consumers needing access to the C index
35 44 ///
36 45 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
37 46 /// that may need to build a custom `nodetree`, based on a specified revset.
38 47 /// With a Rust implementation of the nodemap, we will be able to get rid of
39 48 /// this, by exposing our own standalone nodemap class,
40 49 /// ready to accept `MixedIndex`.
41 50 def get_cindex(&self) -> PyResult<PyObject> {
42 51 Ok(self.cindex(py).borrow().inner().clone_ref(py))
43 52 }
44 53
54 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
45 55
56 /// Return the Revision if found; raise a bare `error.RevlogError`
57 /// in case of ambiguity, as the C version does
58 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
59 let opt = self.get_nodetree(py)?.borrow();
60 let nt = opt.as_ref().unwrap();
61 let idx = &*self.cindex(py).borrow();
62 let node = node_from_py_bytes(py, &node)?;
63 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
64 }
65
66 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
67 /// is not found.
68 ///
69 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
70 /// will catch and rewrap with it
71 def rev(&self, node: PyBytes) -> PyResult<Revision> {
72 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
73 }
74
75 /// return True if the node exists in the index
76 def has_node(&self, node: PyBytes) -> PyResult<bool> {
77 self.get_rev(py, node).map(|opt| opt.is_some())
78 }
79
80 /// find length of shortest hex nodeid of a binary ID
81 def shortest(&self, node: PyBytes) -> PyResult<usize> {
82 let opt = self.get_nodetree(py)?.borrow();
83 let nt = opt.as_ref().unwrap();
84 let idx = &*self.cindex(py).borrow();
85 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
86 {
87 Ok(Some(l)) => Ok(l),
88 Ok(None) => Err(revlog_error(py)),
89 Err(e) => Err(nodemap_error(py, e)),
90 }
91 }
92
93 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
94 let opt = self.get_nodetree(py)?.borrow();
95 let nt = opt.as_ref().unwrap();
96 let idx = &*self.cindex(py).borrow();
97
98 let node_as_string = if cfg!(feature = "python3-sys") {
99 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
100 }
101 else {
102 let node = node.extract::<PyBytes>(py)?;
103 String::from_utf8_lossy(node.data(py)).to_string()
104 };
105
106 nt.find_hex(idx, &node_as_string)
107 // TODO make an inner API returning the node directly
108 .map(|opt| opt.map(
109 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
110 .map_err(|e| nodemap_error(py, e))
111
112 }
113
114 /// append an index entry
115 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
116 if tup.len(py) < 8 {
117 // this is better than the panic promised by tup.get_item()
118 return Err(
119 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
120 }
121 let node_bytes = tup.get_item(py, 7).extract(py)?;
122 let node = node_from_py_object(py, &node_bytes)?;
123
124 let mut idx = self.cindex(py).borrow_mut();
125 let rev = idx.len() as Revision;
126
127 idx.append(py, tup)?;
128 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
129 .insert(&*idx, &node, rev)
130 .map_err(|e| nodemap_error(py, e))?;
131 Ok(py.None())
132 }
133
134 def __delitem__(&self, key: PyObject) -> PyResult<()> {
135 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
136 self.cindex(py).borrow().inner().del_item(py, key)?;
137 let mut opt = self.get_nodetree(py)?.borrow_mut();
138 let mut nt = opt.as_mut().unwrap();
139 nt.invalidate_all();
140 self.fill_nodemap(py, &mut nt)?;
141 Ok(())
142 }
143
144 //
46 145 // Reforwarded C index API
146 //
47 147
48 148 // index_methods (tp_methods). Same ordering as in revlog.c
49 149
50 150 /// return the gca set of the given revs
51 151 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
52 152 self.call_cindex(py, "ancestors", args, kw)
53 153 }
54 154
55 155 /// return the heads of the common ancestors of the given revs
56 156 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
57 157 self.call_cindex(py, "commonancestorsheads", args, kw)
58 158 }
59 159
60 160 /// clear the index caches
61 161 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
62 162 self.call_cindex(py, "clearcaches", args, kw)
63 163 }
64 164
65 165 /// get an index entry
66 166 def get(&self, *args, **kw) -> PyResult<PyObject> {
67 167 self.call_cindex(py, "get", args, kw)
68 168 }
69 169
70 /// return `rev` associated with a node or None
71 def get_rev(&self, *args, **kw) -> PyResult<PyObject> {
72 self.call_cindex(py, "get_rev", args, kw)
73 }
74
75 /// return True if the node exists in the index
76 def has_node(&self, *args, **kw) -> PyResult<PyObject> {
77 self.call_cindex(py, "has_node", args, kw)
78 }
79
80 /// return `rev` associated with a node or raise RevlogError
81 def rev(&self, *args, **kw) -> PyResult<PyObject> {
82 self.call_cindex(py, "rev", args, kw)
83 }
84
85 170 /// compute phases
86 171 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
87 172 self.call_cindex(py, "computephasesmapsets", args, kw)
88 173 }
89 174
90 175 /// reachableroots
91 176 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
92 177 self.call_cindex(py, "reachableroots2", args, kw)
93 178 }
94 179
95 180 /// get head revisions
96 181 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
97 182 self.call_cindex(py, "headrevs", args, kw)
98 183 }
99 184
100 185 /// get filtered head revisions
101 186 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
102 187 self.call_cindex(py, "headrevsfiltered", args, kw)
103 188 }
104 189
105 190 /// True if the object is a snapshot
106 191 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
107 192 self.call_cindex(py, "issnapshot", args, kw)
108 193 }
109 194
110 195 /// Gather snapshot data in a cache dict
111 196 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
112 197 self.call_cindex(py, "findsnapshots", args, kw)
113 198 }
114 199
115 200 /// determine revisions with deltas to reconstruct fulltext
116 201 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
117 202 self.call_cindex(py, "deltachain", args, kw)
118 203 }
119 204
120 205 /// slice planned chunk read to reach a density threshold
121 206 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
122 207 self.call_cindex(py, "slicechunktodensity", args, kw)
123 208 }
124 209
125 /// append an index entry
126 def append(&self, *args, **kw) -> PyResult<PyObject> {
127 self.call_cindex(py, "append", args, kw)
128 }
129
130 /// match a potentially ambiguous node ID
131 def partialmatch(&self, *args, **kw) -> PyResult<PyObject> {
132 self.call_cindex(py, "partialmatch", args, kw)
133 }
134
135 /// find length of shortest hex nodeid of a binary ID
136 def shortest(&self, *args, **kw) -> PyResult<PyObject> {
137 self.call_cindex(py, "shortest", args, kw)
138 }
139
140 210 /// stats for the index
141 211 def stats(&self, *args, **kw) -> PyResult<PyObject> {
142 212 self.call_cindex(py, "stats", args, kw)
143 213 }
144 214
145 215 // index_sequence_methods and index_mapping_methods.
146 216 //
147 217 // Since we call back through the high level Python API,
148 218 // there's no point making a distinction between index_get
149 219 // and index_getitem.
150 220
151 221 def __len__(&self) -> PyResult<usize> {
152 222 self.cindex(py).borrow().inner().len(py)
153 223 }
154 224
155 225 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
156 226 // this conversion seems needless, but that's actually because
157 227 // `index_getitem` does not handle conversion from PyLong,
158 228 // which expressions such as [e for e in index] internally use.
159 229 // Note that we don't seem to have a direct way to call
160 230 // PySequence_GetItem (does the job), which would be better
161 231 // for performance
162 232 let key = match key.extract::<Revision>(py) {
163 233 Ok(rev) => rev.to_py_object(py).into_object(),
164 234 Err(_) => key,
165 235 };
166 236 self.cindex(py).borrow().inner().get_item(py, key)
167 237 }
168 238
169 239 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
170 240 self.cindex(py).borrow().inner().set_item(py, key, value)
171 241 }
172 242
173 def __delitem__(&self, key: PyObject) -> PyResult<()> {
174 self.cindex(py).borrow().inner().del_item(py, key)
175 }
176
177 243 def __contains__(&self, item: PyObject) -> PyResult<bool> {
178 244 // ObjectProtocol does not seem to provide contains(), so
179 245 // this is an equivalent implementation of the index_contains()
180 246 // defined in revlog.c
181 247 let cindex = self.cindex(py).borrow();
182 248 match item.extract::<Revision>(py) {
183 249 Ok(rev) => {
184 250 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
185 251 }
186 252 Err(_) => {
187 253 cindex.inner().call_method(
188 254 py,
189 255 "has_node",
190 256 PyTuple::new(py, &[item]),
191 257 None)?
192 258 .extract(py)
193 259 }
194 260 }
195 261 }
196 262
197 263
198 264 });
199 265
200 266 impl MixedIndex {
201 267 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
202 268 Self::create_instance(
203 269 py,
204 270 RefCell::new(cindex::Index::new(py, cindex)?),
271 RefCell::new(None),
205 272 )
206 273 }
207 274
275 /// This is scaffolding at this point, but it could also become
276 /// a way to start a persistent nodemap or perform a
277 /// vacuum / repack operation
278 fn fill_nodemap(
279 &self,
280 py: Python,
281 nt: &mut NodeTree,
282 ) -> PyResult<PyObject> {
283 let index = self.cindex(py).borrow();
284 for r in 0..index.len() {
285 let rev = r as Revision;
286 // in this case node() won't ever return None
287 nt.insert(&*index, index.node(rev).unwrap(), rev)
288 .map_err(|e| nodemap_error(py, e))?
289 }
290 Ok(py.None())
291 }
292
293 fn get_nodetree<'a>(
294 &'a self,
295 py: Python<'a>,
296 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
297 if self.nt(py).borrow().is_none() {
298 let readonly = Box::new(Vec::new());
299 let mut nt = NodeTree::load_bytes(readonly, 0);
300 self.fill_nodemap(py, &mut nt)?;
301 self.nt(py).borrow_mut().replace(nt);
302 }
303 Ok(self.nt(py))
304 }
305
208 306 /// forward a method call to the underlying C index
209 307 fn call_cindex(
210 308 &self,
211 309 py: Python,
212 310 name: &str,
213 311 args: &PyTuple,
214 312 kwargs: Option<&PyDict>,
215 313 ) -> PyResult<PyObject> {
216 314 self.cindex(py)
217 315 .borrow()
218 316 .inner()
219 317 .call_method(py, name, args, kwargs)
220 318 }
221 319
222 320 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
223 321 self.cindex(py).borrow().clone_ref(py)
224 322 }
225 323 }
226 324
227 325 fn revlog_error(py: Python) -> PyErr {
228 326 match py
229 327 .import("mercurial.error")
230 328 .and_then(|m| m.get(py, "RevlogError"))
231 329 {
232 330 Err(e) => e,
233 331 Ok(cls) => PyErr::from_instance(py, cls),
234 332 }
235 333 }
236 334
237 335 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
238 336 PyErr::new::<ValueError, _>(
239 337 py,
240 338 format!(
241 339 "Inconsistency: Revision {} found in nodemap \
242 340 is not in revlog index",
243 341 rev
244 342 ),
245 343 )
246 344 }
247 345
248 346 /// Standard treatment of NodeMapError
249 347 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
250 348 match err {
251 349 NodeMapError::MultipleResults => revlog_error(py),
252 350 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
253 351 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
254 352 }
255 353 }
256 354
257 355 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
258 356 PyErr::new::<ValueError, _>(
259 357 py,
260 358 format!("Invalid node or prefix: {:?}", ne),
261 359 )
262 360 }
263 361
264 362 /// Create the module, with __package__ given from parent
265 363 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
266 364 let dotted_name = &format!("{}.revlog", package);
267 365 let m = PyModule::new(py, dotted_name)?;
268 366 m.add(py, "__package__", package)?;
269 367 m.add(py, "__doc__", "RevLog - Rust implementations")?;
270 368
271 369 m.add_class::<MixedIndex>(py)?;
272 370
273 371 let sys = PyModule::import(py, "sys")?;
274 372 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
275 373 sys_modules.set_item(py, dotted_name, &m)?;
276 374
277 375 Ok(m)
278 376 }
General Comments 0
You need to be logged in to leave comments. Login now