##// END OF EJS Templates
rust-nodemap: add binding for `nodemap_data_all`...
Georges Racinet -
r44995:b581231a default
parent child Browse files
Show More
@@ -1,376 +1,403 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 exc::{IndexError, ValueError},
14 14 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
15 15 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 16 };
17 17 use hg::{
18 18 nodemap::{NodeMapError, NodeTree},
19 19 revlog::{nodemap::NodeMap, RevlogIndex},
20 20 NodeError, Revision,
21 21 };
22 22 use std::cell::RefCell;
23 23
24 24 /// Return a Struct implementing the Graph trait
25 25 pub(crate) fn pyindex_to_graph(
26 26 py: Python,
27 27 index: PyObject,
28 28 ) -> PyResult<cindex::Index> {
29 29 match index.extract::<MixedIndex>(py) {
30 30 Ok(midx) => Ok(midx.clone_cindex(py)),
31 31 Err(_) => cindex::Index::new(py, index),
32 32 }
33 33 }
34 34
35 35 py_class!(pub class MixedIndex |py| {
36 36 data cindex: RefCell<cindex::Index>;
37 37 data nt: RefCell<Option<NodeTree>>;
38 38
39 39 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
40 40 Self::new(py, cindex)
41 41 }
42 42
43 43 /// Compatibility layer used for Python consumers needing access to the C index
44 44 ///
45 45 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
46 46 /// that may need to build a custom `nodetree`, based on a specified revset.
47 47 /// With a Rust implementation of the nodemap, we will be able to get rid of
48 48 /// this, by exposing our own standalone nodemap class,
49 49 /// ready to accept `MixedIndex`.
50 50 def get_cindex(&self) -> PyResult<PyObject> {
51 51 Ok(self.cindex(py).borrow().inner().clone_ref(py))
52 52 }
53 53
54 54 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
55 55
56 56 /// Return Revision if found, raises a bare `error.RevlogError`
57 57 /// in case of ambiguity, same as C version does
58 58 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
59 59 let opt = self.get_nodetree(py)?.borrow();
60 60 let nt = opt.as_ref().unwrap();
61 61 let idx = &*self.cindex(py).borrow();
62 62 let node = node_from_py_bytes(py, &node)?;
63 63 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
64 64 }
65 65
66 66 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
67 67 /// is not found.
68 68 ///
69 69 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
70 70 /// will catch and rewrap with it
71 71 def rev(&self, node: PyBytes) -> PyResult<Revision> {
72 72 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
73 73 }
74 74
75 75 /// return True if the node exist in the index
76 76 def has_node(&self, node: PyBytes) -> PyResult<bool> {
77 77 self.get_rev(py, node).map(|opt| opt.is_some())
78 78 }
79 79
80 80 /// find length of shortest hex nodeid of a binary ID
81 81 def shortest(&self, node: PyBytes) -> PyResult<usize> {
82 82 let opt = self.get_nodetree(py)?.borrow();
83 83 let nt = opt.as_ref().unwrap();
84 84 let idx = &*self.cindex(py).borrow();
85 85 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
86 86 {
87 87 Ok(Some(l)) => Ok(l),
88 88 Ok(None) => Err(revlog_error(py)),
89 89 Err(e) => Err(nodemap_error(py, e)),
90 90 }
91 91 }
92 92
93 93 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
94 94 let opt = self.get_nodetree(py)?.borrow();
95 95 let nt = opt.as_ref().unwrap();
96 96 let idx = &*self.cindex(py).borrow();
97 97
98 98 let node_as_string = if cfg!(feature = "python3-sys") {
99 99 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
100 100 }
101 101 else {
102 102 let node = node.extract::<PyBytes>(py)?;
103 103 String::from_utf8_lossy(node.data(py)).to_string()
104 104 };
105 105
106 106 nt.find_hex(idx, &node_as_string)
107 107 // TODO make an inner API returning the node directly
108 108 .map(|opt| opt.map(
109 109 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
110 110 .map_err(|e| nodemap_error(py, e))
111 111
112 112 }
113 113
114 114 /// append an index entry
115 115 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
116 116 if tup.len(py) < 8 {
117 117 // this is better than the panic promised by tup.get_item()
118 118 return Err(
119 119 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
120 120 }
121 121 let node_bytes = tup.get_item(py, 7).extract(py)?;
122 122 let node = node_from_py_object(py, &node_bytes)?;
123 123
124 124 let mut idx = self.cindex(py).borrow_mut();
125 125 let rev = idx.len() as Revision;
126 126
127 127 idx.append(py, tup)?;
128 128 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
129 129 .insert(&*idx, &node, rev)
130 130 .map_err(|e| nodemap_error(py, e))?;
131 131 Ok(py.None())
132 132 }
133 133
134 134 def __delitem__(&self, key: PyObject) -> PyResult<()> {
135 135 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
136 136 self.cindex(py).borrow().inner().del_item(py, key)?;
137 137 let mut opt = self.get_nodetree(py)?.borrow_mut();
138 138 let mut nt = opt.as_mut().unwrap();
139 139 nt.invalidate_all();
140 140 self.fill_nodemap(py, &mut nt)?;
141 141 Ok(())
142 142 }
143 143
144 144 //
145 145 // Reforwarded C index API
146 146 //
147 147
148 148 // index_methods (tp_methods). Same ordering as in revlog.c
149 149
150 150 /// return the gca set of the given revs
151 151 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
152 152 self.call_cindex(py, "ancestors", args, kw)
153 153 }
154 154
155 155 /// return the heads of the common ancestors of the given revs
156 156 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
157 157 self.call_cindex(py, "commonancestorsheads", args, kw)
158 158 }
159 159
160 160 /// clear the index caches
161 161 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
162 162 self.call_cindex(py, "clearcaches", args, kw)
163 163 }
164 164
165 165 /// get an index entry
166 166 def get(&self, *args, **kw) -> PyResult<PyObject> {
167 167 self.call_cindex(py, "get", args, kw)
168 168 }
169 169
170 170 /// compute phases
171 171 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
172 172 self.call_cindex(py, "computephasesmapsets", args, kw)
173 173 }
174 174
175 175 /// reachableroots
176 176 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
177 177 self.call_cindex(py, "reachableroots2", args, kw)
178 178 }
179 179
180 180 /// get head revisions
181 181 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
182 182 self.call_cindex(py, "headrevs", args, kw)
183 183 }
184 184
185 185 /// get filtered head revisions
186 186 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
187 187 self.call_cindex(py, "headrevsfiltered", args, kw)
188 188 }
189 189
190 190 /// True if the object is a snapshot
191 191 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
192 192 self.call_cindex(py, "issnapshot", args, kw)
193 193 }
194 194
195 195 /// Gather snapshot data in a cache dict
196 196 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
197 197 self.call_cindex(py, "findsnapshots", args, kw)
198 198 }
199 199
200 200 /// determine revisions with deltas to reconstruct fulltext
201 201 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
202 202 self.call_cindex(py, "deltachain", args, kw)
203 203 }
204 204
205 205 /// slice planned chunk read to reach a density threshold
206 206 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
207 207 self.call_cindex(py, "slicechunktodensity", args, kw)
208 208 }
209 209
210 210 /// stats for the index
211 211 def stats(&self, *args, **kw) -> PyResult<PyObject> {
212 212 self.call_cindex(py, "stats", args, kw)
213 213 }
214 214
215 215 // index_sequence_methods and index_mapping_methods.
216 216 //
217 217 // Since we call back through the high level Python API,
218 218 // there's no point making a distinction between index_get
219 219 // and index_getitem.
220 220
221 221 def __len__(&self) -> PyResult<usize> {
222 222 self.cindex(py).borrow().inner().len(py)
223 223 }
224 224
225 225 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
226 226 // this conversion seems needless, but that's actually because
227 227 // `index_getitem` does not handle conversion from PyLong,
228 228 // which expressions such as [e for e in index] internally use.
229 229 // Note that we don't seem to have a direct way to call
230 230 // PySequence_GetItem (does the job), which would be better for
231 231 // for performance
232 232 let key = match key.extract::<Revision>(py) {
233 233 Ok(rev) => rev.to_py_object(py).into_object(),
234 234 Err(_) => key,
235 235 };
236 236 self.cindex(py).borrow().inner().get_item(py, key)
237 237 }
238 238
239 239 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
240 240 self.cindex(py).borrow().inner().set_item(py, key, value)
241 241 }
242 242
243 243 def __contains__(&self, item: PyObject) -> PyResult<bool> {
244 244 // ObjectProtocol does not seem to provide contains(), so
245 245 // this is an equivalent implementation of the index_contains()
246 246 // defined in revlog.c
247 247 let cindex = self.cindex(py).borrow();
248 248 match item.extract::<Revision>(py) {
249 249 Ok(rev) => {
250 250 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
251 251 }
252 252 Err(_) => {
253 253 cindex.inner().call_method(
254 254 py,
255 255 "has_node",
256 256 PyTuple::new(py, &[item]),
257 257 None)?
258 258 .extract(py)
259 259 }
260 260 }
261 261 }
262 262
263 def nodemap_data_all(&self) -> PyResult<PyBytes> {
264 self.inner_nodemap_data_all(py)
265 }
266
263 267
264 268 });
265 269
266 270 impl MixedIndex {
267 271 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
268 272 Self::create_instance(
269 273 py,
270 274 RefCell::new(cindex::Index::new(py, cindex)?),
271 275 RefCell::new(None),
272 276 )
273 277 }
274 278
275 279 /// This is scaffolding at this point, but it could also become
276 280 /// a way to start a persistent nodemap or perform a
277 281 /// vacuum / repack operation
278 282 fn fill_nodemap(
279 283 &self,
280 284 py: Python,
281 285 nt: &mut NodeTree,
282 286 ) -> PyResult<PyObject> {
283 287 let index = self.cindex(py).borrow();
284 288 for r in 0..index.len() {
285 289 let rev = r as Revision;
286 290 // in this case node() won't ever return None
287 291 nt.insert(&*index, index.node(rev).unwrap(), rev)
288 292 .map_err(|e| nodemap_error(py, e))?
289 293 }
290 294 Ok(py.None())
291 295 }
292 296
293 297 fn get_nodetree<'a>(
294 298 &'a self,
295 299 py: Python<'a>,
296 300 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
297 301 if self.nt(py).borrow().is_none() {
298 302 let readonly = Box::new(Vec::new());
299 303 let mut nt = NodeTree::load_bytes(readonly, 0);
300 304 self.fill_nodemap(py, &mut nt)?;
301 305 self.nt(py).borrow_mut().replace(nt);
302 306 }
303 307 Ok(self.nt(py))
304 308 }
305 309
306 310 /// forward a method call to the underlying C index
307 311 fn call_cindex(
308 312 &self,
309 313 py: Python,
310 314 name: &str,
311 315 args: &PyTuple,
312 316 kwargs: Option<&PyDict>,
313 317 ) -> PyResult<PyObject> {
314 318 self.cindex(py)
315 319 .borrow()
316 320 .inner()
317 321 .call_method(py, name, args, kwargs)
318 322 }
319 323
320 324 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
321 325 self.cindex(py).borrow().clone_ref(py)
322 326 }
327
328 /// Returns the full nodemap bytes to be written as-is to disk
329 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
330 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
331 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
332
333 // If there's anything readonly, we need to build the data again from
334 // scratch
335 let bytes = if readonly.len() > 0 {
336 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
337 self.fill_nodemap(py, &mut nt)?;
338
339 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
340 assert_eq!(readonly.len(), 0);
341
342 bytes
343 } else {
344 bytes
345 };
346
347 let bytes = PyBytes::new(py, &bytes);
348 Ok(bytes)
349 }
323 350 }
324 351
325 352 fn revlog_error(py: Python) -> PyErr {
326 353 match py
327 354 .import("mercurial.error")
328 355 .and_then(|m| m.get(py, "RevlogError"))
329 356 {
330 357 Err(e) => e,
331 358 Ok(cls) => PyErr::from_instance(py, cls),
332 359 }
333 360 }
334 361
335 362 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
336 363 PyErr::new::<ValueError, _>(
337 364 py,
338 365 format!(
339 366 "Inconsistency: Revision {} found in nodemap \
340 367 is not in revlog index",
341 368 rev
342 369 ),
343 370 )
344 371 }
345 372
346 373 /// Standard treatment of NodeMapError
347 374 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
348 375 match err {
349 376 NodeMapError::MultipleResults => revlog_error(py),
350 377 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
351 378 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
352 379 }
353 380 }
354 381
355 382 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
356 383 PyErr::new::<ValueError, _>(
357 384 py,
358 385 format!("Invalid node or prefix: {:?}", ne),
359 386 )
360 387 }
361 388
362 389 /// Create the module, with __package__ given from parent
363 390 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
364 391 let dotted_name = &format!("{}.revlog", package);
365 392 let m = PyModule::new(py, dotted_name)?;
366 393 m.add(py, "__package__", package)?;
367 394 m.add(py, "__doc__", "RevLog - Rust implementations")?;
368 395
369 396 m.add_class::<MixedIndex>(py)?;
370 397
371 398 let sys = PyModule::import(py, "sys")?;
372 399 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
373 400 sys_modules.set_item(py, dotted_name, &m)?;
374 401
375 402 Ok(m)
376 403 }
General Comments 0
You need to be logged in to leave comments. Login now