##// END OF EJS Templates
rust-nodemap: add binding for `nodemap_data_incremental`...
Georges Racinet -
r44996:5bbf8872 default
parent child Browse files
Show More
@@ -1,403 +1,430 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 exc::{IndexError, ValueError},
14 14 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
15 15 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 16 };
17 17 use hg::{
18 nodemap::{NodeMapError, NodeTree},
18 nodemap::{Block, NodeMapError, NodeTree},
19 19 revlog::{nodemap::NodeMap, RevlogIndex},
20 20 NodeError, Revision,
21 21 };
22 22 use std::cell::RefCell;
23 23
24 24 /// Return a Struct implementing the Graph trait
25 25 pub(crate) fn pyindex_to_graph(
26 26 py: Python,
27 27 index: PyObject,
28 28 ) -> PyResult<cindex::Index> {
29 29 match index.extract::<MixedIndex>(py) {
30 30 Ok(midx) => Ok(midx.clone_cindex(py)),
31 31 Err(_) => cindex::Index::new(py, index),
32 32 }
33 33 }
34 34
35 35 py_class!(pub class MixedIndex |py| {
36 36 data cindex: RefCell<cindex::Index>;
37 37 data nt: RefCell<Option<NodeTree>>;
38 data docket: RefCell<Option<PyObject>>;
38 39
39 40 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
40 41 Self::new(py, cindex)
41 42 }
42 43
43 44 /// Compatibility layer used for Python consumers needing access to the C index
44 45 ///
45 46 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
46 47 /// that may need to build a custom `nodetree`, based on a specified revset.
47 48 /// With a Rust implementation of the nodemap, we will be able to get rid of
48 49 /// this, by exposing our own standalone nodemap class,
49 50 /// ready to accept `MixedIndex`.
50 51 def get_cindex(&self) -> PyResult<PyObject> {
51 52 Ok(self.cindex(py).borrow().inner().clone_ref(py))
52 53 }
53 54
54 55 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
55 56
56 57 /// Return Revision if found, raises a bare `error.RevlogError`
57 58 /// in case of ambiguity, same as C version does
58 59 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
59 60 let opt = self.get_nodetree(py)?.borrow();
60 61 let nt = opt.as_ref().unwrap();
61 62 let idx = &*self.cindex(py).borrow();
62 63 let node = node_from_py_bytes(py, &node)?;
63 64 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
64 65 }
65 66
66 67 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
67 68 /// is not found.
68 69 ///
69 70 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
70 71 /// will catch and rewrap with it
71 72 def rev(&self, node: PyBytes) -> PyResult<Revision> {
72 73 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
73 74 }
74 75
75 76 /// return True if the node exist in the index
76 77 def has_node(&self, node: PyBytes) -> PyResult<bool> {
77 78 self.get_rev(py, node).map(|opt| opt.is_some())
78 79 }
79 80
80 81 /// find length of shortest hex nodeid of a binary ID
81 82 def shortest(&self, node: PyBytes) -> PyResult<usize> {
82 83 let opt = self.get_nodetree(py)?.borrow();
83 84 let nt = opt.as_ref().unwrap();
84 85 let idx = &*self.cindex(py).borrow();
85 86 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
86 87 {
87 88 Ok(Some(l)) => Ok(l),
88 89 Ok(None) => Err(revlog_error(py)),
89 90 Err(e) => Err(nodemap_error(py, e)),
90 91 }
91 92 }
92 93
93 94 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
94 95 let opt = self.get_nodetree(py)?.borrow();
95 96 let nt = opt.as_ref().unwrap();
96 97 let idx = &*self.cindex(py).borrow();
97 98
98 99 let node_as_string = if cfg!(feature = "python3-sys") {
99 100 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
100 101 }
101 102 else {
102 103 let node = node.extract::<PyBytes>(py)?;
103 104 String::from_utf8_lossy(node.data(py)).to_string()
104 105 };
105 106
106 107 nt.find_hex(idx, &node_as_string)
107 108 // TODO make an inner API returning the node directly
108 109 .map(|opt| opt.map(
109 110 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
110 111 .map_err(|e| nodemap_error(py, e))
111 112
112 113 }
113 114
114 115 /// append an index entry
115 116 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
116 117 if tup.len(py) < 8 {
117 118 // this is better than the panic promised by tup.get_item()
118 119 return Err(
119 120 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
120 121 }
121 122 let node_bytes = tup.get_item(py, 7).extract(py)?;
122 123 let node = node_from_py_object(py, &node_bytes)?;
123 124
124 125 let mut idx = self.cindex(py).borrow_mut();
125 126 let rev = idx.len() as Revision;
126 127
127 128 idx.append(py, tup)?;
128 129 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
129 130 .insert(&*idx, &node, rev)
130 131 .map_err(|e| nodemap_error(py, e))?;
131 132 Ok(py.None())
132 133 }
133 134
134 135 def __delitem__(&self, key: PyObject) -> PyResult<()> {
135 136 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
136 137 self.cindex(py).borrow().inner().del_item(py, key)?;
137 138 let mut opt = self.get_nodetree(py)?.borrow_mut();
138 139 let mut nt = opt.as_mut().unwrap();
139 140 nt.invalidate_all();
140 141 self.fill_nodemap(py, &mut nt)?;
141 142 Ok(())
142 143 }
143 144
144 145 //
145 146 // Reforwarded C index API
146 147 //
147 148
148 149 // index_methods (tp_methods). Same ordering as in revlog.c
149 150
150 151 /// return the gca set of the given revs
151 152 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
152 153 self.call_cindex(py, "ancestors", args, kw)
153 154 }
154 155
155 156 /// return the heads of the common ancestors of the given revs
156 157 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
157 158 self.call_cindex(py, "commonancestorsheads", args, kw)
158 159 }
159 160
160 161 /// clear the index caches
161 162 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
162 163 self.call_cindex(py, "clearcaches", args, kw)
163 164 }
164 165
165 166 /// get an index entry
166 167 def get(&self, *args, **kw) -> PyResult<PyObject> {
167 168 self.call_cindex(py, "get", args, kw)
168 169 }
169 170
170 171 /// compute phases
171 172 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
172 173 self.call_cindex(py, "computephasesmapsets", args, kw)
173 174 }
174 175
175 176 /// reachableroots
176 177 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
177 178 self.call_cindex(py, "reachableroots2", args, kw)
178 179 }
179 180
180 181 /// get head revisions
181 182 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
182 183 self.call_cindex(py, "headrevs", args, kw)
183 184 }
184 185
185 186 /// get filtered head revisions
186 187 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
187 188 self.call_cindex(py, "headrevsfiltered", args, kw)
188 189 }
189 190
190 191 /// True if the object is a snapshot
191 192 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
192 193 self.call_cindex(py, "issnapshot", args, kw)
193 194 }
194 195
195 196 /// Gather snapshot data in a cache dict
196 197 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
197 198 self.call_cindex(py, "findsnapshots", args, kw)
198 199 }
199 200
200 201 /// determine revisions with deltas to reconstruct fulltext
201 202 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
202 203 self.call_cindex(py, "deltachain", args, kw)
203 204 }
204 205
205 206 /// slice planned chunk read to reach a density threshold
206 207 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
207 208 self.call_cindex(py, "slicechunktodensity", args, kw)
208 209 }
209 210
210 211 /// stats for the index
211 212 def stats(&self, *args, **kw) -> PyResult<PyObject> {
212 213 self.call_cindex(py, "stats", args, kw)
213 214 }
214 215
215 216 // index_sequence_methods and index_mapping_methods.
216 217 //
217 218 // Since we call back through the high level Python API,
218 219 // there's no point making a distinction between index_get
219 220 // and index_getitem.
220 221
221 222 def __len__(&self) -> PyResult<usize> {
222 223 self.cindex(py).borrow().inner().len(py)
223 224 }
224 225
225 226 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
226 227 // this conversion seems needless, but that's actually because
227 228 // `index_getitem` does not handle conversion from PyLong,
228 229 // which expressions such as [e for e in index] internally use.
229 230 // Note that we don't seem to have a direct way to call
230 231 // PySequence_GetItem (does the job), which would be better for
231 232 // for performance
232 233 let key = match key.extract::<Revision>(py) {
233 234 Ok(rev) => rev.to_py_object(py).into_object(),
234 235 Err(_) => key,
235 236 };
236 237 self.cindex(py).borrow().inner().get_item(py, key)
237 238 }
238 239
239 240 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
240 241 self.cindex(py).borrow().inner().set_item(py, key, value)
241 242 }
242 243
243 244 def __contains__(&self, item: PyObject) -> PyResult<bool> {
244 245 // ObjectProtocol does not seem to provide contains(), so
245 246 // this is an equivalent implementation of the index_contains()
246 247 // defined in revlog.c
247 248 let cindex = self.cindex(py).borrow();
248 249 match item.extract::<Revision>(py) {
249 250 Ok(rev) => {
250 251 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
251 252 }
252 253 Err(_) => {
253 254 cindex.inner().call_method(
254 255 py,
255 256 "has_node",
256 257 PyTuple::new(py, &[item]),
257 258 None)?
258 259 .extract(py)
259 260 }
260 261 }
261 262 }
262 263
263 264 def nodemap_data_all(&self) -> PyResult<PyBytes> {
264 265 self.inner_nodemap_data_all(py)
265 266 }
266 267
268 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
269 self.inner_nodemap_data_incremental(py)
270 }
267 271
268 272 });
269 273
270 274 impl MixedIndex {
271 275 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
272 276 Self::create_instance(
273 277 py,
274 278 RefCell::new(cindex::Index::new(py, cindex)?),
275 279 RefCell::new(None),
280 RefCell::new(None),
276 281 )
277 282 }
278 283
279 284 /// This is scaffolding at this point, but it could also become
280 285 /// a way to start a persistent nodemap or perform a
281 286 /// vacuum / repack operation
282 287 fn fill_nodemap(
283 288 &self,
284 289 py: Python,
285 290 nt: &mut NodeTree,
286 291 ) -> PyResult<PyObject> {
287 292 let index = self.cindex(py).borrow();
288 293 for r in 0..index.len() {
289 294 let rev = r as Revision;
290 295 // in this case node() won't ever return None
291 296 nt.insert(&*index, index.node(rev).unwrap(), rev)
292 297 .map_err(|e| nodemap_error(py, e))?
293 298 }
294 299 Ok(py.None())
295 300 }
296 301
297 302 fn get_nodetree<'a>(
298 303 &'a self,
299 304 py: Python<'a>,
300 305 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
301 306 if self.nt(py).borrow().is_none() {
302 307 let readonly = Box::new(Vec::new());
303 308 let mut nt = NodeTree::load_bytes(readonly, 0);
304 309 self.fill_nodemap(py, &mut nt)?;
305 310 self.nt(py).borrow_mut().replace(nt);
306 311 }
307 312 Ok(self.nt(py))
308 313 }
309 314
310 315 /// forward a method call to the underlying C index
311 316 fn call_cindex(
312 317 &self,
313 318 py: Python,
314 319 name: &str,
315 320 args: &PyTuple,
316 321 kwargs: Option<&PyDict>,
317 322 ) -> PyResult<PyObject> {
318 323 self.cindex(py)
319 324 .borrow()
320 325 .inner()
321 326 .call_method(py, name, args, kwargs)
322 327 }
323 328
324 329 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
325 330 self.cindex(py).borrow().clone_ref(py)
326 331 }
327 332
328 333 /// Returns the full nodemap bytes to be written as-is to disk
329 334 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
330 335 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
331 336 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
332 337
333 338 // If there's anything readonly, we need to build the data again from
334 339 // scratch
335 340 let bytes = if readonly.len() > 0 {
336 341 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
337 342 self.fill_nodemap(py, &mut nt)?;
338 343
339 344 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
340 345 assert_eq!(readonly.len(), 0);
341 346
342 347 bytes
343 348 } else {
344 349 bytes
345 350 };
346 351
347 352 let bytes = PyBytes::new(py, &bytes);
348 353 Ok(bytes)
349 354 }
355
356 /// Returns the last saved docket along with the size of any changed data
357 /// (in number of blocks), and said data as bytes.
358 fn inner_nodemap_data_incremental(
359 &self,
360 py: Python,
361 ) -> PyResult<PyObject> {
362 let docket = self.docket(py).borrow();
363 let docket = match docket.as_ref() {
364 Some(d) => d,
365 None => return Ok(py.None()),
366 };
367
368 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
369 let masked_blocks = node_tree.masked_readonly_blocks();
370 let (_, data) = node_tree.into_readonly_and_added_bytes();
371 let changed = masked_blocks * std::mem::size_of::<Block>();
372
373 Ok((docket, changed, PyBytes::new(py, &data))
374 .to_py_object(py)
375 .into_object())
376 }
350 377 }
351 378
352 379 fn revlog_error(py: Python) -> PyErr {
353 380 match py
354 381 .import("mercurial.error")
355 382 .and_then(|m| m.get(py, "RevlogError"))
356 383 {
357 384 Err(e) => e,
358 385 Ok(cls) => PyErr::from_instance(py, cls),
359 386 }
360 387 }
361 388
362 389 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
363 390 PyErr::new::<ValueError, _>(
364 391 py,
365 392 format!(
366 393 "Inconsistency: Revision {} found in nodemap \
367 394 is not in revlog index",
368 395 rev
369 396 ),
370 397 )
371 398 }
372 399
373 400 /// Standard treatment of NodeMapError
374 401 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
375 402 match err {
376 403 NodeMapError::MultipleResults => revlog_error(py),
377 404 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
378 405 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
379 406 }
380 407 }
381 408
382 409 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
383 410 PyErr::new::<ValueError, _>(
384 411 py,
385 412 format!("Invalid node or prefix: {:?}", ne),
386 413 )
387 414 }
388 415
389 416 /// Create the module, with __package__ given from parent
390 417 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
391 418 let dotted_name = &format!("{}.revlog", package);
392 419 let m = PyModule::new(py, dotted_name)?;
393 420 m.add(py, "__package__", package)?;
394 421 m.add(py, "__doc__", "RevLog - Rust implementations")?;
395 422
396 423 m.add_class::<MixedIndex>(py)?;
397 424
398 425 let sys = PyModule::import(py, "sys")?;
399 426 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
400 427 sys_modules.set_item(py, dotted_name, &m)?;
401 428
402 429 Ok(m)
403 430 }
General Comments 0
You need to be logged in to leave comments. Login now