##// END OF EJS Templates
rust-nodemap: add binding to `nodemap_update_data`...
Georges Racinet -
r44997:15febf99 default
parent child Browse files
Show More
@@ -1,430 +1,492
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 buffer::{Element, PyBuffer},
13 14 exc::{IndexError, ValueError},
14 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
15 16 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 17 };
17 18 use hg::{
18 19 nodemap::{Block, NodeMapError, NodeTree},
19 20 revlog::{nodemap::NodeMap, RevlogIndex},
20 21 NodeError, Revision,
21 22 };
22 23 use std::cell::RefCell;
23 24
24 25 /// Return a Struct implementing the Graph trait
25 26 pub(crate) fn pyindex_to_graph(
26 27 py: Python,
27 28 index: PyObject,
28 29 ) -> PyResult<cindex::Index> {
29 30 match index.extract::<MixedIndex>(py) {
30 31 Ok(midx) => Ok(midx.clone_cindex(py)),
31 32 Err(_) => cindex::Index::new(py, index),
32 33 }
33 34 }
34 35
35 36 py_class!(pub class MixedIndex |py| {
36 37 data cindex: RefCell<cindex::Index>;
37 38 data nt: RefCell<Option<NodeTree>>;
38 39 data docket: RefCell<Option<PyObject>>;
40 // Holds a reference to the mmap'ed persistent nodemap data
41 data mmap: RefCell<Option<PyBuffer>>;
39 42
40 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
41 44 Self::new(py, cindex)
42 45 }
43 46
44 47 /// Compatibility layer used for Python consumers needing access to the C index
45 48 ///
46 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
47 50 /// that may need to build a custom `nodetree`, based on a specified revset.
48 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
49 52 /// this, by exposing our own standalone nodemap class,
50 53 /// ready to accept `MixedIndex`.
51 54 def get_cindex(&self) -> PyResult<PyObject> {
52 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
53 56 }
54 57
55 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
56 59
57 60 /// Return Revision if found, raises a bare `error.RevlogError`
58 61 /// in case of ambiguity, same as C version does
59 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
60 63 let opt = self.get_nodetree(py)?.borrow();
61 64 let nt = opt.as_ref().unwrap();
62 65 let idx = &*self.cindex(py).borrow();
63 66 let node = node_from_py_bytes(py, &node)?;
64 67 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
65 68 }
66 69
67 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
68 71 /// is not found.
69 72 ///
70 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
71 74 /// will catch and rewrap with it
72 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
73 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
74 77 }
75 78
76 79 /// return True if the node exist in the index
77 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
78 81 self.get_rev(py, node).map(|opt| opt.is_some())
79 82 }
80 83
81 84 /// find length of shortest hex nodeid of a binary ID
82 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
83 86 let opt = self.get_nodetree(py)?.borrow();
84 87 let nt = opt.as_ref().unwrap();
85 88 let idx = &*self.cindex(py).borrow();
86 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
87 90 {
88 91 Ok(Some(l)) => Ok(l),
89 92 Ok(None) => Err(revlog_error(py)),
90 93 Err(e) => Err(nodemap_error(py, e)),
91 94 }
92 95 }
93 96
94 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
95 98 let opt = self.get_nodetree(py)?.borrow();
96 99 let nt = opt.as_ref().unwrap();
97 100 let idx = &*self.cindex(py).borrow();
98 101
99 102 let node_as_string = if cfg!(feature = "python3-sys") {
100 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
101 104 }
102 105 else {
103 106 let node = node.extract::<PyBytes>(py)?;
104 107 String::from_utf8_lossy(node.data(py)).to_string()
105 108 };
106 109
107 110 nt.find_hex(idx, &node_as_string)
108 111 // TODO make an inner API returning the node directly
109 112 .map(|opt| opt.map(
110 113 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
111 114 .map_err(|e| nodemap_error(py, e))
112 115
113 116 }
114 117
115 118 /// append an index entry
116 119 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
117 120 if tup.len(py) < 8 {
118 121 // this is better than the panic promised by tup.get_item()
119 122 return Err(
120 123 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
121 124 }
122 125 let node_bytes = tup.get_item(py, 7).extract(py)?;
123 126 let node = node_from_py_object(py, &node_bytes)?;
124 127
125 128 let mut idx = self.cindex(py).borrow_mut();
126 129 let rev = idx.len() as Revision;
127 130
128 131 idx.append(py, tup)?;
129 132 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
130 133 .insert(&*idx, &node, rev)
131 134 .map_err(|e| nodemap_error(py, e))?;
132 135 Ok(py.None())
133 136 }
134 137
135 138 def __delitem__(&self, key: PyObject) -> PyResult<()> {
136 139 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
137 140 self.cindex(py).borrow().inner().del_item(py, key)?;
138 141 let mut opt = self.get_nodetree(py)?.borrow_mut();
139 142 let mut nt = opt.as_mut().unwrap();
140 143 nt.invalidate_all();
141 144 self.fill_nodemap(py, &mut nt)?;
142 145 Ok(())
143 146 }
144 147
145 148 //
146 149 // Reforwarded C index API
147 150 //
148 151
149 152 // index_methods (tp_methods). Same ordering as in revlog.c
150 153
151 154 /// return the gca set of the given revs
152 155 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
153 156 self.call_cindex(py, "ancestors", args, kw)
154 157 }
155 158
156 159 /// return the heads of the common ancestors of the given revs
157 160 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
158 161 self.call_cindex(py, "commonancestorsheads", args, kw)
159 162 }
160 163
161 164 /// clear the index caches
162 165 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
163 166 self.call_cindex(py, "clearcaches", args, kw)
164 167 }
165 168
166 169 /// get an index entry
167 170 def get(&self, *args, **kw) -> PyResult<PyObject> {
168 171 self.call_cindex(py, "get", args, kw)
169 172 }
170 173
171 174 /// compute phases
172 175 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
173 176 self.call_cindex(py, "computephasesmapsets", args, kw)
174 177 }
175 178
176 179 /// reachableroots
177 180 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
178 181 self.call_cindex(py, "reachableroots2", args, kw)
179 182 }
180 183
181 184 /// get head revisions
182 185 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
183 186 self.call_cindex(py, "headrevs", args, kw)
184 187 }
185 188
186 189 /// get filtered head revisions
187 190 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
188 191 self.call_cindex(py, "headrevsfiltered", args, kw)
189 192 }
190 193
191 194 /// True if the object is a snapshot
192 195 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
193 196 self.call_cindex(py, "issnapshot", args, kw)
194 197 }
195 198
196 199 /// Gather snapshot data in a cache dict
197 200 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
198 201 self.call_cindex(py, "findsnapshots", args, kw)
199 202 }
200 203
201 204 /// determine revisions with deltas to reconstruct fulltext
202 205 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
203 206 self.call_cindex(py, "deltachain", args, kw)
204 207 }
205 208
206 209 /// slice planned chunk read to reach a density threshold
207 210 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
208 211 self.call_cindex(py, "slicechunktodensity", args, kw)
209 212 }
210 213
211 214 /// stats for the index
212 215 def stats(&self, *args, **kw) -> PyResult<PyObject> {
213 216 self.call_cindex(py, "stats", args, kw)
214 217 }
215 218
216 219 // index_sequence_methods and index_mapping_methods.
217 220 //
218 221 // Since we call back through the high level Python API,
219 222 // there's no point making a distinction between index_get
220 223 // and index_getitem.
221 224
222 225 def __len__(&self) -> PyResult<usize> {
223 226 self.cindex(py).borrow().inner().len(py)
224 227 }
225 228
226 229 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
227 230 // this conversion seems needless, but that's actually because
228 231 // `index_getitem` does not handle conversion from PyLong,
229 232 // which expressions such as [e for e in index] internally use.
230 233 // Note that we don't seem to have a direct way to call
231 234 // PySequence_GetItem (does the job), which would be better for
232 235 // for performance
233 236 let key = match key.extract::<Revision>(py) {
234 237 Ok(rev) => rev.to_py_object(py).into_object(),
235 238 Err(_) => key,
236 239 };
237 240 self.cindex(py).borrow().inner().get_item(py, key)
238 241 }
239 242
240 243 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
241 244 self.cindex(py).borrow().inner().set_item(py, key, value)
242 245 }
243 246
244 247 def __contains__(&self, item: PyObject) -> PyResult<bool> {
245 248 // ObjectProtocol does not seem to provide contains(), so
246 249 // this is an equivalent implementation of the index_contains()
247 250 // defined in revlog.c
248 251 let cindex = self.cindex(py).borrow();
249 252 match item.extract::<Revision>(py) {
250 253 Ok(rev) => {
251 254 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
252 255 }
253 256 Err(_) => {
254 257 cindex.inner().call_method(
255 258 py,
256 259 "has_node",
257 260 PyTuple::new(py, &[item]),
258 261 None)?
259 262 .extract(py)
260 263 }
261 264 }
262 265 }
263 266
264 267 def nodemap_data_all(&self) -> PyResult<PyBytes> {
265 268 self.inner_nodemap_data_all(py)
266 269 }
267 270
268 271 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
269 272 self.inner_nodemap_data_incremental(py)
270 273 }
274 def update_nodemap_data(
275 &self,
276 docket: PyObject,
277 nm_data: PyObject
278 ) -> PyResult<PyObject> {
279 self.inner_update_nodemap_data(py, docket, nm_data)
280 }
281
271 282
272 283 });
273 284
274 285 impl MixedIndex {
275 286 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
276 287 Self::create_instance(
277 288 py,
278 289 RefCell::new(cindex::Index::new(py, cindex)?),
279 290 RefCell::new(None),
280 291 RefCell::new(None),
292 RefCell::new(None),
281 293 )
282 294 }
283 295
284 296 /// This is scaffolding at this point, but it could also become
285 297 /// a way to start a persistent nodemap or perform a
286 298 /// vacuum / repack operation
287 299 fn fill_nodemap(
288 300 &self,
289 301 py: Python,
290 302 nt: &mut NodeTree,
291 303 ) -> PyResult<PyObject> {
292 304 let index = self.cindex(py).borrow();
293 305 for r in 0..index.len() {
294 306 let rev = r as Revision;
295 307 // in this case node() won't ever return None
296 308 nt.insert(&*index, index.node(rev).unwrap(), rev)
297 309 .map_err(|e| nodemap_error(py, e))?
298 310 }
299 311 Ok(py.None())
300 312 }
301 313
302 314 fn get_nodetree<'a>(
303 315 &'a self,
304 316 py: Python<'a>,
305 317 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
306 318 if self.nt(py).borrow().is_none() {
307 319 let readonly = Box::new(Vec::new());
308 320 let mut nt = NodeTree::load_bytes(readonly, 0);
309 321 self.fill_nodemap(py, &mut nt)?;
310 322 self.nt(py).borrow_mut().replace(nt);
311 323 }
312 324 Ok(self.nt(py))
313 325 }
314 326
315 327 /// forward a method call to the underlying C index
316 328 fn call_cindex(
317 329 &self,
318 330 py: Python,
319 331 name: &str,
320 332 args: &PyTuple,
321 333 kwargs: Option<&PyDict>,
322 334 ) -> PyResult<PyObject> {
323 335 self.cindex(py)
324 336 .borrow()
325 337 .inner()
326 338 .call_method(py, name, args, kwargs)
327 339 }
328 340
329 341 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
330 342 self.cindex(py).borrow().clone_ref(py)
331 343 }
332 344
333 345 /// Returns the full nodemap bytes to be written as-is to disk
334 346 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
335 347 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
336 348 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
337 349
338 350 // If there's anything readonly, we need to build the data again from
339 351 // scratch
340 352 let bytes = if readonly.len() > 0 {
341 353 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
342 354 self.fill_nodemap(py, &mut nt)?;
343 355
344 356 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
345 357 assert_eq!(readonly.len(), 0);
346 358
347 359 bytes
348 360 } else {
349 361 bytes
350 362 };
351 363
352 364 let bytes = PyBytes::new(py, &bytes);
353 365 Ok(bytes)
354 366 }
355 367
356 368 /// Returns the last saved docket along with the size of any changed data
357 369 /// (in number of blocks), and said data as bytes.
358 370 fn inner_nodemap_data_incremental(
359 371 &self,
360 372 py: Python,
361 373 ) -> PyResult<PyObject> {
362 374 let docket = self.docket(py).borrow();
363 375 let docket = match docket.as_ref() {
364 376 Some(d) => d,
365 377 None => return Ok(py.None()),
366 378 };
367 379
368 380 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
369 381 let masked_blocks = node_tree.masked_readonly_blocks();
370 382 let (_, data) = node_tree.into_readonly_and_added_bytes();
371 383 let changed = masked_blocks * std::mem::size_of::<Block>();
372 384
373 385 Ok((docket, changed, PyBytes::new(py, &data))
374 386 .to_py_object(py)
375 387 .into_object())
376 388 }
389
390 /// Update the nodemap from the new (mmaped) data.
391 /// The docket is kept as a reference for later incremental calls.
392 fn inner_update_nodemap_data(
393 &self,
394 py: Python,
395 docket: PyObject,
396 nm_data: PyObject,
397 ) -> PyResult<PyObject> {
398 let buf = PyBuffer::get(py, &nm_data)?;
399 let len = buf.item_count();
400
401 // Build a slice from the mmap'ed buffer data
402 let cbuf = buf.buf_ptr();
403 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
404 && buf.is_c_contiguous()
405 && u8::is_compatible_format(buf.format())
406 {
407 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
408 } else {
409 return Err(PyErr::new::<ValueError, _>(
410 py,
411 "Nodemap data buffer has an invalid memory representation"
412 .to_string(),
413 ));
414 };
415
416 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
417 // pointer.
418 self.mmap(py).borrow_mut().replace(buf);
419
420 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
421
422 let data_tip =
423 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
424 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
425 let idx = self.cindex(py).borrow();
426 let current_tip = idx.len();
427
428 for r in (data_tip + 1)..current_tip as Revision {
429 let rev = r as Revision;
430 // in this case node() won't ever return None
431 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
432 .map_err(|e| nodemap_error(py, e))?
433 }
434
435 *self.nt(py).borrow_mut() = Some(nt);
436
437 Ok(py.None())
438 }
377 439 }
378 440
379 441 fn revlog_error(py: Python) -> PyErr {
380 442 match py
381 443 .import("mercurial.error")
382 444 .and_then(|m| m.get(py, "RevlogError"))
383 445 {
384 446 Err(e) => e,
385 447 Ok(cls) => PyErr::from_instance(py, cls),
386 448 }
387 449 }
388 450
389 451 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
390 452 PyErr::new::<ValueError, _>(
391 453 py,
392 454 format!(
393 455 "Inconsistency: Revision {} found in nodemap \
394 456 is not in revlog index",
395 457 rev
396 458 ),
397 459 )
398 460 }
399 461
400 462 /// Standard treatment of NodeMapError
401 463 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
402 464 match err {
403 465 NodeMapError::MultipleResults => revlog_error(py),
404 466 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
405 467 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
406 468 }
407 469 }
408 470
409 471 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
410 472 PyErr::new::<ValueError, _>(
411 473 py,
412 474 format!("Invalid node or prefix: {:?}", ne),
413 475 )
414 476 }
415 477
416 478 /// Create the module, with __package__ given from parent
417 479 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
418 480 let dotted_name = &format!("{}.revlog", package);
419 481 let m = PyModule::new(py, dotted_name)?;
420 482 m.add(py, "__package__", package)?;
421 483 m.add(py, "__doc__", "RevLog - Rust implementations")?;
422 484
423 485 m.add_class::<MixedIndex>(py)?;
424 486
425 487 let sys = PyModule::import(py, "sys")?;
426 488 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
427 489 sys_modules.set_item(py, dotted_name, &m)?;
428 490
429 491 Ok(m)
430 492 }
General Comments 0
You need to be logged in to leave comments. Login now