##// END OF EJS Templates
rust-nodemap: also clear Rust data in `clearcaches`...
Georges Racinet -
r44998:cadcc8c2 default
parent child Browse files
Show More
@@ -1,492 +1,496 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
16 16 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, RevlogIndex},
21 21 NodeError, Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 nt.find_hex(idx, &node_as_string)
111 111 // TODO make an inner API returning the node directly
112 112 .map(|opt| opt.map(
113 113 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
114 114 .map_err(|e| nodemap_error(py, e))
115 115
116 116 }
117 117
118 118 /// append an index entry
119 119 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
120 120 if tup.len(py) < 8 {
121 121 // this is better than the panic promised by tup.get_item()
122 122 return Err(
123 123 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
124 124 }
125 125 let node_bytes = tup.get_item(py, 7).extract(py)?;
126 126 let node = node_from_py_object(py, &node_bytes)?;
127 127
128 128 let mut idx = self.cindex(py).borrow_mut();
129 129 let rev = idx.len() as Revision;
130 130
131 131 idx.append(py, tup)?;
132 132 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
133 133 .insert(&*idx, &node, rev)
134 134 .map_err(|e| nodemap_error(py, e))?;
135 135 Ok(py.None())
136 136 }
137 137
138 138 def __delitem__(&self, key: PyObject) -> PyResult<()> {
139 139 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
140 140 self.cindex(py).borrow().inner().del_item(py, key)?;
141 141 let mut opt = self.get_nodetree(py)?.borrow_mut();
142 142 let mut nt = opt.as_mut().unwrap();
143 143 nt.invalidate_all();
144 144 self.fill_nodemap(py, &mut nt)?;
145 145 Ok(())
146 146 }
147 147
148 148 //
149 149 // Reforwarded C index API
150 150 //
151 151
152 152 // index_methods (tp_methods). Same ordering as in revlog.c
153 153
154 154 /// return the gca set of the given revs
155 155 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
156 156 self.call_cindex(py, "ancestors", args, kw)
157 157 }
158 158
159 159 /// return the heads of the common ancestors of the given revs
160 160 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
161 161 self.call_cindex(py, "commonancestorsheads", args, kw)
162 162 }
163 163
164 /// clear the index caches
164 /// Clear the index caches and inner py_class data.
165 /// It is Python's responsibility to call `update_nodemap_data` again.
165 166 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
167 self.nt(py).borrow_mut().take();
168 self.docket(py).borrow_mut().take();
169 self.mmap(py).borrow_mut().take();
166 170 self.call_cindex(py, "clearcaches", args, kw)
167 171 }
168 172
169 173 /// get an index entry
170 174 def get(&self, *args, **kw) -> PyResult<PyObject> {
171 175 self.call_cindex(py, "get", args, kw)
172 176 }
173 177
174 178 /// compute phases
175 179 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
176 180 self.call_cindex(py, "computephasesmapsets", args, kw)
177 181 }
178 182
179 183 /// reachableroots
180 184 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
181 185 self.call_cindex(py, "reachableroots2", args, kw)
182 186 }
183 187
184 188 /// get head revisions
185 189 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
186 190 self.call_cindex(py, "headrevs", args, kw)
187 191 }
188 192
189 193 /// get filtered head revisions
190 194 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
191 195 self.call_cindex(py, "headrevsfiltered", args, kw)
192 196 }
193 197
194 198 /// True if the object is a snapshot
195 199 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
196 200 self.call_cindex(py, "issnapshot", args, kw)
197 201 }
198 202
199 203 /// Gather snapshot data in a cache dict
200 204 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
201 205 self.call_cindex(py, "findsnapshots", args, kw)
202 206 }
203 207
204 208 /// determine revisions with deltas to reconstruct fulltext
205 209 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
206 210 self.call_cindex(py, "deltachain", args, kw)
207 211 }
208 212
209 213 /// slice planned chunk read to reach a density threshold
210 214 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
211 215 self.call_cindex(py, "slicechunktodensity", args, kw)
212 216 }
213 217
214 218 /// stats for the index
215 219 def stats(&self, *args, **kw) -> PyResult<PyObject> {
216 220 self.call_cindex(py, "stats", args, kw)
217 221 }
218 222
219 223 // index_sequence_methods and index_mapping_methods.
220 224 //
221 225 // Since we call back through the high level Python API,
222 226 // there's no point making a distinction between index_get
223 227 // and index_getitem.
224 228
225 229 def __len__(&self) -> PyResult<usize> {
226 230 self.cindex(py).borrow().inner().len(py)
227 231 }
228 232
229 233 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
230 234 // this conversion seems needless, but that's actually because
231 235 // `index_getitem` does not handle conversion from PyLong,
232 236 // which expressions such as [e for e in index] internally use.
233 237 // Note that we don't seem to have a direct way to call
234 // PySequence_GetItem (does the job), which would be better for
238 // PySequence_GetItem (does the job), which would possibly be better
235 239 // for performance
236 240 let key = match key.extract::<Revision>(py) {
237 241 Ok(rev) => rev.to_py_object(py).into_object(),
238 242 Err(_) => key,
239 243 };
240 244 self.cindex(py).borrow().inner().get_item(py, key)
241 245 }
242 246
243 247 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
244 248 self.cindex(py).borrow().inner().set_item(py, key, value)
245 249 }
246 250
247 251 def __contains__(&self, item: PyObject) -> PyResult<bool> {
248 252 // ObjectProtocol does not seem to provide contains(), so
249 253 // this is an equivalent implementation of the index_contains()
250 254 // defined in revlog.c
251 255 let cindex = self.cindex(py).borrow();
252 256 match item.extract::<Revision>(py) {
253 257 Ok(rev) => {
254 258 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
255 259 }
256 260 Err(_) => {
257 261 cindex.inner().call_method(
258 262 py,
259 263 "has_node",
260 264 PyTuple::new(py, &[item]),
261 265 None)?
262 266 .extract(py)
263 267 }
264 268 }
265 269 }
266 270
267 271 def nodemap_data_all(&self) -> PyResult<PyBytes> {
268 272 self.inner_nodemap_data_all(py)
269 273 }
270 274
271 275 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
272 276 self.inner_nodemap_data_incremental(py)
273 277 }
274 278 def update_nodemap_data(
275 279 &self,
276 280 docket: PyObject,
277 281 nm_data: PyObject
278 282 ) -> PyResult<PyObject> {
279 283 self.inner_update_nodemap_data(py, docket, nm_data)
280 284 }
281 285
282 286
283 287 });
284 288
285 289 impl MixedIndex {
286 290 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
287 291 Self::create_instance(
288 292 py,
289 293 RefCell::new(cindex::Index::new(py, cindex)?),
290 294 RefCell::new(None),
291 295 RefCell::new(None),
292 296 RefCell::new(None),
293 297 )
294 298 }
295 299
296 300 /// This is scaffolding at this point, but it could also become
297 301 /// a way to start a persistent nodemap or perform a
298 302 /// vacuum / repack operation
299 303 fn fill_nodemap(
300 304 &self,
301 305 py: Python,
302 306 nt: &mut NodeTree,
303 307 ) -> PyResult<PyObject> {
304 308 let index = self.cindex(py).borrow();
305 309 for r in 0..index.len() {
306 310 let rev = r as Revision;
307 311 // in this case node() won't ever return None
308 312 nt.insert(&*index, index.node(rev).unwrap(), rev)
309 313 .map_err(|e| nodemap_error(py, e))?
310 314 }
311 315 Ok(py.None())
312 316 }
313 317
314 318 fn get_nodetree<'a>(
315 319 &'a self,
316 320 py: Python<'a>,
317 321 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
318 322 if self.nt(py).borrow().is_none() {
319 323 let readonly = Box::new(Vec::new());
320 324 let mut nt = NodeTree::load_bytes(readonly, 0);
321 325 self.fill_nodemap(py, &mut nt)?;
322 326 self.nt(py).borrow_mut().replace(nt);
323 327 }
324 328 Ok(self.nt(py))
325 329 }
326 330
327 331 /// forward a method call to the underlying C index
328 332 fn call_cindex(
329 333 &self,
330 334 py: Python,
331 335 name: &str,
332 336 args: &PyTuple,
333 337 kwargs: Option<&PyDict>,
334 338 ) -> PyResult<PyObject> {
335 339 self.cindex(py)
336 340 .borrow()
337 341 .inner()
338 342 .call_method(py, name, args, kwargs)
339 343 }
340 344
341 345 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
342 346 self.cindex(py).borrow().clone_ref(py)
343 347 }
344 348
345 349 /// Returns the full nodemap bytes to be written as-is to disk
346 350 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
347 351 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
348 352 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
349 353
350 354 // If there's anything readonly, we need to build the data again from
351 355 // scratch
352 356 let bytes = if readonly.len() > 0 {
353 357 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
354 358 self.fill_nodemap(py, &mut nt)?;
355 359
356 360 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
357 361 assert_eq!(readonly.len(), 0);
358 362
359 363 bytes
360 364 } else {
361 365 bytes
362 366 };
363 367
364 368 let bytes = PyBytes::new(py, &bytes);
365 369 Ok(bytes)
366 370 }
367 371
368 372 /// Returns the last saved docket along with the size of any changed data
369 373 /// (in number of blocks), and said data as bytes.
370 374 fn inner_nodemap_data_incremental(
371 375 &self,
372 376 py: Python,
373 377 ) -> PyResult<PyObject> {
374 378 let docket = self.docket(py).borrow();
375 379 let docket = match docket.as_ref() {
376 380 Some(d) => d,
377 381 None => return Ok(py.None()),
378 382 };
379 383
380 384 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
381 385 let masked_blocks = node_tree.masked_readonly_blocks();
382 386 let (_, data) = node_tree.into_readonly_and_added_bytes();
383 387 let changed = masked_blocks * std::mem::size_of::<Block>();
384 388
385 389 Ok((docket, changed, PyBytes::new(py, &data))
386 390 .to_py_object(py)
387 391 .into_object())
388 392 }
389 393
390 394 /// Update the nodemap from the new (mmaped) data.
391 395 /// The docket is kept as a reference for later incremental calls.
392 396 fn inner_update_nodemap_data(
393 397 &self,
394 398 py: Python,
395 399 docket: PyObject,
396 400 nm_data: PyObject,
397 401 ) -> PyResult<PyObject> {
398 402 let buf = PyBuffer::get(py, &nm_data)?;
399 403 let len = buf.item_count();
400 404
401 405 // Build a slice from the mmap'ed buffer data
402 406 let cbuf = buf.buf_ptr();
403 407 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
404 408 && buf.is_c_contiguous()
405 409 && u8::is_compatible_format(buf.format())
406 410 {
407 411 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
408 412 } else {
409 413 return Err(PyErr::new::<ValueError, _>(
410 414 py,
411 415 "Nodemap data buffer has an invalid memory representation"
412 416 .to_string(),
413 417 ));
414 418 };
415 419
416 420 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
417 421 // pointer.
418 422 self.mmap(py).borrow_mut().replace(buf);
419 423
420 424 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
421 425
422 426 let data_tip =
423 427 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
424 428 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
425 429 let idx = self.cindex(py).borrow();
426 430 let current_tip = idx.len();
427 431
428 432 for r in (data_tip + 1)..current_tip as Revision {
429 433 let rev = r as Revision;
430 434 // in this case node() won't ever return None
431 435 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
432 436 .map_err(|e| nodemap_error(py, e))?
433 437 }
434 438
435 439 *self.nt(py).borrow_mut() = Some(nt);
436 440
437 441 Ok(py.None())
438 442 }
439 443 }
440 444
441 445 fn revlog_error(py: Python) -> PyErr {
442 446 match py
443 447 .import("mercurial.error")
444 448 .and_then(|m| m.get(py, "RevlogError"))
445 449 {
446 450 Err(e) => e,
447 451 Ok(cls) => PyErr::from_instance(py, cls),
448 452 }
449 453 }
450 454
451 455 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
452 456 PyErr::new::<ValueError, _>(
453 457 py,
454 458 format!(
455 459 "Inconsistency: Revision {} found in nodemap \
456 460 is not in revlog index",
457 461 rev
458 462 ),
459 463 )
460 464 }
461 465
462 466 /// Standard treatment of NodeMapError
463 467 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
464 468 match err {
465 469 NodeMapError::MultipleResults => revlog_error(py),
466 470 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
467 471 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
468 472 }
469 473 }
470 474
471 475 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
472 476 PyErr::new::<ValueError, _>(
473 477 py,
474 478 format!("Invalid node or prefix: {:?}", ne),
475 479 )
476 480 }
477 481
478 482 /// Create the module, with __package__ given from parent
479 483 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
480 484 let dotted_name = &format!("{}.revlog", package);
481 485 let m = PyModule::new(py, dotted_name)?;
482 486 m.add(py, "__package__", package)?;
483 487 m.add(py, "__doc__", "RevLog - Rust implementations")?;
484 488
485 489 m.add_class::<MixedIndex>(py)?;
486 490
487 491 let sys = PyModule::import(py, "sys")?;
488 492 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
489 493 sys_modules.set_item(py, dotted_name, &m)?;
490 494
491 495 Ok(m)
492 496 }
General Comments 0
You need to be logged in to leave comments. Login now