##// END OF EJS Templates
rust-mixed-index: rename variable to make the next change clearer...
Raphaël Gomès -
r52079:8ade5e6c default
parent child Browse files
Show More
@@ -1,524 +1,524 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 };
19 19 use hg::{
20 20 nodemap::{Block, NodeMapError, NodeTree},
21 21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 22 BaseRevision, Revision, UncheckedRevision,
23 23 };
24 24 use std::cell::RefCell;
25 25
26 26 /// Return a Struct implementing the Graph trait
27 27 pub(crate) fn pyindex_to_graph(
28 28 py: Python,
29 29 index: PyObject,
30 30 ) -> PyResult<cindex::Index> {
31 31 match index.extract::<MixedIndex>(py) {
32 32 Ok(midx) => Ok(midx.clone_cindex(py)),
33 33 Err(_) => cindex::Index::new(py, index),
34 34 }
35 35 }
36 36
37 37 py_class!(pub class MixedIndex |py| {
38 38 data cindex: RefCell<cindex::Index>;
39 39 data nt: RefCell<Option<NodeTree>>;
40 40 data docket: RefCell<Option<PyObject>>;
41 41 // Holds a reference to the mmap'ed persistent nodemap data
42 data mmap: RefCell<Option<PyBuffer>>;
42 data nodemap_mmap: RefCell<Option<PyBuffer>>;
43 43
44 44 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
45 45 Self::new(py, cindex)
46 46 }
47 47
48 48 /// Compatibility layer used for Python consumers needing access to the C index
49 49 ///
50 50 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
51 51 /// that may need to build a custom `nodetree`, based on a specified revset.
52 52 /// With a Rust implementation of the nodemap, we will be able to get rid of
53 53 /// this, by exposing our own standalone nodemap class,
54 54 /// ready to accept `MixedIndex`.
55 55 def get_cindex(&self) -> PyResult<PyObject> {
56 56 Ok(self.cindex(py).borrow().inner().clone_ref(py))
57 57 }
58 58
59 59 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
60 60
61 61 /// Return Revision if found, raises a bare `error.RevlogError`
62 62 /// in case of ambiguity, same as C version does
63 63 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
64 64 let opt = self.get_nodetree(py)?.borrow();
65 65 let nt = opt.as_ref().unwrap();
66 66 let idx = &*self.cindex(py).borrow();
67 67 let node = node_from_py_bytes(py, &node)?;
68 68 let res = nt.find_bin(idx, node.into());
69 69 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
70 70 }
71 71
72 72 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
73 73 /// is not found.
74 74 ///
75 75 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
76 76 /// will catch and rewrap with it
77 77 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
78 78 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
79 79 }
80 80
81 81 /// return True if the node exist in the index
82 82 def has_node(&self, node: PyBytes) -> PyResult<bool> {
83 83 self.get_rev(py, node).map(|opt| opt.is_some())
84 84 }
85 85
86 86 /// find length of shortest hex nodeid of a binary ID
87 87 def shortest(&self, node: PyBytes) -> PyResult<usize> {
88 88 let opt = self.get_nodetree(py)?.borrow();
89 89 let nt = opt.as_ref().unwrap();
90 90 let idx = &*self.cindex(py).borrow();
91 91 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
92 92 {
93 93 Ok(Some(l)) => Ok(l),
94 94 Ok(None) => Err(revlog_error(py)),
95 95 Err(e) => Err(nodemap_error(py, e)),
96 96 }
97 97 }
98 98
99 99 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
100 100 let opt = self.get_nodetree(py)?.borrow();
101 101 let nt = opt.as_ref().unwrap();
102 102 let idx = &*self.cindex(py).borrow();
103 103
104 104 let node_as_string = if cfg!(feature = "python3-sys") {
105 105 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
106 106 }
107 107 else {
108 108 let node = node.extract::<PyBytes>(py)?;
109 109 String::from_utf8_lossy(node.data(py)).to_string()
110 110 };
111 111
112 112 let prefix = NodePrefix::from_hex(&node_as_string)
113 113 .map_err(|_| PyErr::new::<ValueError, _>(
114 114 py, format!("Invalid node or prefix '{}'", node_as_string))
115 115 )?;
116 116
117 117 nt.find_bin(idx, prefix)
118 118 // TODO make an inner API returning the node directly
119 119 .map(|opt| opt.map(
120 120 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
121 121 .map_err(|e| nodemap_error(py, e))
122 122
123 123 }
124 124
125 125 /// append an index entry
126 126 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
127 127 if tup.len(py) < 8 {
128 128 // this is better than the panic promised by tup.get_item()
129 129 return Err(
130 130 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
131 131 }
132 132 let node_bytes = tup.get_item(py, 7).extract(py)?;
133 133 let node = node_from_py_object(py, &node_bytes)?;
134 134
135 135 let mut idx = self.cindex(py).borrow_mut();
136 136
137 137 // This is ok since we will just add the revision to the index
138 138 let rev = Revision(idx.len() as BaseRevision);
139 139 idx.append(py, tup)?;
140 140
141 141 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
142 142 .insert(&*idx, &node, rev)
143 143 .map_err(|e| nodemap_error(py, e))?;
144 144 Ok(py.None())
145 145 }
146 146
147 147 def __delitem__(&self, key: PyObject) -> PyResult<()> {
148 148 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
149 149 self.cindex(py).borrow().inner().del_item(py, key)?;
150 150 let mut opt = self.get_nodetree(py)?.borrow_mut();
151 151 let nt = opt.as_mut().unwrap();
152 152 nt.invalidate_all();
153 153 self.fill_nodemap(py, nt)?;
154 154 Ok(())
155 155 }
156 156
157 157 //
158 158 // Reforwarded C index API
159 159 //
160 160
161 161 // index_methods (tp_methods). Same ordering as in revlog.c
162 162
163 163 /// return the gca set of the given revs
164 164 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
165 165 self.call_cindex(py, "ancestors", args, kw)
166 166 }
167 167
168 168 /// return the heads of the common ancestors of the given revs
169 169 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
170 170 self.call_cindex(py, "commonancestorsheads", args, kw)
171 171 }
172 172
173 173 /// Clear the index caches and inner py_class data.
174 174 /// It is Python's responsibility to call `update_nodemap_data` again.
175 175 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
176 176 self.nt(py).borrow_mut().take();
177 177 self.docket(py).borrow_mut().take();
178 self.mmap(py).borrow_mut().take();
178 self.nodemap_mmap(py).borrow_mut().take();
179 179 self.call_cindex(py, "clearcaches", args, kw)
180 180 }
181 181
182 182 /// return the raw binary string representing a revision
183 183 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
184 184 self.call_cindex(py, "entry_binary", args, kw)
185 185 }
186 186
187 187 /// return a binary packed version of the header
188 188 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
189 189 self.call_cindex(py, "pack_header", args, kw)
190 190 }
191 191
192 192 /// get an index entry
193 193 def get(&self, *args, **kw) -> PyResult<PyObject> {
194 194 self.call_cindex(py, "get", args, kw)
195 195 }
196 196
197 197 /// compute phases
198 198 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
199 199 self.call_cindex(py, "computephasesmapsets", args, kw)
200 200 }
201 201
202 202 /// reachableroots
203 203 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
204 204 self.call_cindex(py, "reachableroots2", args, kw)
205 205 }
206 206
207 207 /// get head revisions
208 208 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
209 209 self.call_cindex(py, "headrevs", args, kw)
210 210 }
211 211
212 212 /// get filtered head revisions
213 213 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
214 214 self.call_cindex(py, "headrevsfiltered", args, kw)
215 215 }
216 216
217 217 /// True if the object is a snapshot
218 218 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
219 219 self.call_cindex(py, "issnapshot", args, kw)
220 220 }
221 221
222 222 /// Gather snapshot data in a cache dict
223 223 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
224 224 self.call_cindex(py, "findsnapshots", args, kw)
225 225 }
226 226
227 227 /// determine revisions with deltas to reconstruct fulltext
228 228 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
229 229 self.call_cindex(py, "deltachain", args, kw)
230 230 }
231 231
232 232 /// slice planned chunk read to reach a density threshold
233 233 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
234 234 self.call_cindex(py, "slicechunktodensity", args, kw)
235 235 }
236 236
237 237 /// stats for the index
238 238 def stats(&self, *args, **kw) -> PyResult<PyObject> {
239 239 self.call_cindex(py, "stats", args, kw)
240 240 }
241 241
242 242 // index_sequence_methods and index_mapping_methods.
243 243 //
244 244 // Since we call back through the high level Python API,
245 245 // there's no point making a distinction between index_get
246 246 // and index_getitem.
247 247
248 248 def __len__(&self) -> PyResult<usize> {
249 249 self.cindex(py).borrow().inner().len(py)
250 250 }
251 251
252 252 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
253 253 // this conversion seems needless, but that's actually because
254 254 // `index_getitem` does not handle conversion from PyLong,
255 255 // which expressions such as [e for e in index] internally use.
256 256 // Note that we don't seem to have a direct way to call
257 257 // PySequence_GetItem (does the job), which would possibly be better
258 258 // for performance
259 259 let key = match key.extract::<i32>(py) {
260 260 Ok(rev) => rev.to_py_object(py).into_object(),
261 261 Err(_) => key,
262 262 };
263 263 self.cindex(py).borrow().inner().get_item(py, key)
264 264 }
265 265
266 266 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
267 267 self.cindex(py).borrow().inner().set_item(py, key, value)
268 268 }
269 269
270 270 def __contains__(&self, item: PyObject) -> PyResult<bool> {
271 271 // ObjectProtocol does not seem to provide contains(), so
272 272 // this is an equivalent implementation of the index_contains()
273 273 // defined in revlog.c
274 274 let cindex = self.cindex(py).borrow();
275 275 match item.extract::<i32>(py) {
276 276 Ok(rev) => {
277 277 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
278 278 }
279 279 Err(_) => {
280 280 cindex.inner().call_method(
281 281 py,
282 282 "has_node",
283 283 PyTuple::new(py, &[item]),
284 284 None)?
285 285 .extract(py)
286 286 }
287 287 }
288 288 }
289 289
290 290 def nodemap_data_all(&self) -> PyResult<PyBytes> {
291 291 self.inner_nodemap_data_all(py)
292 292 }
293 293
294 294 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
295 295 self.inner_nodemap_data_incremental(py)
296 296 }
297 297 def update_nodemap_data(
298 298 &self,
299 299 docket: PyObject,
300 300 nm_data: PyObject
301 301 ) -> PyResult<PyObject> {
302 302 self.inner_update_nodemap_data(py, docket, nm_data)
303 303 }
304 304
305 305 @property
306 306 def entry_size(&self) -> PyResult<PyInt> {
307 307 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
308 308 }
309 309
310 310 @property
311 311 def rust_ext_compat(&self) -> PyResult<PyInt> {
312 312 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
313 313 }
314 314
315 315 });
316 316
317 317 impl MixedIndex {
318 318 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
319 319 Self::create_instance(
320 320 py,
321 321 RefCell::new(cindex::Index::new(py, cindex)?),
322 322 RefCell::new(None),
323 323 RefCell::new(None),
324 324 RefCell::new(None),
325 325 )
326 326 }
327 327
328 328 /// This is scaffolding at this point, but it could also become
329 329 /// a way to start a persistent nodemap or perform a
330 330 /// vacuum / repack operation
331 331 fn fill_nodemap(
332 332 &self,
333 333 py: Python,
334 334 nt: &mut NodeTree,
335 335 ) -> PyResult<PyObject> {
336 336 let index = self.cindex(py).borrow();
337 337 for r in 0..index.len() {
338 338 let rev = Revision(r as BaseRevision);
339 339 // in this case node() won't ever return None
340 340 nt.insert(&*index, index.node(rev).unwrap(), rev)
341 341 .map_err(|e| nodemap_error(py, e))?
342 342 }
343 343 Ok(py.None())
344 344 }
345 345
346 346 fn get_nodetree<'a>(
347 347 &'a self,
348 348 py: Python<'a>,
349 349 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
350 350 if self.nt(py).borrow().is_none() {
351 351 let readonly = Box::<Vec<_>>::default();
352 352 let mut nt = NodeTree::load_bytes(readonly, 0);
353 353 self.fill_nodemap(py, &mut nt)?;
354 354 self.nt(py).borrow_mut().replace(nt);
355 355 }
356 356 Ok(self.nt(py))
357 357 }
358 358
359 359 /// forward a method call to the underlying C index
360 360 fn call_cindex(
361 361 &self,
362 362 py: Python,
363 363 name: &str,
364 364 args: &PyTuple,
365 365 kwargs: Option<&PyDict>,
366 366 ) -> PyResult<PyObject> {
367 367 self.cindex(py)
368 368 .borrow()
369 369 .inner()
370 370 .call_method(py, name, args, kwargs)
371 371 }
372 372
373 373 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
374 374 self.cindex(py).borrow().clone_ref(py)
375 375 }
376 376
377 377 /// Returns the full nodemap bytes to be written as-is to disk
378 378 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
379 379 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
380 380 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
381 381
382 382 // If there's anything readonly, we need to build the data again from
383 383 // scratch
384 384 let bytes = if readonly.len() > 0 {
385 385 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
386 386 self.fill_nodemap(py, &mut nt)?;
387 387
388 388 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
389 389 assert_eq!(readonly.len(), 0);
390 390
391 391 bytes
392 392 } else {
393 393 bytes
394 394 };
395 395
396 396 let bytes = PyBytes::new(py, &bytes);
397 397 Ok(bytes)
398 398 }
399 399
400 400 /// Returns the last saved docket along with the size of any changed data
401 401 /// (in number of blocks), and said data as bytes.
402 402 fn inner_nodemap_data_incremental(
403 403 &self,
404 404 py: Python,
405 405 ) -> PyResult<PyObject> {
406 406 let docket = self.docket(py).borrow();
407 407 let docket = match docket.as_ref() {
408 408 Some(d) => d,
409 409 None => return Ok(py.None()),
410 410 };
411 411
412 412 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
413 413 let masked_blocks = node_tree.masked_readonly_blocks();
414 414 let (_, data) = node_tree.into_readonly_and_added_bytes();
415 415 let changed = masked_blocks * std::mem::size_of::<Block>();
416 416
417 417 Ok((docket, changed, PyBytes::new(py, &data))
418 418 .to_py_object(py)
419 419 .into_object())
420 420 }
421 421
422 422 /// Update the nodemap from the new (mmaped) data.
423 423 /// The docket is kept as a reference for later incremental calls.
424 424 fn inner_update_nodemap_data(
425 425 &self,
426 426 py: Python,
427 427 docket: PyObject,
428 428 nm_data: PyObject,
429 429 ) -> PyResult<PyObject> {
430 430 let buf = PyBuffer::get(py, &nm_data)?;
431 431 let len = buf.item_count();
432 432
433 433 // Build a slice from the mmap'ed buffer data
434 434 let cbuf = buf.buf_ptr();
435 435 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
436 436 && buf.is_c_contiguous()
437 437 && u8::is_compatible_format(buf.format())
438 438 {
439 439 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
440 440 } else {
441 441 return Err(PyErr::new::<ValueError, _>(
442 442 py,
443 443 "Nodemap data buffer has an invalid memory representation"
444 444 .to_string(),
445 445 ));
446 446 };
447 447
448 448 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
449 449 // pointer.
450 self.mmap(py).borrow_mut().replace(buf);
450 self.nodemap_mmap(py).borrow_mut().replace(buf);
451 451
452 452 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
453 453
454 454 let data_tip = docket
455 455 .getattr(py, "tip_rev")?
456 456 .extract::<BaseRevision>(py)?
457 457 .into();
458 458 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
459 459 let idx = self.cindex(py).borrow();
460 460 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
461 461 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
462 462 })?;
463 463 let current_tip = idx.len();
464 464
465 465 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
466 466 let rev = Revision(r);
467 467 // in this case node() won't ever return None
468 468 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
469 469 .map_err(|e| nodemap_error(py, e))?
470 470 }
471 471
472 472 *self.nt(py).borrow_mut() = Some(nt);
473 473
474 474 Ok(py.None())
475 475 }
476 476 }
477 477
478 478 fn revlog_error(py: Python) -> PyErr {
479 479 match py
480 480 .import("mercurial.error")
481 481 .and_then(|m| m.get(py, "RevlogError"))
482 482 {
483 483 Err(e) => e,
484 484 Ok(cls) => PyErr::from_instance(
485 485 py,
486 486 cls.call(py, (py.None(),), None).ok().into_py_object(py),
487 487 ),
488 488 }
489 489 }
490 490
491 491 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
492 492 PyErr::new::<ValueError, _>(
493 493 py,
494 494 format!(
495 495 "Inconsistency: Revision {} found in nodemap \
496 496 is not in revlog index",
497 497 rev
498 498 ),
499 499 )
500 500 }
501 501
502 502 /// Standard treatment of NodeMapError
503 503 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
504 504 match err {
505 505 NodeMapError::MultipleResults => revlog_error(py),
506 506 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
507 507 }
508 508 }
509 509
510 510 /// Create the module, with __package__ given from parent
511 511 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
512 512 let dotted_name = &format!("{}.revlog", package);
513 513 let m = PyModule::new(py, dotted_name)?;
514 514 m.add(py, "__package__", package)?;
515 515 m.add(py, "__doc__", "RevLog - Rust implementations")?;
516 516
517 517 m.add_class::<MixedIndex>(py)?;
518 518
519 519 let sys = PyModule::import(py, "sys")?;
520 520 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
521 521 sys_modules.set_item(py, dotted_name, &m)?;
522 522
523 523 Ok(m)
524 524 }
General Comments 0
You need to be logged in to leave comments. Login now