##// END OF EJS Templates
hg-cpython: fix new occurring TypeError...
Raphaël Gomès -
r48086:33e7508b default
parent child Browse files
Show More
@@ -1,509 +1,512 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111 111
112 112 nt.find_bin(idx, prefix)
113 113 // TODO make an inner API returning the node directly
114 114 .map(|opt| opt.map(
115 115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 116 .map_err(|e| nodemap_error(py, e))
117 117
118 118 }
119 119
120 120 /// append an index entry
121 121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 122 if tup.len(py) < 8 {
123 123 // this is better than the panic promised by tup.get_item()
124 124 return Err(
125 125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 126 }
127 127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 128 let node = node_from_py_object(py, &node_bytes)?;
129 129
130 130 let mut idx = self.cindex(py).borrow_mut();
131 131 let rev = idx.len() as Revision;
132 132
133 133 idx.append(py, tup)?;
134 134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 135 .insert(&*idx, &node, rev)
136 136 .map_err(|e| nodemap_error(py, e))?;
137 137 Ok(py.None())
138 138 }
139 139
140 140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 144 let mut nt = opt.as_mut().unwrap();
145 145 nt.invalidate_all();
146 146 self.fill_nodemap(py, &mut nt)?;
147 147 Ok(())
148 148 }
149 149
150 150 //
151 151 // Reforwarded C index API
152 152 //
153 153
154 154 // index_methods (tp_methods). Same ordering as in revlog.c
155 155
156 156 /// return the gca set of the given revs
157 157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 158 self.call_cindex(py, "ancestors", args, kw)
159 159 }
160 160
161 161 /// return the heads of the common ancestors of the given revs
162 162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 164 }
165 165
166 166 /// Clear the index caches and inner py_class data.
167 167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 169 self.nt(py).borrow_mut().take();
170 170 self.docket(py).borrow_mut().take();
171 171 self.mmap(py).borrow_mut().take();
172 172 self.call_cindex(py, "clearcaches", args, kw)
173 173 }
174 174
175 175 /// return the raw binary string representing a revision
176 176 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
177 177 self.call_cindex(py, "entry_binary", args, kw)
178 178 }
179 179
180 180 /// return a binary packed version of the header
181 181 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
182 182 self.call_cindex(py, "pack_header", args, kw)
183 183 }
184 184
185 185 /// get an index entry
186 186 def get(&self, *args, **kw) -> PyResult<PyObject> {
187 187 self.call_cindex(py, "get", args, kw)
188 188 }
189 189
190 190 /// compute phases
191 191 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
192 192 self.call_cindex(py, "computephasesmapsets", args, kw)
193 193 }
194 194
195 195 /// reachableroots
196 196 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
197 197 self.call_cindex(py, "reachableroots2", args, kw)
198 198 }
199 199
200 200 /// get head revisions
201 201 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
202 202 self.call_cindex(py, "headrevs", args, kw)
203 203 }
204 204
205 205 /// get filtered head revisions
206 206 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
207 207 self.call_cindex(py, "headrevsfiltered", args, kw)
208 208 }
209 209
210 210 /// True if the object is a snapshot
211 211 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
212 212 self.call_cindex(py, "issnapshot", args, kw)
213 213 }
214 214
215 215 /// Gather snapshot data in a cache dict
216 216 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
217 217 self.call_cindex(py, "findsnapshots", args, kw)
218 218 }
219 219
220 220 /// determine revisions with deltas to reconstruct fulltext
221 221 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
222 222 self.call_cindex(py, "deltachain", args, kw)
223 223 }
224 224
225 225 /// slice planned chunk read to reach a density threshold
226 226 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
227 227 self.call_cindex(py, "slicechunktodensity", args, kw)
228 228 }
229 229
230 230 /// stats for the index
231 231 def stats(&self, *args, **kw) -> PyResult<PyObject> {
232 232 self.call_cindex(py, "stats", args, kw)
233 233 }
234 234
235 235 // index_sequence_methods and index_mapping_methods.
236 236 //
237 237 // Since we call back through the high level Python API,
238 238 // there's no point making a distinction between index_get
239 239 // and index_getitem.
240 240
241 241 def __len__(&self) -> PyResult<usize> {
242 242 self.cindex(py).borrow().inner().len(py)
243 243 }
244 244
245 245 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
246 246 // this conversion seems needless, but that's actually because
247 247 // `index_getitem` does not handle conversion from PyLong,
248 248 // which expressions such as [e for e in index] internally use.
249 249 // Note that we don't seem to have a direct way to call
250 250 // PySequence_GetItem (does the job), which would possibly be better
251 251 // for performance
252 252 let key = match key.extract::<Revision>(py) {
253 253 Ok(rev) => rev.to_py_object(py).into_object(),
254 254 Err(_) => key,
255 255 };
256 256 self.cindex(py).borrow().inner().get_item(py, key)
257 257 }
258 258
259 259 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
260 260 self.cindex(py).borrow().inner().set_item(py, key, value)
261 261 }
262 262
263 263 def __contains__(&self, item: PyObject) -> PyResult<bool> {
264 264 // ObjectProtocol does not seem to provide contains(), so
265 265 // this is an equivalent implementation of the index_contains()
266 266 // defined in revlog.c
267 267 let cindex = self.cindex(py).borrow();
268 268 match item.extract::<Revision>(py) {
269 269 Ok(rev) => {
270 270 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
271 271 }
272 272 Err(_) => {
273 273 cindex.inner().call_method(
274 274 py,
275 275 "has_node",
276 276 PyTuple::new(py, &[item]),
277 277 None)?
278 278 .extract(py)
279 279 }
280 280 }
281 281 }
282 282
283 283 def nodemap_data_all(&self) -> PyResult<PyBytes> {
284 284 self.inner_nodemap_data_all(py)
285 285 }
286 286
287 287 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
288 288 self.inner_nodemap_data_incremental(py)
289 289 }
290 290 def update_nodemap_data(
291 291 &self,
292 292 docket: PyObject,
293 293 nm_data: PyObject
294 294 ) -> PyResult<PyObject> {
295 295 self.inner_update_nodemap_data(py, docket, nm_data)
296 296 }
297 297
298 298 @property
299 299 def entry_size(&self) -> PyResult<PyInt> {
300 300 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
301 301 }
302 302
303 303 @property
304 304 def rust_ext_compat(&self) -> PyResult<PyInt> {
305 305 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
306 306 }
307 307
308 308 });
309 309
310 310 impl MixedIndex {
311 311 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
312 312 Self::create_instance(
313 313 py,
314 314 RefCell::new(cindex::Index::new(py, cindex)?),
315 315 RefCell::new(None),
316 316 RefCell::new(None),
317 317 RefCell::new(None),
318 318 )
319 319 }
320 320
321 321 /// This is scaffolding at this point, but it could also become
322 322 /// a way to start a persistent nodemap or perform a
323 323 /// vacuum / repack operation
324 324 fn fill_nodemap(
325 325 &self,
326 326 py: Python,
327 327 nt: &mut NodeTree,
328 328 ) -> PyResult<PyObject> {
329 329 let index = self.cindex(py).borrow();
330 330 for r in 0..index.len() {
331 331 let rev = r as Revision;
332 332 // in this case node() won't ever return None
333 333 nt.insert(&*index, index.node(rev).unwrap(), rev)
334 334 .map_err(|e| nodemap_error(py, e))?
335 335 }
336 336 Ok(py.None())
337 337 }
338 338
339 339 fn get_nodetree<'a>(
340 340 &'a self,
341 341 py: Python<'a>,
342 342 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
343 343 if self.nt(py).borrow().is_none() {
344 344 let readonly = Box::new(Vec::new());
345 345 let mut nt = NodeTree::load_bytes(readonly, 0);
346 346 self.fill_nodemap(py, &mut nt)?;
347 347 self.nt(py).borrow_mut().replace(nt);
348 348 }
349 349 Ok(self.nt(py))
350 350 }
351 351
352 352 /// forward a method call to the underlying C index
353 353 fn call_cindex(
354 354 &self,
355 355 py: Python,
356 356 name: &str,
357 357 args: &PyTuple,
358 358 kwargs: Option<&PyDict>,
359 359 ) -> PyResult<PyObject> {
360 360 self.cindex(py)
361 361 .borrow()
362 362 .inner()
363 363 .call_method(py, name, args, kwargs)
364 364 }
365 365
366 366 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
367 367 self.cindex(py).borrow().clone_ref(py)
368 368 }
369 369
370 370 /// Returns the full nodemap bytes to be written as-is to disk
371 371 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
372 372 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
373 373 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
374 374
375 375 // If there's anything readonly, we need to build the data again from
376 376 // scratch
377 377 let bytes = if readonly.len() > 0 {
378 378 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
379 379 self.fill_nodemap(py, &mut nt)?;
380 380
381 381 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
382 382 assert_eq!(readonly.len(), 0);
383 383
384 384 bytes
385 385 } else {
386 386 bytes
387 387 };
388 388
389 389 let bytes = PyBytes::new(py, &bytes);
390 390 Ok(bytes)
391 391 }
392 392
393 393 /// Returns the last saved docket along with the size of any changed data
394 394 /// (in number of blocks), and said data as bytes.
395 395 fn inner_nodemap_data_incremental(
396 396 &self,
397 397 py: Python,
398 398 ) -> PyResult<PyObject> {
399 399 let docket = self.docket(py).borrow();
400 400 let docket = match docket.as_ref() {
401 401 Some(d) => d,
402 402 None => return Ok(py.None()),
403 403 };
404 404
405 405 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
406 406 let masked_blocks = node_tree.masked_readonly_blocks();
407 407 let (_, data) = node_tree.into_readonly_and_added_bytes();
408 408 let changed = masked_blocks * std::mem::size_of::<Block>();
409 409
410 410 Ok((docket, changed, PyBytes::new(py, &data))
411 411 .to_py_object(py)
412 412 .into_object())
413 413 }
414 414
415 415 /// Update the nodemap from the new (mmaped) data.
416 416 /// The docket is kept as a reference for later incremental calls.
417 417 fn inner_update_nodemap_data(
418 418 &self,
419 419 py: Python,
420 420 docket: PyObject,
421 421 nm_data: PyObject,
422 422 ) -> PyResult<PyObject> {
423 423 let buf = PyBuffer::get(py, &nm_data)?;
424 424 let len = buf.item_count();
425 425
426 426 // Build a slice from the mmap'ed buffer data
427 427 let cbuf = buf.buf_ptr();
428 428 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
429 429 && buf.is_c_contiguous()
430 430 && u8::is_compatible_format(buf.format())
431 431 {
432 432 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
433 433 } else {
434 434 return Err(PyErr::new::<ValueError, _>(
435 435 py,
436 436 "Nodemap data buffer has an invalid memory representation"
437 437 .to_string(),
438 438 ));
439 439 };
440 440
441 441 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
442 442 // pointer.
443 443 self.mmap(py).borrow_mut().replace(buf);
444 444
445 445 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
446 446
447 447 let data_tip =
448 448 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
449 449 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
450 450 let idx = self.cindex(py).borrow();
451 451 let current_tip = idx.len();
452 452
453 453 for r in (data_tip + 1)..current_tip as Revision {
454 454 let rev = r as Revision;
455 455 // in this case node() won't ever return None
456 456 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
457 457 .map_err(|e| nodemap_error(py, e))?
458 458 }
459 459
460 460 *self.nt(py).borrow_mut() = Some(nt);
461 461
462 462 Ok(py.None())
463 463 }
464 464 }
465 465
466 466 fn revlog_error(py: Python) -> PyErr {
467 467 match py
468 468 .import("mercurial.error")
469 469 .and_then(|m| m.get(py, "RevlogError"))
470 470 {
471 471 Err(e) => e,
472 Ok(cls) => PyErr::from_instance(py, cls),
472 Ok(cls) => PyErr::from_instance(
473 py,
474 cls.call(py, (py.None(),), None).ok().into_py_object(py),
475 ),
473 476 }
474 477 }
475 478
476 479 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
477 480 PyErr::new::<ValueError, _>(
478 481 py,
479 482 format!(
480 483 "Inconsistency: Revision {} found in nodemap \
481 484 is not in revlog index",
482 485 rev
483 486 ),
484 487 )
485 488 }
486 489
487 490 /// Standard treatment of NodeMapError
488 491 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
489 492 match err {
490 493 NodeMapError::MultipleResults => revlog_error(py),
491 494 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
492 495 }
493 496 }
494 497
495 498 /// Create the module, with __package__ given from parent
496 499 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
497 500 let dotted_name = &format!("{}.revlog", package);
498 501 let m = PyModule::new(py, dotted_name)?;
499 502 m.add(py, "__package__", package)?;
500 503 m.add(py, "__doc__", "RevLog - Rust implementations")?;
501 504
502 505 m.add_class::<MixedIndex>(py)?;
503 506
504 507 let sys = PyModule::import(py, "sys")?;
505 508 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
506 509 sys_modules.set_item(py, dotted_name, &m)?;
507 510
508 511 Ok(m)
509 512 }
General Comments 0
You need to be logged in to leave comments. Login now