##// END OF EJS Templates
hgweb: encode WSGI environment using the ISO-8859-1 codec...
hgweb: encode WSGI environment using the ISO-8859-1 codec The WSGI specification (PEP 3333) specifies that on Python 3 all strings passed by the server must be of type str with code points encodable using the ISO 8859-1 codec. For some reason, I introduced a bug in 2632c1ed8f34 by applying the reverse change. Maybe I got confused because PEP 3333 says that arbitrary operating system environment variables may be contained in the WSGI environment and therefore we need to handle the WSGI environment variables like we would handle operating system environment variables. The bug mentioned in the previous paragraph and fixed by this changeset manifested e.g. in the path of the URL being encoded in the wrong way. Browsers encode non-ASCII bytes with the percent-encoding. WSGI servers will decode the percent-encoded bytes and pass them to the application as strings where each byte is mapped to the corresponding code point with the same ordinal (i.e. it is decoded using the ISO-8859-1 codec). Mercurial uses the bytes type for these strings (which makes much more sense), so we need to encode it again using the ISO-8859-1 codec. If we use another codec, it can result in nonsense.

File last commit:

r50809:58074252 default
r51713:9ed281bb stable
Show More
revlog.rs
515 lines | 17.0 KiB | application/rls-services+xml | RustLexer
rust-index: add a function to convert PyObject index for hg-core...
r44398 // revlog.rs
//
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
rust-index: add a function to convert PyObject index for hg-core...
r44398 //
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 use crate::{
cindex,
utils::{node_from_py_bytes, node_from_py_object},
};
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 use cpython::{
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 buffer::{Element, PyBuffer},
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 exc::{IndexError, ValueError},
revlog: replace revlog._io.size with a new revlog.index.entry_size...
r47736 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 };
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 use hg::{
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 nodemap::{Block, NodeMapError, NodeTree},
Simon Sapin
rust: Remove hex parsing from the nodemap...
r47161 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
Simon Sapin
rust: Simplify error type for reading hex node IDs...
r47156 Revision,
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 };
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 use std::cell::RefCell;
rust-index: add a function to convert PyObject index for hg-core...
r44398
/// Return a Struct implementing the Graph trait
Augie Fackler
revlog: run rustfmt nightly...
r44527 pub(crate) fn pyindex_to_graph(
py: Python,
index: PyObject,
) -> PyResult<cindex::Index> {
rust-index: handle `MixedIndex` in `pyindex_to_graph`...
r44463 match index.extract::<MixedIndex>(py) {
Ok(midx) => Ok(midx.clone_cindex(py)),
Err(_) => cindex::Index::new(py, index),
}
rust-index: add a function to convert PyObject index for hg-core...
r44398 }
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
py_class!(pub class MixedIndex |py| {
data cindex: RefCell<cindex::Index>;
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 data nt: RefCell<Option<NodeTree>>;
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 data docket: RefCell<Option<PyObject>>;
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 // Holds a reference to the mmap'ed persistent nodemap data
data mmap: RefCell<Option<PyBuffer>>;
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 Self::new(py, cindex)
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 }
Georges Racinet
rust-index: expose a method to retrieve the C index...
r44464 /// Compatibility layer used for Python consumers needing access to the C index
///
/// Only use case so far is `scmutil.shortesthexnodeidprefix`,
/// that may need to build a custom `nodetree`, based on a specified revset.
/// With a Rust implementation of the nodemap, we will be able to get rid of
/// this, by exposing our own standalone nodemap class,
/// ready to accept `MixedIndex`.
def get_cindex(&self) -> PyResult<PyObject> {
Ok(self.cindex(py).borrow().inner().clone_ref(py))
}
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 /// Return Revision if found, raises a bare `error.RevlogError`
/// in case of ambiguity, same as C version does
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 let node = node_from_py_bytes(py, &node)?;
nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
/// same as `get_rev()` but raises a bare `error.RevlogError` if node
/// is not found.
///
/// No need to repeat `node` in the exception, `mercurial/revlog.py`
/// will catch and rewrap with it
def rev(&self, node: PyBytes) -> PyResult<Revision> {
self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
}
/// return True if the node exist in the index
def has_node(&self, node: PyBytes) -> PyResult<bool> {
self.get_rev(py, node).map(|opt| opt.is_some())
}
/// find length of shortest hex nodeid of a binary ID
def shortest(&self, node: PyBytes) -> PyResult<usize> {
let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
{
Ok(Some(l)) => Ok(l),
Ok(None) => Err(revlog_error(py)),
Err(e) => Err(nodemap_error(py, e)),
}
}
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
let node_as_string = if cfg!(feature = "python3-sys") {
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
else {
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 let node = node.extract::<PyBytes>(py)?;
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 String::from_utf8_lossy(node.data(py)).to_string()
};
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 let prefix = NodePrefix::from_hex(&node_as_string)
.map_err(|_| PyErr::new::<ValueError, _>(
py, format!("Invalid node or prefix '{}'", node_as_string))
)?;
Simon Sapin
rust: Remove hex parsing from the nodemap...
r47161
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 nt.find_bin(idx, prefix)
// TODO make an inner API returning the node directly
.map(|opt| opt.map(
|rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
.map_err(|e| nodemap_error(py, e))
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
/// append an index entry
def append(&self, tup: PyTuple) -> PyResult<PyObject> {
if tup.len(py) < 8 {
// this is better than the panic promised by tup.get_item()
return Err(
PyErr::new::<IndexError, _>(py, "tuple index out of range"))
}
let node_bytes = tup.get_item(py, 7).extract(py)?;
let node = node_from_py_object(py, &node_bytes)?;
let mut idx = self.cindex(py).borrow_mut();
let rev = idx.len() as Revision;
idx.append(py, tup)?;
self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
.insert(&*idx, &node, rev)
.map_err(|e| nodemap_error(py, e))?;
Ok(py.None())
}
def __delitem__(&self, key: PyObject) -> PyResult<()> {
// __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
self.cindex(py).borrow().inner().del_item(py, key)?;
let mut opt = self.get_nodetree(py)?.borrow_mut();
Raphaël Gomès
rust: run `cargo clippy`...
r50809 let nt = opt.as_mut().unwrap();
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 nt.invalidate_all();
Raphaël Gomès
rust: run `cargo clippy`...
r50809 self.fill_nodemap(py, nt)?;
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 Ok(())
}
//
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 // Reforwarded C index API
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 //
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
// index_methods (tp_methods). Same ordering as in revlog.c
/// return the gca set of the given revs
def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "ancestors", args, kw)
}
/// return the heads of the common ancestors of the given revs
def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "commonancestorsheads", args, kw)
}
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 /// Clear the index caches and inner py_class data.
/// It is Python's responsibility to call `update_nodemap_data` again.
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 self.nt(py).borrow_mut().take();
self.docket(py).borrow_mut().take();
self.mmap(py).borrow_mut().take();
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 self.call_cindex(py, "clearcaches", args, kw)
}
revlog: add a `entry_binary` method on index...
r47808 /// return the raw binary string representing a revision
def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "entry_binary", args, kw)
}
revlog: have an explicit "pack_header" method...
r47811 /// return a binary packed version of the header
def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "pack_header", args, kw)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// get an index entry
def get(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "get", args, kw)
}
/// compute phases
def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "computephasesmapsets", args, kw)
}
/// reachableroots
def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "reachableroots2", args, kw)
}
/// get head revisions
def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "headrevs", args, kw)
}
/// get filtered head revisions
def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "headrevsfiltered", args, kw)
}
/// True if the object is a snapshot
def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "issnapshot", args, kw)
}
/// Gather snapshot data in a cache dict
def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "findsnapshots", args, kw)
}
/// determine revisions with deltas to reconstruct fulltext
def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "deltachain", args, kw)
}
/// slice planned chunk read to reach a density threshold
def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "slicechunktodensity", args, kw)
}
/// stats for the index
def stats(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "stats", args, kw)
}
// index_sequence_methods and index_mapping_methods.
//
// Since we call back through the high level Python API,
// there's no point making a distinction between index_get
// and index_getitem.
def __len__(&self) -> PyResult<usize> {
self.cindex(py).borrow().inner().len(py)
}
def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
// this conversion seems needless, but that's actually because
// `index_getitem` does not handle conversion from PyLong,
// which expressions such as [e for e in index] internally use.
// Note that we don't seem to have a direct way to call
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 // PySequence_GetItem (does the job), which would possibly be better
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 // for performance
let key = match key.extract::<Revision>(py) {
Ok(rev) => rev.to_py_object(py).into_object(),
Err(_) => key,
};
self.cindex(py).borrow().inner().get_item(py, key)
}
def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
self.cindex(py).borrow().inner().set_item(py, key, value)
}
def __contains__(&self, item: PyObject) -> PyResult<bool> {
// ObjectProtocol does not seem to provide contains(), so
// this is an equivalent implementation of the index_contains()
// defined in revlog.c
let cindex = self.cindex(py).borrow();
match item.extract::<Revision>(py) {
Ok(rev) => {
Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
}
Err(_) => {
cindex.inner().call_method(
py,
"has_node",
PyTuple::new(py, &[item]),
None)?
.extract(py)
}
}
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_all`...
r44995 def nodemap_data_all(&self) -> PyResult<PyBytes> {
self.inner_nodemap_data_all(py)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
self.inner_nodemap_data_incremental(py)
}
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 def update_nodemap_data(
&self,
docket: PyObject,
nm_data: PyObject
) -> PyResult<PyObject> {
self.inner_update_nodemap_data(py, docket, nm_data)
}
revlog: replace revlog._io.size with a new revlog.index.entry_size...
r47736 @property
def entry_size(&self) -> PyResult<PyInt> {
self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
revlog: signal which revlog index are compatible with Rust...
r48042 @property
def rust_ext_compat(&self) -> PyResult<PyInt> {
self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 });
impl MixedIndex {
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
Self::create_instance(
py,
RefCell::new(cindex::Index::new(py, cindex)?),
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 RefCell::new(None),
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 RefCell::new(None),
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 RefCell::new(None),
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 )
}
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 /// This is scaffolding at this point, but it could also become
/// a way to start a persistent nodemap or perform a
/// vacuum / repack operation
fn fill_nodemap(
&self,
py: Python,
nt: &mut NodeTree,
) -> PyResult<PyObject> {
let index = self.cindex(py).borrow();
for r in 0..index.len() {
let rev = r as Revision;
// in this case node() won't ever return None
nt.insert(&*index, index.node(rev).unwrap(), rev)
.map_err(|e| nodemap_error(py, e))?
}
Ok(py.None())
}
fn get_nodetree<'a>(
&'a self,
py: Python<'a>,
) -> PyResult<&'a RefCell<Option<NodeTree>>> {
if self.nt(py).borrow().is_none() {
let readonly = Box::new(Vec::new());
let mut nt = NodeTree::load_bytes(readonly, 0);
self.fill_nodemap(py, &mut nt)?;
self.nt(py).borrow_mut().replace(nt);
}
Ok(self.nt(py))
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// forward a method call to the underlying C index
fn call_cindex(
&self,
py: Python,
name: &str,
args: &PyTuple,
kwargs: Option<&PyDict>,
) -> PyResult<PyObject> {
self.cindex(py)
.borrow()
.inner()
.call_method(py, name, args, kwargs)
}
Georges Racinet
rust-index: make it possible to clone the struct referencing the C index...
r44462
pub fn clone_cindex(&self, py: Python) -> cindex::Index {
self.cindex(py).borrow().clone_ref(py)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_all`...
r44995
/// Returns the full nodemap bytes to be written as-is to disk
fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
// If there's anything readonly, we need to build the data again from
// scratch
let bytes = if readonly.len() > 0 {
let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
self.fill_nodemap(py, &mut nt)?;
let (readonly, bytes) = nt.into_readonly_and_added_bytes();
assert_eq!(readonly.len(), 0);
bytes
} else {
bytes
};
let bytes = PyBytes::new(py, &bytes);
Ok(bytes)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996
/// Returns the last saved docket along with the size of any changed data
/// (in number of blocks), and said data as bytes.
fn inner_nodemap_data_incremental(
&self,
py: Python,
) -> PyResult<PyObject> {
let docket = self.docket(py).borrow();
let docket = match docket.as_ref() {
Some(d) => d,
None => return Ok(py.None()),
};
let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
let masked_blocks = node_tree.masked_readonly_blocks();
let (_, data) = node_tree.into_readonly_and_added_bytes();
let changed = masked_blocks * std::mem::size_of::<Block>();
Ok((docket, changed, PyBytes::new(py, &data))
.to_py_object(py)
.into_object())
}
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997
/// Update the nodemap from the new (mmaped) data.
/// The docket is kept as a reference for later incremental calls.
fn inner_update_nodemap_data(
&self,
py: Python,
docket: PyObject,
nm_data: PyObject,
) -> PyResult<PyObject> {
let buf = PyBuffer::get(py, &nm_data)?;
let len = buf.item_count();
// Build a slice from the mmap'ed buffer data
let cbuf = buf.buf_ptr();
let bytes = if std::mem::size_of::<u8>() == buf.item_size()
&& buf.is_c_contiguous()
&& u8::is_compatible_format(buf.format())
{
unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
} else {
return Err(PyErr::new::<ValueError, _>(
py,
"Nodemap data buffer has an invalid memory representation"
.to_string(),
));
};
// Keep a reference to the mmap'ed buffer, otherwise we get a dangling
// pointer.
self.mmap(py).borrow_mut().replace(buf);
let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
let data_tip =
docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
self.docket(py).borrow_mut().replace(docket.clone_ref(py));
let idx = self.cindex(py).borrow();
let current_tip = idx.len();
for r in (data_tip + 1)..current_tip as Revision {
let rev = r as Revision;
// in this case node() won't ever return None
nt.insert(&*idx, idx.node(rev).unwrap(), rev)
.map_err(|e| nodemap_error(py, e))?
}
*self.nt(py).borrow_mut() = Some(nt);
Ok(py.None())
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 }
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 fn revlog_error(py: Python) -> PyErr {
match py
.import("mercurial.error")
.and_then(|m| m.get(py, "RevlogError"))
{
Err(e) => e,
Raphaël Gomès
hg-cpython: fix new occuring TypeError...
r48086 Ok(cls) => PyErr::from_instance(
py,
cls.call(py, (py.None(),), None).ok().into_py_object(py),
),
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 }
}
fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
PyErr::new::<ValueError, _>(
py,
format!(
"Inconsistency: Revision {} found in nodemap \
is not in revlog index",
rev
),
)
}
/// Standard treatment of NodeMapError
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
match err {
NodeMapError::MultipleResults => revlog_error(py),
NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
}
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// Create the module, with __package__ given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.revlog", package);
let m = PyModule::new(py, dotted_name)?;
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "RevLog - Rust implementations")?;
m.add_class::<MixedIndex>(py)?;
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
Ok(m)
}