##// END OF EJS Templates
copies-rust: parse the changed-file sidedata directly in rust...
marmoute -
r46674:e0313b0a default
parent child Browse files
Show More
@@ -25,7 +25,10 b' from . import ('
25 25
26 26 from .utils import stringutil
27 27
28 from .revlogutils import flagutil
28 from .revlogutils import (
29 flagutil,
30 sidedata as sidedatamod,
31 )
29 32
30 33 rustmod = policy.importrust("copy_tracing")
31 34
@@ -175,7 +178,7 b' def _committedforwardcopies(a, b, base, '
175 178 return cm
176 179
177 180
178 def _revinfo_getter(repo):
181 def _revinfo_getter(repo, match):
179 182 """returns a function that returns the following data given a <rev>"
180 183
181 184 * p1: revision number of first parent
@@ -215,6 +218,28 b' def _revinfo_getter(repo):'
215 218 # time to save memory.
216 219 merge_caches = {}
217 220
221 alwaysmatch = match.always()
222
223 if rustmod is not None and alwaysmatch:
224
225 def revinfo(rev):
226 p1, p2 = parents(rev)
227 value = None
228 e = merge_caches.pop(rev, None)
229 if e is not None:
230 return e
231 if flags(rev) & HASCOPIESINFO:
232 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
233 else:
234 raw = None
235 value = (p1, p2, raw)
236 if p1 != node.nullrev and p2 != node.nullrev:
237 # XXX in some cases we over-cache, IGNORE
238 merge_caches[rev] = value
239 return value
240
241 else:
242
218 243 def revinfo(rev):
219 244 p1, p2 = parents(rev)
220 245 value = None
@@ -289,7 +314,7 b' def _changesetforwardcopies(a, b, match)'
289 314 revs = sorted(iterrevs)
290 315
291 316 if repo.filecopiesmode == b'changeset-sidedata':
292 revinfo = _revinfo_getter(repo)
317 revinfo = _revinfo_getter(repo, match)
293 318 return _combine_changeset_copies(
294 319 revs, children, b.rev(), revinfo, match, isancestor
295 320 )
@@ -5,8 +5,9 b' use crate::Revision;'
5 5 use im_rc::ordmap::DiffItem;
6 6 use im_rc::ordmap::OrdMap;
7 7
8 use std::cmp::Ordering;
8 9 use std::collections::HashMap;
9 use std::collections::HashSet;
10 use std::convert::TryInto;
10 11
11 12 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
12 13
@@ -23,18 +24,18 b' struct TimeStampedPathCopy {'
23 24 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
24 25
25 26 /// hold parent 1, parent 2 and relevant files actions.
26 pub type RevInfo = (Revision, Revision, ChangedFiles);
27 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
27 28
28 29 /// Represents the files affected by a changeset
29 30 ///
30 31 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
31 32 /// all the data categories tracked by it.
32 pub struct ChangedFiles {
33 removed: HashSet<HgPathBuf>,
34 merged: HashSet<HgPathBuf>,
35 salvaged: HashSet<HgPathBuf>,
36 copied_from_p1: PathCopies,
37 copied_from_p2: PathCopies,
33 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
34 /// all the data categories tracked by it.
35 pub struct ChangedFiles<'a> {
36 nb_items: u32,
37 index: &'a [u8],
38 data: &'a [u8],
38 39 }
39 40
40 41 /// Represent active changes that affect the copy tracing.
@@ -62,55 +63,161 b' enum MergeCase {'
62 63 Normal,
63 64 }
64 65
65 impl ChangedFiles {
66 pub fn new(
67 removed: HashSet<HgPathBuf>,
68 merged: HashSet<HgPathBuf>,
69 salvaged: HashSet<HgPathBuf>,
70 copied_from_p1: PathCopies,
71 copied_from_p2: PathCopies,
72 ) -> Self {
73 ChangedFiles {
74 removed,
75 merged,
76 salvaged,
77 copied_from_p1,
78 copied_from_p2,
79 }
66 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
67
68 const EMPTY: &[u8] = b"";
69 const COPY_MASK: u8 = 3;
70 const P1_COPY: u8 = 2;
71 const P2_COPY: u8 = 3;
72 const ACTION_MASK: u8 = 28;
73 const REMOVED: u8 = 12;
74 const MERGED: u8 = 8;
75 const SALVAGED: u8 = 16;
76
77 impl<'a> ChangedFiles<'a> {
78 const INDEX_START: usize = 4;
79 const ENTRY_SIZE: u32 = 9;
80 const FILENAME_START: u32 = 1;
81 const COPY_SOURCE_START: u32 = 5;
82
83 pub fn new(data: &'a [u8]) -> Self {
84 assert!(
85 data.len() >= 4,
86 "data size ({}) is too small to contain the header (4)",
87 data.len()
88 );
89 let nb_items_raw: [u8; 4] = (&data[0..=3])
90 .try_into()
91 .expect("failed to turn 4 bytes into 4 bytes");
92 let nb_items = u32::from_be_bytes(nb_items_raw);
93
94 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
95 let index_end = Self::INDEX_START + index_size;
96
97 assert!(
98 data.len() >= index_end,
99 "data size ({}) is too small to fit the index_data ({})",
100 data.len(),
101 index_end
102 );
103
104 let ret = ChangedFiles {
105 nb_items,
106 index: &data[Self::INDEX_START..index_end],
107 data: &data[index_end..],
108 };
109 let max_data = ret.filename_end(nb_items - 1) as usize;
110 assert!(
111 ret.data.len() >= max_data,
112 "data size ({}) is too small to fit all data ({})",
113 data.len(),
114 index_end + max_data
115 );
116 ret
80 117 }
81 118
82 119 pub fn new_empty() -> Self {
83 120 ChangedFiles {
84 removed: HashSet::new(),
85 merged: HashSet::new(),
86 salvaged: HashSet::new(),
87 copied_from_p1: PathCopies::new(),
88 copied_from_p2: PathCopies::new(),
121 nb_items: 0,
122 index: EMPTY,
123 data: EMPTY,
124 }
125 }
126
127 /// internal function to return an individual entry at a given index
128 fn entry(&'a self, idx: u32) -> FileChange<'a> {
129 if idx >= self.nb_items {
130 panic!(
131 "index for entry is higher that the number of file {} >= {}",
132 idx, self.nb_items
133 )
134 }
135 let flags = self.flags(idx);
136 let filename = self.filename(idx);
137 let copy_idx = self.copy_idx(idx);
138 let copy_source = self.filename(copy_idx);
139 (flags, filename, copy_source)
140 }
141
142 /// internal function to return the filename of the entry at a given index
143 fn filename(&self, idx: u32) -> &HgPath {
144 let filename_start;
145 if idx == 0 {
146 filename_start = 0;
147 } else {
148 filename_start = self.filename_end(idx - 1)
89 149 }
150 let filename_end = self.filename_end(idx);
151 let filename_start = filename_start as usize;
152 let filename_end = filename_end as usize;
153 HgPath::new(&self.data[filename_start..filename_end])
154 }
155
156 /// internal function to return the flag field of the entry at a given
157 /// index
158 fn flags(&self, idx: u32) -> u8 {
159 let idx = idx as usize;
160 self.index[idx * (Self::ENTRY_SIZE as usize)]
161 }
162
163 /// internal function to return the end of a filename part at a given index
164 fn filename_end(&self, idx: u32) -> u32 {
165 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
166 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
167 let start = start as usize;
168 let end = end as usize;
169 let raw = (&self.index[start..end])
170 .try_into()
171 .expect("failed to turn 4 bytes into 4 bytes");
172 u32::from_be_bytes(raw)
173 }
174
175 /// internal function to return index of the copy source of the entry at a
176 /// given index
177 fn copy_idx(&self, idx: u32) -> u32 {
178 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
179 let end = (idx + 1) * Self::ENTRY_SIZE;
180 let start = start as usize;
181 let end = end as usize;
182 let raw = (&self.index[start..end])
183 .try_into()
184 .expect("failed to turn 4 bytes into 4 bytes");
185 u32::from_be_bytes(raw)
90 186 }
91 187
92 188 /// Return an iterator over all the `Action` in this instance.
93 fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
94 let copies_iter = match parent {
95 1 => self.copied_from_p1.iter(),
96 2 => self.copied_from_p2.iter(),
97 _ => unreachable!(),
98 };
99 let remove_iter = self.removed.iter();
100 let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
101 let remove_iter = remove_iter.map(|x| Action::Removed(x));
102 copies_iter.chain(remove_iter)
189 fn iter_actions(&self, parent: usize) -> ActionsIterator {
190 ActionsIterator {
191 changes: &self,
192 parent: parent,
193 current: 0,
194 }
103 195 }
104 196
105 197 /// return the MergeCase value associated with a filename
106 198 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
107 if self.salvaged.contains(path) {
108 return MergeCase::Salvaged;
109 } else if self.merged.contains(path) {
110 return MergeCase::Merged;
111 } else {
199 if self.nb_items == 0 {
112 200 return MergeCase::Normal;
113 201 }
202 let mut low_part = 0;
203 let mut high_part = self.nb_items;
204
205 while low_part < high_part {
206 let cursor = (low_part + high_part - 1) / 2;
207 let (flags, filename, _source) = self.entry(cursor);
208 match path.cmp(filename) {
209 Ordering::Less => low_part = cursor + 1,
210 Ordering::Greater => high_part = cursor,
211 Ordering::Equal => {
212 return match flags & ACTION_MASK {
213 MERGED => MergeCase::Merged,
214 SALVAGED => MergeCase::Salvaged,
215 _ => MergeCase::Normal,
216 };
217 }
218 }
219 }
220 MergeCase::Normal
114 221 }
115 222 }
116 223
@@ -150,6 +257,50 b" impl<'a, A: Fn(Revision, Revision) -> bo"
150 257 }
151 258 }
152 259
260 struct ActionsIterator<'a> {
261 changes: &'a ChangedFiles<'a>,
262 parent: usize,
263 current: u32,
264 }
265
266 impl<'a> Iterator for ActionsIterator<'a> {
267 type Item = Action<'a>;
268
269 fn next(&mut self) -> Option<Action<'a>> {
270 while self.current < self.changes.nb_items {
271 let (flags, file, source) = self.changes.entry(self.current);
272 self.current += 1;
273 if (flags & ACTION_MASK) == REMOVED {
274 return Some(Action::Removed(file));
275 }
276 let copy = flags & COPY_MASK;
277 if self.parent == 1 && copy == P1_COPY {
278 return Some(Action::Copied(file, source));
279 }
280 if self.parent == 2 && copy == P2_COPY {
281 return Some(Action::Copied(file, source));
282 }
283 }
284 return None;
285 }
286 }
287
288 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
289 /// ChangedFiles
290 ///
291 /// It is passed to the RevInfoMaker callback, which can assign any necessary
292 /// content to the `data` attribute. The copy tracing code is responsible for
293 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
294 pub struct DataHolder<D> {
295 /// The RevInfoMaker callback should assign the data referenced by the
296 /// ChangedFiles struct it returns to this attribute. The DataHolder
297 /// lifetime will be at least as long as the ChangedFiles one.
298 pub data: Option<D>,
299 }
300
301 pub type RevInfoMaker<'a, D> =
302 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
303
153 304 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
154 305 ///
155 306 /// Arguments are:
@@ -163,11 +314,11 b" impl<'a, A: Fn(Revision, Revision) -> bo"
163 314 /// * ChangedFiles
164 315 /// isancestors(low_rev, high_rev): callback to check if a revision is an
165 316 /// ancestor of another
166 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
317 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
167 318 revs: Vec<Revision>,
168 319 children: HashMap<Revision, Vec<Revision>>,
169 320 target_rev: Revision,
170 rev_info: &impl Fn(Revision) -> RevInfo,
321 rev_info: RevInfoMaker<D>,
171 322 is_ancestor: &A,
172 323 ) -> PathCopies {
173 324 let mut all_copies = HashMap::new();
@@ -189,8 +340,9 b' pub fn combine_changeset_copies<A: Fn(Re'
189 340 for child in current_children {
190 341 // We will chain the copies information accumulated for `rev` with
191 342 // the individual copies information for each of its children.
192 // Creating a new PathCopies for each `rev` ? `children` vertex.
193 let (p1, p2, changes) = rev_info(*child);
343 // Creating a new PathCopies for each `rev` -> `children` vertex.
344 let mut d: DataHolder<D> = DataHolder { data: None };
345 let (p1, p2, changes) = rev_info(*child, &mut d);
194 346
195 347 let parent = if rev == p1 {
196 348 1
@@ -11,8 +11,9 b' use cpython::Python;'
11 11
12 12 use hg::copy_tracing::combine_changeset_copies;
13 13 use hg::copy_tracing::ChangedFiles;
14 use hg::copy_tracing::DataHolder;
14 15 use hg::copy_tracing::RevInfo;
15 use hg::utils::hg_path::HgPathBuf;
16 use hg::copy_tracing::RevInfoMaker;
16 17 use hg::Revision;
17 18
18 19 /// Combines copies information contained into revision `revs` to build a copy
@@ -57,7 +58,8 b' pub fn combine_changeset_copies_wrapper('
57 58 // happens in case of programing error or severe data corruption. Such
58 59 // errors will raise panic and the rust-cpython harness will turn them into
59 60 // Python exception.
60 let rev_info_maker = |rev: Revision| -> RevInfo {
61 let rev_info_maker: RevInfoMaker<PyBytes> =
62 Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
61 63 let res: PyTuple = rev_info
62 64 .call(py, (rev,), None)
63 65 .expect("rust-copy-tracing: python call to `rev_info` failed")
@@ -67,174 +69,30 b' pub fn combine_changeset_copies_wrapper('
67 69 unexpected non-Tuple value",
68 70 );
69 71 let p1 = res.get_item(py, 0).extract(py).expect(
70 "rust-copy-tracing: \
71 rev_info return is invalid, first item is a not a revision",
72 "rust-copy-tracing: rev_info return is invalid, first item \
73 is a not a revision",
72 74 );
73 75 let p2 = res.get_item(py, 1).extract(py).expect(
74 "rust-copy-tracing: \
75 rev_info return is invalid, second item is a not a revision",
76 "rust-copy-tracing: rev_info return is invalid, first item \
77 is a not a revision",
76 78 );
77 79
78 let changes = res.get_item(py, 2);
79
80 let files;
81 if !changes
82 .hasattr(py, "copied_from_p1")
83 .expect("rust-copy-tracing: python call to `hasattr` failed")
84 {
85 files = ChangedFiles::new_empty();
86 } else {
87 let p1_copies: PyDict = changes
88 .getattr(py, "copied_from_p1")
89 .expect(
90 "rust-copy-tracing: retrieval of python attribute \
91 `copied_from_p1` failed",
92 )
93 .cast_into(py)
94 .expect(
95 "rust-copy-tracing: failed to convert `copied_from_p1` \
96 to PyDict",
97 );
98 let p1_copies: PyResult<_> = p1_copies
99 .items(py)
100 .iter()
101 .map(|(key, value)| {
102 let key = key.extract::<PyBytes>(py).expect(
103 "rust-copy-tracing: conversion of copy destination to\
104 PyBytes failed",
105 );
106 let key = key.data(py);
107 let value = value.extract::<PyBytes>(py).expect(
108 "rust-copy-tracing: conversion of copy source to \
109 PyBytes failed",
110 );
111 let value = value.data(py);
112 Ok((
113 HgPathBuf::from_bytes(key),
114 HgPathBuf::from_bytes(value),
115 ))
116 })
117 .collect();
118
119 let p2_copies: PyDict = changes
120 .getattr(py, "copied_from_p2")
121 .expect(
122 "rust-copy-tracing: retrieval of python attribute \
123 `copied_from_p2` failed",
124 )
125 .cast_into(py)
126 .expect(
127 "rust-copy-tracing: failed to convert `copied_from_p2` \
128 to PyDict",
129 );
130 let p2_copies: PyResult<_> = p2_copies
131 .items(py)
132 .iter()
133 .map(|(key, value)| {
134 let key = key.extract::<PyBytes>(py).expect(
135 "rust-copy-tracing: conversion of copy destination to \
136 PyBytes failed");
137 let key = key.data(py);
138 let value = value.extract::<PyBytes>(py).expect(
139 "rust-copy-tracing: conversion of copy source to \
140 PyBytes failed",
141 );
142 let value = value.data(py);
143 Ok((
144 HgPathBuf::from_bytes(key),
145 HgPathBuf::from_bytes(value),
146 ))
147 })
148 .collect();
149
150 let removed: PyObject = changes.getattr(py, "removed").expect(
151 "rust-copy-tracing: retrieval of python attribute \
152 `removed` failed",
153 );
154 let removed: PyResult<_> = removed
155 .iter(py)
156 .expect(
157 "rust-copy-tracing: getting a python iterator over the \
158 `removed` set failed",
159 )
160 .map(|filename| {
161 let filename = filename
162 .expect(
163 "rust-copy-tracing: python iteration over the \
164 `removed` set failed",
165 )
166 .extract::<PyBytes>(py)
167 .expect(
168 "rust-copy-tracing: \
169 conversion of `removed` item to PyBytes failed",
170 );
171 let filename = filename.data(py);
172 Ok(HgPathBuf::from_bytes(filename))
173 })
174 .collect();
175
176 let merged: PyObject = changes.getattr(py, "merged").expect(
177 "rust-copy-tracing: retrieval of python attribute \
178 `merged` failed",
179 );
180 let merged: PyResult<_> = merged
181 .iter(py)
182 .expect(
183 "rust-copy-tracing: getting a python iterator over the \
184 `merged` set failed",
185 )
186 .map(|filename| {
187 let filename = filename
188 .expect(
189 "rust-copy-tracing: python iteration over the \
190 `merged` set failed",
191 )
192 .extract::<PyBytes>(py)
193 .expect(
194 "rust-copy-tracing: \
195 conversion of `merged` item to PyBytes failed",
196 );
197 let filename = filename.data(py);
198 Ok(HgPathBuf::from_bytes(filename))
199 })
200 .collect();
201
202 let salvaged: PyObject = changes.getattr(py, "salvaged").expect(
203 "rust-copy-tracing: retrieval of python attribute \
204 `salvaged` failed",
205 );
206 let salvaged: PyResult<_> = salvaged
207 .iter(py)
208 .expect(
209 "rust-copy-tracing: getting a python iterator over the \
210 `salvaged` set failed",
211 )
212 .map(|filename| {
213 let filename = filename
214 .expect(
215 "rust-copy-tracing: python iteration over the \
216 `salvaged` set failed",
217 )
218 .extract::<PyBytes>(py)
219 .expect(
220 "rust-copy-tracing: \
221 conversion of `salvaged` item to PyBytes failed",
222 );
223 let filename = filename.data(py);
224 Ok(HgPathBuf::from_bytes(filename))
225 })
226 .collect();
227 files = ChangedFiles::new(
228 removed.unwrap(),
229 merged.unwrap(),
230 salvaged.unwrap(),
231 p1_copies.unwrap(),
232 p2_copies.unwrap(),
233 );
80 let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
81 Ok(raw) => {
82 // Give responsibility for the raw bytes lifetime to
83 // hg-core
84 d.data = Some(raw);
85 let addrs = d.data.as_ref().expect(
86 "rust-copy-tracing: failed to get a reference to the \
87 raw bytes for copy data").data(py);
88 ChangedFiles::new(addrs)
234 89 }
90 // value was presumably None, meaning there was no copy data.
91 Err(_) => ChangedFiles::new_empty(),
92 };
235 93
236 94 (p1, p2, files)
237 };
95 });
238 96 let children: PyResult<_> = children
239 97 .items(py)
240 98 .iter()
@@ -250,7 +108,7 b' pub fn combine_changeset_copies_wrapper('
250 108 revs?,
251 109 children?,
252 110 target_rev,
253 &rev_info_maker,
111 rev_info_maker,
254 112 &is_ancestor_wrap,
255 113 );
256 114 let out = PyDict::new(py);
General Comments 0
You need to be logged in to leave comments. Login now