##// END OF EJS Templates
copies-rust: rewrite ChangedFiles binary parsing...
Simon Sapin -
r47413:f977a065 default
parent child Browse files
Show More
@@ -3,13 +3,13 b' use crate::utils::hg_path::HgPathBuf;'
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 use bytes_cast::{unaligned, BytesCast};
6 7 use im_rc::ordmap::Entry;
7 8 use im_rc::ordmap::OrdMap;
8 9 use im_rc::OrdSet;
9 10
10 11 use std::cmp::Ordering;
11 12 use std::collections::HashMap;
12 use std::convert::TryInto;
13 13
14 14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15 15
@@ -110,18 +110,6 b' impl PartialEq for CopySource {'
110 110 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
111 111 type InternalPathCopies = OrdMap<PathToken, CopySource>;
112 112
113 /// represent the files affected by a changesets
114 ///
115 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
116 /// all the data categories tracked by it.
117 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
118 /// all the data categories tracked by it.
119 pub struct ChangedFiles<'a> {
120 nb_items: u32,
121 index: &'a [u8],
122 data: &'a [u8],
123 }
124
125 113 /// Represent active changes that affect the copy tracing.
126 114 enum Action<'a> {
126 114 /// The parent → children edge is removing a file
@@ -148,9 +136,6 b' enum MergeCase {'
148 136 Normal,
149 137 }
150 138
151 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
152
153 const EMPTY: &[u8] = b"";
154 139 const COPY_MASK: u8 = 3;
155 140 const P1_COPY: u8 = 2;
156 141 const P2_COPY: u8 = 3;
@@ -159,141 +144,94 b' const REMOVED: u8 = 12;'
159 144 const MERGED: u8 = 8;
160 145 const SALVAGED: u8 = 16;
161 146
162 impl<'a> ChangedFiles<'a> {
163 const INDEX_START: usize = 4;
164 const ENTRY_SIZE: u32 = 9;
165 const FILENAME_START: u32 = 1;
166 const COPY_SOURCE_START: u32 = 5;
147 #[derive(BytesCast)]
148 #[repr(C)]
149 struct ChangedFilesIndexEntry {
150 flags: u8,
167 151
168 pub fn new(data: &'a [u8]) -> Self {
169 assert!(
170 data.len() >= 4,
171 "data size ({}) is too small to contain the header (4)",
172 data.len()
173 );
174 let nb_items_raw: [u8; 4] = (&data[0..=3])
175 .try_into()
176 .expect("failed to turn 4 bytes into 4 bytes");
177 let nb_items = u32::from_be_bytes(nb_items_raw);
152 /// Only the end position is stored. The start is at the end of the
153 /// previous entry.
154 destination_path_end_position: unaligned::U32Be,
178 155
179 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
180 let index_end = Self::INDEX_START + index_size;
156 source_index_entry_position: unaligned::U32Be,
157 }
158
159 fn _static_assert_size_of() {
160 let _ = std::mem::transmute::<ChangedFilesIndexEntry, [u8; 9]>;
161 }
181 162
182 assert!(
183 data.len() >= index_end,
184 "data size ({}) is too small to fit the index_data ({})",
185 data.len(),
186 index_end
187 );
163 /// Represents the files affected by a changeset.
164 ///
165 /// This holds a subset of `mercurial.metadata.ChangingFiles` as we do not need
166 /// all the data categories tracked by it.
167 pub struct ChangedFiles<'a> {
168 index: &'a [ChangedFilesIndexEntry],
169 paths: &'a [u8],
170 }
188 171
189 let ret = ChangedFiles {
190 nb_items,
191 index: &data[Self::INDEX_START..index_end],
192 data: &data[index_end..],
193 };
194 let max_data = ret.filename_end(nb_items - 1) as usize;
195 assert!(
196 ret.data.len() >= max_data,
197 "data size ({}) is too small to fit all data ({})",
198 data.len(),
199 index_end + max_data
200 );
201 ret
172 impl<'a> ChangedFiles<'a> {
173 pub fn new(data: &'a [u8]) -> Self {
174 let (header, rest) = unaligned::U32Be::from_bytes(data).unwrap();
175 let nb_index_entries = header.get() as usize;
176 let (index, paths) =
177 ChangedFilesIndexEntry::slice_from_bytes(rest, nb_index_entries)
178 .unwrap();
179 Self { index, paths }
202 180 }
203 181
204 182 pub fn new_empty() -> Self {
205 183 ChangedFiles {
206 nb_items: 0,
207 index: EMPTY,
208 data: EMPTY,
184 index: &[],
185 paths: &[],
209 186 }
210 187 }
211 188
212 /// internal function to return an individual entry at a given index
213 fn entry(&'a self, idx: u32) -> FileChange<'a> {
214 if idx >= self.nb_items {
215 panic!(
216 "index for entry is higher that the number of file {} >= {}",
217 idx, self.nb_items
218 )
219 }
220 let flags = self.flags(idx);
221 let filename = self.filename(idx);
222 let copy_idx = self.copy_idx(idx);
223 let copy_source = self.filename(copy_idx);
224 (flags, filename, copy_source)
225 }
226
227 /// internal function to return the filename of the entry at a given index
228 fn filename(&self, idx: u32) -> &HgPath {
229 let filename_start;
230 if idx == 0 {
231 filename_start = 0;
189 /// Internal function to return the filename of the entry at a given index
190 fn path(&self, idx: usize) -> &HgPath {
191 let start = if idx == 0 {
192 0
232 193 } else {
233 filename_start = self.filename_end(idx - 1)
234 }
235 let filename_end = self.filename_end(idx);
236 let filename_start = filename_start as usize;
237 let filename_end = filename_end as usize;
238 HgPath::new(&self.data[filename_start..filename_end])
239 }
240
241 /// internal function to return the flag field of the entry at a given
242 /// index
243 fn flags(&self, idx: u32) -> u8 {
244 let idx = idx as usize;
245 self.index[idx * (Self::ENTRY_SIZE as usize)]
246 }
247
248 /// internal function to return the end of a filename part at a given index
249 fn filename_end(&self, idx: u32) -> u32 {
250 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
251 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
252 let start = start as usize;
253 let end = end as usize;
254 let raw = (&self.index[start..end])
255 .try_into()
256 .expect("failed to turn 4 bytes into 4 bytes");
257 u32::from_be_bytes(raw)
258 }
259
260 /// internal function to return index of the copy source of the entry at a
261 /// given index
262 fn copy_idx(&self, idx: u32) -> u32 {
263 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
264 let end = (idx + 1) * Self::ENTRY_SIZE;
265 let start = start as usize;
266 let end = end as usize;
267 let raw = (&self.index[start..end])
268 .try_into()
269 .expect("failed to turn 4 bytes into 4 bytes");
270 u32::from_be_bytes(raw)
194 self.index[idx - 1].destination_path_end_position.get() as usize
195 };
196 let end = self.index[idx].destination_path_end_position.get() as usize;
197 HgPath::new(&self.paths[start..end])
271 198 }
272 199
273 200 /// Return an iterator over all the `Action` in this instance.
274 fn iter_actions(&self) -> ActionsIterator {
275 ActionsIterator {
276 changes: &self,
277 current: 0,
278 }
201 fn iter_actions(&self) -> impl Iterator<Item = Action> {
202 self.index.iter().enumerate().flat_map(move |(idx, entry)| {
203 let path = self.path(idx);
204 if (entry.flags & ACTION_MASK) == REMOVED {
205 Some(Action::Removed(path))
206 } else if (entry.flags & COPY_MASK) == P1_COPY {
207 let source_idx =
208 entry.source_index_entry_position.get() as usize;
209 Some(Action::CopiedFromP1(path, self.path(source_idx)))
210 } else if (entry.flags & COPY_MASK) == P2_COPY {
211 let source_idx =
212 entry.source_index_entry_position.get() as usize;
213 Some(Action::CopiedFromP2(path, self.path(source_idx)))
214 } else {
215 None
216 }
217 })
279 218 }
280 219
281 220 /// return the MergeCase value associated with a filename
282 221 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
283 if self.nb_items == 0 {
222 if self.index.is_empty() {
284 223 return MergeCase::Normal;
285 224 }
286 225 let mut low_part = 0;
287 let mut high_part = self.nb_items;
226 let mut high_part = self.index.len();
288 227
289 228 while low_part < high_part {
290 229 let cursor = (low_part + high_part - 1) / 2;
291 let (flags, filename, _source) = self.entry(cursor);
292 match path.cmp(filename) {
230 match path.cmp(self.path(cursor)) {
293 231 Ordering::Less => low_part = cursor + 1,
294 232 Ordering::Greater => high_part = cursor,
295 233 Ordering::Equal => {
296 return match flags & ACTION_MASK {
234 return match self.index[cursor].flags & ACTION_MASK {
297 235 MERGED => MergeCase::Merged,
298 236 SALVAGED => MergeCase::Salvaged,
299 237 _ => MergeCase::Normal,
@@ -305,32 +243,6 b" impl<'a> ChangedFiles<'a> {"
305 243 }
306 244 }
307 245
308 struct ActionsIterator<'a> {
309 changes: &'a ChangedFiles<'a>,
310 current: u32,
311 }
312
313 impl<'a> Iterator for ActionsIterator<'a> {
314 type Item = Action<'a>;
315
316 fn next(&mut self) -> Option<Action<'a>> {
317 while self.current < self.changes.nb_items {
318 let (flags, file, source) = self.changes.entry(self.current);
319 self.current += 1;
320 if (flags & ACTION_MASK) == REMOVED {
321 return Some(Action::Removed(file));
322 }
323 let copy = flags & COPY_MASK;
324 if copy == P1_COPY {
325 return Some(Action::CopiedFromP1(file, source));
326 } else if copy == P2_COPY {
327 return Some(Action::CopiedFromP2(file, source));
328 }
329 }
330 return None;
331 }
332 }
333
334 246 /// A small "tokenizer" responsible for turning full HgPath into lighter
335 247 /// PathToken
336 248 ///
General Comments 0
You need to be logged in to leave comments. Login now