##// END OF EJS Templates
copies-rust: rewrite ChangedFiles binary parsing...
Simon Sapin -
r47413:f977a065 default
parent child Browse files
Show More
@@ -3,13 +3,13 b' use crate::utils::hg_path::HgPathBuf;'
3 use crate::Revision;
3 use crate::Revision;
4 use crate::NULL_REVISION;
4 use crate::NULL_REVISION;
5
5
6 use bytes_cast::{unaligned, BytesCast};
6 use im_rc::ordmap::Entry;
7 use im_rc::ordmap::Entry;
7 use im_rc::ordmap::OrdMap;
8 use im_rc::ordmap::OrdMap;
8 use im_rc::OrdSet;
9 use im_rc::OrdSet;
9
10
10 use std::cmp::Ordering;
11 use std::cmp::Ordering;
11 use std::collections::HashMap;
12 use std::collections::HashMap;
12 use std::convert::TryInto;
13
13
14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15
15
@@ -110,18 +110,6 b' impl PartialEq for CopySource {'
110 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
110 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
111 type InternalPathCopies = OrdMap<PathToken, CopySource>;
111 type InternalPathCopies = OrdMap<PathToken, CopySource>;
112
112
113 /// represent the files affected by a changesets
114 ///
115 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
116 /// all the data categories tracked by it.
117 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
118 /// all the data categories tracked by it.
119 pub struct ChangedFiles<'a> {
120 nb_items: u32,
121 index: &'a [u8],
122 data: &'a [u8],
123 }
124
125 /// Represent active changes that affect the copy tracing.
113 /// Represent active changes that affect the copy tracing.
126 enum Action<'a> {
114 enum Action<'a> {
127 /// The parent → children edge is removing a file
115 /// The parent → children edge is removing a file
@@ -148,9 +136,6 b' enum MergeCase {'
148 Normal,
136 Normal,
149 }
137 }
150
138
151 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
152
153 const EMPTY: &[u8] = b"";
154 const COPY_MASK: u8 = 3;
139 const COPY_MASK: u8 = 3;
155 const P1_COPY: u8 = 2;
140 const P1_COPY: u8 = 2;
156 const P2_COPY: u8 = 3;
141 const P2_COPY: u8 = 3;
@@ -159,141 +144,94 b' const REMOVED: u8 = 12;'
159 const MERGED: u8 = 8;
144 const MERGED: u8 = 8;
160 const SALVAGED: u8 = 16;
145 const SALVAGED: u8 = 16;
161
146
162 impl<'a> ChangedFiles<'a> {
147 #[derive(BytesCast)]
163 const INDEX_START: usize = 4;
148 #[repr(C)]
164 const ENTRY_SIZE: u32 = 9;
149 struct ChangedFilesIndexEntry {
165 const FILENAME_START: u32 = 1;
150 flags: u8,
166 const COPY_SOURCE_START: u32 = 5;
167
151
168 pub fn new(data: &'a [u8]) -> Self {
152 /// Only the end position is stored. The start is at the end of the
169 assert!(
153 /// previous entry.
170 data.len() >= 4,
154 destination_path_end_position: unaligned::U32Be,
171 "data size ({}) is too small to contain the header (4)",
172 data.len()
173 );
174 let nb_items_raw: [u8; 4] = (&data[0..=3])
175 .try_into()
176 .expect("failed to turn 4 bytes into 4 bytes");
177 let nb_items = u32::from_be_bytes(nb_items_raw);
178
155
179 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
156 source_index_entry_position: unaligned::U32Be,
180 let index_end = Self::INDEX_START + index_size;
157 }
158
159 fn _static_assert_size_of() {
160 let _ = std::mem::transmute::<ChangedFilesIndexEntry, [u8; 9]>;
161 }
181
162
182 assert!(
163 /// Represents the files affected by a changeset.
183 data.len() >= index_end,
164 ///
184 "data size ({}) is too small to fit the index_data ({})",
165 /// This holds a subset of `mercurial.metadata.ChangingFiles` as we do not need
185 data.len(),
166 /// all the data categories tracked by it.
186 index_end
167 pub struct ChangedFiles<'a> {
187 );
168 index: &'a [ChangedFilesIndexEntry],
169 paths: &'a [u8],
170 }
188
171
189 let ret = ChangedFiles {
172 impl<'a> ChangedFiles<'a> {
190 nb_items,
173 pub fn new(data: &'a [u8]) -> Self {
191 index: &data[Self::INDEX_START..index_end],
174 let (header, rest) = unaligned::U32Be::from_bytes(data).unwrap();
192 data: &data[index_end..],
175 let nb_index_entries = header.get() as usize;
193 };
176 let (index, paths) =
194 let max_data = ret.filename_end(nb_items - 1) as usize;
177 ChangedFilesIndexEntry::slice_from_bytes(rest, nb_index_entries)
195 assert!(
178 .unwrap();
196 ret.data.len() >= max_data,
179 Self { index, paths }
197 "data size ({}) is too small to fit all data ({})",
198 data.len(),
199 index_end + max_data
200 );
201 ret
202 }
180 }
203
181
204 pub fn new_empty() -> Self {
182 pub fn new_empty() -> Self {
205 ChangedFiles {
183 ChangedFiles {
206 nb_items: 0,
184 index: &[],
207 index: EMPTY,
185 paths: &[],
208 data: EMPTY,
209 }
186 }
210 }
187 }
211
188
212 /// internal function to return an individual entry at a given index
189 /// Internal function to return the filename of the entry at a given index
213 fn entry(&'a self, idx: u32) -> FileChange<'a> {
190 fn path(&self, idx: usize) -> &HgPath {
214 if idx >= self.nb_items {
191 let start = if idx == 0 {
215 panic!(
192 0
216 "index for entry is higher that the number of file {} >= {}",
217 idx, self.nb_items
218 )
219 }
220 let flags = self.flags(idx);
221 let filename = self.filename(idx);
222 let copy_idx = self.copy_idx(idx);
223 let copy_source = self.filename(copy_idx);
224 (flags, filename, copy_source)
225 }
226
227 /// internal function to return the filename of the entry at a given index
228 fn filename(&self, idx: u32) -> &HgPath {
229 let filename_start;
230 if idx == 0 {
231 filename_start = 0;
232 } else {
193 } else {
233 filename_start = self.filename_end(idx - 1)
194 self.index[idx - 1].destination_path_end_position.get() as usize
234 }
195 };
235 let filename_end = self.filename_end(idx);
196 let end = self.index[idx].destination_path_end_position.get() as usize;
236 let filename_start = filename_start as usize;
197 HgPath::new(&self.paths[start..end])
237 let filename_end = filename_end as usize;
238 HgPath::new(&self.data[filename_start..filename_end])
239 }
240
241 /// internal function to return the flag field of the entry at a given
242 /// index
243 fn flags(&self, idx: u32) -> u8 {
244 let idx = idx as usize;
245 self.index[idx * (Self::ENTRY_SIZE as usize)]
246 }
247
248 /// internal function to return the end of a filename part at a given index
249 fn filename_end(&self, idx: u32) -> u32 {
250 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
251 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
252 let start = start as usize;
253 let end = end as usize;
254 let raw = (&self.index[start..end])
255 .try_into()
256 .expect("failed to turn 4 bytes into 4 bytes");
257 u32::from_be_bytes(raw)
258 }
259
260 /// internal function to return index of the copy source of the entry at a
261 /// given index
262 fn copy_idx(&self, idx: u32) -> u32 {
263 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
264 let end = (idx + 1) * Self::ENTRY_SIZE;
265 let start = start as usize;
266 let end = end as usize;
267 let raw = (&self.index[start..end])
268 .try_into()
269 .expect("failed to turn 4 bytes into 4 bytes");
270 u32::from_be_bytes(raw)
271 }
198 }
272
199
273 /// Return an iterator over all the `Action` in this instance.
200 /// Return an iterator over all the `Action` in this instance.
274 fn iter_actions(&self) -> ActionsIterator {
201 fn iter_actions(&self) -> impl Iterator<Item = Action> {
275 ActionsIterator {
202 self.index.iter().enumerate().flat_map(move |(idx, entry)| {
276 changes: &self,
203 let path = self.path(idx);
277 current: 0,
204 if (entry.flags & ACTION_MASK) == REMOVED {
205 Some(Action::Removed(path))
206 } else if (entry.flags & COPY_MASK) == P1_COPY {
207 let source_idx =
208 entry.source_index_entry_position.get() as usize;
209 Some(Action::CopiedFromP1(path, self.path(source_idx)))
210 } else if (entry.flags & COPY_MASK) == P2_COPY {
211 let source_idx =
212 entry.source_index_entry_position.get() as usize;
213 Some(Action::CopiedFromP2(path, self.path(source_idx)))
214 } else {
215 None
278 }
216 }
217 })
279 }
218 }
280
219
281 /// return the MergeCase value associated with a filename
220 /// return the MergeCase value associated with a filename
282 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
221 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
283 if self.nb_items == 0 {
222 if self.index.is_empty() {
284 return MergeCase::Normal;
223 return MergeCase::Normal;
285 }
224 }
286 let mut low_part = 0;
225 let mut low_part = 0;
287 let mut high_part = self.nb_items;
226 let mut high_part = self.index.len();
288
227
289 while low_part < high_part {
228 while low_part < high_part {
290 let cursor = (low_part + high_part - 1) / 2;
229 let cursor = (low_part + high_part - 1) / 2;
291 let (flags, filename, _source) = self.entry(cursor);
230 match path.cmp(self.path(cursor)) {
292 match path.cmp(filename) {
293 Ordering::Less => low_part = cursor + 1,
231 Ordering::Less => low_part = cursor + 1,
294 Ordering::Greater => high_part = cursor,
232 Ordering::Greater => high_part = cursor,
295 Ordering::Equal => {
233 Ordering::Equal => {
296 return match flags & ACTION_MASK {
234 return match self.index[cursor].flags & ACTION_MASK {
297 MERGED => MergeCase::Merged,
235 MERGED => MergeCase::Merged,
298 SALVAGED => MergeCase::Salvaged,
236 SALVAGED => MergeCase::Salvaged,
299 _ => MergeCase::Normal,
237 _ => MergeCase::Normal,
@@ -305,32 +243,6 b" impl<'a> ChangedFiles<'a> {"
305 }
243 }
306 }
244 }
307
245
308 struct ActionsIterator<'a> {
309 changes: &'a ChangedFiles<'a>,
310 current: u32,
311 }
312
313 impl<'a> Iterator for ActionsIterator<'a> {
314 type Item = Action<'a>;
315
316 fn next(&mut self) -> Option<Action<'a>> {
317 while self.current < self.changes.nb_items {
318 let (flags, file, source) = self.changes.entry(self.current);
319 self.current += 1;
320 if (flags & ACTION_MASK) == REMOVED {
321 return Some(Action::Removed(file));
322 }
323 let copy = flags & COPY_MASK;
324 if copy == P1_COPY {
325 return Some(Action::CopiedFromP1(file, source));
326 } else if copy == P2_COPY {
327 return Some(Action::CopiedFromP2(file, source));
328 }
329 }
330 return None;
331 }
332 }
333
334 /// A small "tokenizer" responsible for turning full HgPath into lighter
246 /// A small "tokenizer" responsible for turning full HgPath into lighter
335 /// PathToken
247 /// PathToken
336 ///
248 ///
General Comments 0
You need to be logged in to leave comments. Login now