Show More
@@ -52,6 +52,15 b' impl Manifestlog {' | |||||
52 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
52 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | |
53 | #[derive(Debug)] |
|
53 | #[derive(Debug)] | |
54 | pub struct Manifest { |
|
54 | pub struct Manifest { | |
|
55 | /// Format for a manifest: flat sequence of variable-size entries, | |||
|
56 | /// sorted by path, each as: | |||
|
57 | /// | |||
|
58 | /// ```text | |||
|
59 | /// <path> \0 <hex_node_id> <flags> \n | |||
|
60 | /// ``` | |||
|
61 | /// | |||
|
62 | /// The last entry is also terminated by a newline character. | |||
|
63 | /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. | |||
55 | bytes: Vec<u8>, |
|
64 | bytes: Vec<u8>, | |
56 | } |
|
65 | } | |
57 |
|
66 | |||
@@ -62,44 +71,84 b' impl Manifest {' | |||||
62 | self.bytes |
|
71 | self.bytes | |
63 | .split(|b| b == &b'\n') |
|
72 | .split(|b| b == &b'\n') | |
64 | .filter(|line| !line.is_empty()) |
|
73 | .filter(|line| !line.is_empty()) | |
65 | .map(|line| { |
|
74 | .map(ManifestEntry::from_raw) | |
66 | let (path, rest) = line.split_2(b'\0').ok_or_else(|| { |
|
|||
67 | HgError::corrupted("manifest line should contain \\0") |
|
|||
68 | })?; |
|
|||
69 | let path = HgPath::new(path); |
|
|||
70 | let (hex_node_id, flags) = match rest.split_last() { |
|
|||
71 | Some((&b'x', rest)) => (rest, Some(b'x')), |
|
|||
72 | Some((&b'l', rest)) => (rest, Some(b'l')), |
|
|||
73 | Some((&b't', rest)) => (rest, Some(b't')), |
|
|||
74 | _ => (rest, None), |
|
|||
75 | }; |
|
|||
76 | Ok(ManifestEntry { |
|
|||
77 | path, |
|
|||
78 | hex_node_id, |
|
|||
79 | flags, |
|
|||
80 | }) |
|
|||
81 | }) |
|
|||
82 | } |
|
75 | } | |
83 |
|
76 | |||
84 | /// If the given path is in this manifest, return its filelog node ID |
|
77 | /// If the given path is in this manifest, return its filelog node ID | |
85 |
pub fn find_ |
|
78 | pub fn find_by_path( | |
86 | &self, |
|
79 | &self, | |
87 | path: &HgPath, |
|
80 | path: &HgPath, | |
88 | ) -> Result<Option<ManifestEntry>, HgError> { |
|
81 | ) -> Result<Option<ManifestEntry>, HgError> { | |
89 | // TODO: use binary search instead of linear scan. This may involve |
|
82 | use std::cmp::Ordering::*; | |
90 | // building (and caching) an index of the byte indicex of each manifest |
|
83 | let path = path.as_bytes(); | |
91 | // line. |
|
84 | // Both boundaries of this `&[u8]` slice are always at the boundary of | |
|
85 | // an entry | |||
|
86 | let mut bytes = &*self.bytes; | |||
92 |
|
87 | |||
93 | // TODO: use try_find when available (if still using linear scan) |
|
88 | // Binary search algorithm derived from `[T]::binary_search_by` | |
94 |
// https://github.com/rust-lang/rust/ |
|
89 | // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> | |
95 | for entry in self.iter() { |
|
90 | // except we don’t have a slice of entries. Instead we jump to the | |
96 | let entry = entry?; |
|
91 | // middle of the byte slice and look around for entry delimiters | |
97 | if entry.path == path { |
|
92 | // (newlines). | |
98 | return Ok(Some(entry)); |
|
93 | while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { | |
|
94 | let (entry_path, rest) = | |||
|
95 | ManifestEntry::split_path(&bytes[entry_range.clone()])?; | |||
|
96 | let cmp = entry_path.cmp(path); | |||
|
97 | if cmp == Less { | |||
|
98 | let after_newline = entry_range.end + 1; | |||
|
99 | bytes = &bytes[after_newline..]; | |||
|
100 | } else if cmp == Greater { | |||
|
101 | bytes = &bytes[..entry_range.start]; | |||
|
102 | } else { | |||
|
103 | return Ok(Some(ManifestEntry::from_path_and_rest( | |||
|
104 | entry_path, rest, | |||
|
105 | ))); | |||
99 | } |
|
106 | } | |
100 | } |
|
107 | } | |
101 | Ok(None) |
|
108 | Ok(None) | |
102 | } |
|
109 | } | |
|
110 | ||||
|
111 | /// If there is at least one, return the byte range of an entry *excluding* | |||
|
112 | /// the final newline. | |||
|
113 | fn find_entry_near_middle_of( | |||
|
114 | bytes: &[u8], | |||
|
115 | ) -> Result<Option<std::ops::Range<usize>>, HgError> { | |||
|
116 | let len = bytes.len(); | |||
|
117 | if len > 0 { | |||
|
118 | let middle = bytes.len() / 2; | |||
|
119 | // Integer division rounds down, so `middle < len`. | |||
|
120 | let (before, after) = bytes.split_at(middle); | |||
|
121 | let is_newline = |&byte: &u8| byte == b'\n'; | |||
|
122 | let entry_start = match before.iter().rposition(is_newline) { | |||
|
123 | Some(i) => i + 1, | |||
|
124 | None => 0, // We choose the first entry in `bytes` | |||
|
125 | }; | |||
|
126 | let entry_end = match after.iter().position(is_newline) { | |||
|
127 | Some(i) => { | |||
|
128 | // No `+ 1` here to exclude this newline from the range | |||
|
129 | middle + i | |||
|
130 | } | |||
|
131 | None => { | |||
|
132 | // In a well-formed manifest: | |||
|
133 | // | |||
|
134 | // * Since `len > 0`, `bytes` contains at least one entry | |||
|
135 | // * Every entry ends with a newline | |||
|
136 | // * Since `middle < len`, `after` contains at least the | |||
|
137 | // newline at the end of the last entry of `bytes`. | |||
|
138 | // | |||
|
139 | // We didn’t find a newline, so this manifest is not | |||
|
140 | // well-formed. | |||
|
141 | return Err(HgError::corrupted( | |||
|
142 | "manifest entry without \\n delimiter", | |||
|
143 | )); | |||
|
144 | } | |||
|
145 | }; | |||
|
146 | Ok(Some(entry_start..entry_end)) | |||
|
147 | } else { | |||
|
148 | // len == 0 | |||
|
149 | Ok(None) | |||
|
150 | } | |||
|
151 | } | |||
103 | } |
|
152 | } | |
104 |
|
153 | |||
105 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
154 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | |
@@ -112,7 +161,32 b" pub struct ManifestEntry<'manifest> {" | |||||
112 | pub flags: Option<u8>, |
|
161 | pub flags: Option<u8>, | |
113 | } |
|
162 | } | |
114 |
|
163 | |||
115 |
impl ManifestEntry<' |
|
164 | impl<'a> ManifestEntry<'a> { | |
|
165 | fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { | |||
|
166 | bytes.split_2(b'\0').ok_or_else(|| { | |||
|
167 | HgError::corrupted("manifest entry without \\0 delimiter") | |||
|
168 | }) | |||
|
169 | } | |||
|
170 | ||||
|
171 | fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { | |||
|
172 | let (hex_node_id, flags) = match rest.split_last() { | |||
|
173 | Some((&b'x', rest)) => (rest, Some(b'x')), | |||
|
174 | Some((&b'l', rest)) => (rest, Some(b'l')), | |||
|
175 | Some((&b't', rest)) => (rest, Some(b't')), | |||
|
176 | _ => (rest, None), | |||
|
177 | }; | |||
|
178 | Self { | |||
|
179 | path: HgPath::new(path), | |||
|
180 | hex_node_id, | |||
|
181 | flags, | |||
|
182 | } | |||
|
183 | } | |||
|
184 | ||||
|
185 | fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { | |||
|
186 | let (path, rest) = Self::split_path(bytes)?; | |||
|
187 | Ok(Self::from_path_and_rest(path, rest)) | |||
|
188 | } | |||
|
189 | ||||
116 | pub fn node_id(&self) -> Result<Node, HgError> { |
|
190 | pub fn node_id(&self) -> Result<Node, HgError> { | |
117 | Node::from_hex_for_repo(self.hex_node_id) |
|
191 | Node::from_hex_for_repo(self.hex_node_id) | |
118 | } |
|
192 | } |
@@ -473,7 +473,7 b' fn unsure_is_modified(' | |||||
473 | }; |
|
473 | }; | |
474 |
|
474 | |||
475 | let entry = manifest |
|
475 | let entry = manifest | |
476 |
.find_ |
|
476 | .find_by_path(hg_path)? | |
477 | .expect("ambgious file not in p1"); |
|
477 | .expect("ambgious file not in p1"); | |
478 | if entry.flags != fs_flags { |
|
478 | if entry.flags != fs_flags { | |
479 | return Ok(true); |
|
479 | return Ok(true); |
General Comments 0
You need to be logged in to leave comments.
Login now