Show More
@@ -52,6 +52,15 b' impl Manifestlog {' | |||
|
52 | 52 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
53 | 53 | #[derive(Debug)] |
|
54 | 54 | pub struct Manifest { |
|
55 | /// Format for a manifest: flat sequence of variable-size entries, | |
|
56 | /// sorted by path, each as: | |
|
57 | /// | |
|
58 | /// ```text | |
|
59 | /// <path> \0 <hex_node_id> <flags> \n | |
|
60 | /// ``` | |
|
61 | /// | |
|
62 | /// The last entry is also terminated by a newline character. | |
|
63 | /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. | |
|
55 | 64 | bytes: Vec<u8>, |
|
56 | 65 | } |
|
57 | 66 | |
@@ -62,44 +71,84 b' impl Manifest {' | |||
|
62 | 71 | self.bytes |
|
63 | 72 | .split(|b| b == &b'\n') |
|
64 | 73 | .filter(|line| !line.is_empty()) |
|
65 | .map(|line| { | |
|
66 | let (path, rest) = line.split_2(b'\0').ok_or_else(|| { | |
|
67 | HgError::corrupted("manifest line should contain \\0") | |
|
68 | })?; | |
|
69 | let path = HgPath::new(path); | |
|
70 | let (hex_node_id, flags) = match rest.split_last() { | |
|
71 | Some((&b'x', rest)) => (rest, Some(b'x')), | |
|
72 | Some((&b'l', rest)) => (rest, Some(b'l')), | |
|
73 | Some((&b't', rest)) => (rest, Some(b't')), | |
|
74 | _ => (rest, None), | |
|
75 | }; | |
|
76 | Ok(ManifestEntry { | |
|
77 | path, | |
|
78 | hex_node_id, | |
|
79 | flags, | |
|
80 | }) | |
|
81 | }) | |
|
74 | .map(ManifestEntry::from_raw) | |
|
82 | 75 | } |
|
83 | 76 | |
|
84 | 77 | /// If the given path is in this manifest, return its filelog node ID |
|
85 |
pub fn find_ |
|
|
78 | pub fn find_by_path( | |
|
86 | 79 | &self, |
|
87 | 80 | path: &HgPath, |
|
88 | 81 | ) -> Result<Option<ManifestEntry>, HgError> { |
|
89 | // TODO: use binary search instead of linear scan. This may involve | |
|
90 | // building (and caching) an index of the byte indicex of each manifest | |
|
91 | // line. | |
|
82 | use std::cmp::Ordering::*; | |
|
83 | let path = path.as_bytes(); | |
|
84 | // Both boundaries of this `&[u8]` slice are always at the boundary of | |
|
85 | // an entry | |
|
86 | let mut bytes = &*self.bytes; | |
|
92 | 87 | |
|
93 | // TODO: use try_find when available (if still using linear scan) | |
|
94 |
// https://github.com/rust-lang/rust/ |
|
|
95 | for entry in self.iter() { | |
|
96 | let entry = entry?; | |
|
97 | if entry.path == path { | |
|
98 | return Ok(Some(entry)); | |
|
88 | // Binary search algorithm derived from `[T]::binary_search_by` | |
|
89 | // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> | |
|
90 | // except we don’t have a slice of entries. Instead we jump to the | |
|
91 | // middle of the byte slice and look around for entry delimiters | |
|
92 | // (newlines). | |
|
93 | while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { | |
|
94 | let (entry_path, rest) = | |
|
95 | ManifestEntry::split_path(&bytes[entry_range.clone()])?; | |
|
96 | let cmp = entry_path.cmp(path); | |
|
97 | if cmp == Less { | |
|
98 | let after_newline = entry_range.end + 1; | |
|
99 | bytes = &bytes[after_newline..]; | |
|
100 | } else if cmp == Greater { | |
|
101 | bytes = &bytes[..entry_range.start]; | |
|
102 | } else { | |
|
103 | return Ok(Some(ManifestEntry::from_path_and_rest( | |
|
104 | entry_path, rest, | |
|
105 | ))); | |
|
99 | 106 | } |
|
100 | 107 | } |
|
101 | 108 | Ok(None) |
|
102 | 109 | } |
|
110 | ||
|
111 | /// If there is at least one, return the byte range of an entry *excluding* | |
|
112 | /// the final newline. | |
|
113 | fn find_entry_near_middle_of( | |
|
114 | bytes: &[u8], | |
|
115 | ) -> Result<Option<std::ops::Range<usize>>, HgError> { | |
|
116 | let len = bytes.len(); | |
|
117 | if len > 0 { | |
|
118 | let middle = bytes.len() / 2; | |
|
119 | // Integer division rounds down, so `middle < len`. | |
|
120 | let (before, after) = bytes.split_at(middle); | |
|
121 | let is_newline = |&byte: &u8| byte == b'\n'; | |
|
122 | let entry_start = match before.iter().rposition(is_newline) { | |
|
123 | Some(i) => i + 1, | |
|
124 | None => 0, // We choose the first entry in `bytes` | |
|
125 | }; | |
|
126 | let entry_end = match after.iter().position(is_newline) { | |
|
127 | Some(i) => { | |
|
128 | // No `+ 1` here to exclude this newline from the range | |
|
129 | middle + i | |
|
130 | } | |
|
131 | None => { | |
|
132 | // In a well-formed manifest: | |
|
133 | // | |
|
134 | // * Since `len > 0`, `bytes` contains at least one entry | |
|
135 | // * Every entry ends with a newline | |
|
136 | // * Since `middle < len`, `after` contains at least the | |
|
137 | // newline at the end of the last entry of `bytes`. | |
|
138 | // | |
|
139 | // We didn’t find a newline, so this manifest is not | |
|
140 | // well-formed. | |
|
141 | return Err(HgError::corrupted( | |
|
142 | "manifest entry without \\n delimiter", | |
|
143 | )); | |
|
144 | } | |
|
145 | }; | |
|
146 | Ok(Some(entry_start..entry_end)) | |
|
147 | } else { | |
|
148 | // len == 0 | |
|
149 | Ok(None) | |
|
150 | } | |
|
151 | } | |
|
103 | 152 | } |
|
104 | 153 | |
|
105 | 154 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
@@ -112,7 +161,32 b" pub struct ManifestEntry<'manifest> {" | |||
|
112 | 161 | pub flags: Option<u8>, |
|
113 | 162 | } |
|
114 | 163 | |
|
115 |
impl ManifestEntry<' |
|
|
164 | impl<'a> ManifestEntry<'a> { | |
|
165 | fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { | |
|
166 | bytes.split_2(b'\0').ok_or_else(|| { | |
|
167 | HgError::corrupted("manifest entry without \\0 delimiter") | |
|
168 | }) | |
|
169 | } | |
|
170 | ||
|
171 | fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { | |
|
172 | let (hex_node_id, flags) = match rest.split_last() { | |
|
173 | Some((&b'x', rest)) => (rest, Some(b'x')), | |
|
174 | Some((&b'l', rest)) => (rest, Some(b'l')), | |
|
175 | Some((&b't', rest)) => (rest, Some(b't')), | |
|
176 | _ => (rest, None), | |
|
177 | }; | |
|
178 | Self { | |
|
179 | path: HgPath::new(path), | |
|
180 | hex_node_id, | |
|
181 | flags, | |
|
182 | } | |
|
183 | } | |
|
184 | ||
|
185 | fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { | |
|
186 | let (path, rest) = Self::split_path(bytes)?; | |
|
187 | Ok(Self::from_path_and_rest(path, rest)) | |
|
188 | } | |
|
189 | ||
|
116 | 190 | pub fn node_id(&self) -> Result<Node, HgError> { |
|
117 | 191 | Node::from_hex_for_repo(self.hex_node_id) |
|
118 | 192 | } |
General Comments 0
You need to be logged in to leave comments.
Login now