##// END OF EJS Templates
rhg: Use binary search in manifest lookup...
Simon Sapin -
r49324:e293ff80 default
parent child Browse files
Show More
@@ -52,6 +52,15 b' impl Manifestlog {'
52 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
52 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
53 #[derive(Debug)]
53 #[derive(Debug)]
54 pub struct Manifest {
54 pub struct Manifest {
55 /// Format for a manifest: flat sequence of variable-size entries,
56 /// sorted by path, each as:
57 ///
58 /// ```text
59 /// <path> \0 <hex_node_id> <flags> \n
60 /// ```
61 ///
62 /// The last entry is also terminated by a newline character.
63 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
55 bytes: Vec<u8>,
64 bytes: Vec<u8>,
56 }
65 }
57
66
@@ -62,44 +71,84 b' impl Manifest {'
62 self.bytes
71 self.bytes
63 .split(|b| b == &b'\n')
72 .split(|b| b == &b'\n')
64 .filter(|line| !line.is_empty())
73 .filter(|line| !line.is_empty())
65 .map(|line| {
74 .map(ManifestEntry::from_raw)
66 let (path, rest) = line.split_2(b'\0').ok_or_else(|| {
67 HgError::corrupted("manifest line should contain \\0")
68 })?;
69 let path = HgPath::new(path);
70 let (hex_node_id, flags) = match rest.split_last() {
71 Some((&b'x', rest)) => (rest, Some(b'x')),
72 Some((&b'l', rest)) => (rest, Some(b'l')),
73 Some((&b't', rest)) => (rest, Some(b't')),
74 _ => (rest, None),
75 };
76 Ok(ManifestEntry {
77 path,
78 hex_node_id,
79 flags,
80 })
81 })
82 }
75 }
83
76
84 /// If the given path is in this manifest, return its filelog node ID
77 /// If the given path is in this manifest, return its filelog node ID
85 pub fn find_file(
78 pub fn find_by_path(
86 &self,
79 &self,
87 path: &HgPath,
80 path: &HgPath,
88 ) -> Result<Option<ManifestEntry>, HgError> {
81 ) -> Result<Option<ManifestEntry>, HgError> {
89 // TODO: use binary search instead of linear scan. This may involve
82 use std::cmp::Ordering::*;
90 // building (and caching) an index of the byte indicex of each manifest
83 let path = path.as_bytes();
91 // line.
84 // Both boundaries of this `&[u8]` slice are always at the boundary of
85 // an entry
86 let mut bytes = &*self.bytes;
92
87
93 // TODO: use try_find when available (if still using linear scan)
88 // Binary search algorithm derived from `[T]::binary_search_by`
94 // https://github.com/rust-lang/rust/issues/63178
89 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
95 for entry in self.iter() {
90 // except we don’t have a slice of entries. Instead we jump to the
96 let entry = entry?;
91 // middle of the byte slice and look around for entry delimiters
97 if entry.path == path {
92 // (newlines).
98 return Ok(Some(entry));
93 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
94 let (entry_path, rest) =
95 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
96 let cmp = entry_path.cmp(path);
97 if cmp == Less {
98 let after_newline = entry_range.end + 1;
99 bytes = &bytes[after_newline..];
100 } else if cmp == Greater {
101 bytes = &bytes[..entry_range.start];
102 } else {
103 return Ok(Some(ManifestEntry::from_path_and_rest(
104 entry_path, rest,
105 )));
99 }
106 }
100 }
107 }
101 Ok(None)
108 Ok(None)
102 }
109 }
110
111 /// If there is at least one, return the byte range of an entry *excluding*
112 /// the final newline.
113 fn find_entry_near_middle_of(
114 bytes: &[u8],
115 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
116 let len = bytes.len();
117 if len > 0 {
118 let middle = bytes.len() / 2;
119 // Integer division rounds down, so `middle < len`.
120 let (before, after) = bytes.split_at(middle);
121 let is_newline = |&byte: &u8| byte == b'\n';
122 let entry_start = match before.iter().rposition(is_newline) {
123 Some(i) => i + 1,
124 None => 0, // We choose the first entry in `bytes`
125 };
126 let entry_end = match after.iter().position(is_newline) {
127 Some(i) => {
128 // No `+ 1` here to exclude this newline from the range
129 middle + i
130 }
131 None => {
132 // In a well-formed manifest:
133 //
134 // * Since `len > 0`, `bytes` contains at least one entry
135 // * Every entry ends with a newline
136 // * Since `middle < len`, `after` contains at least the
137 // newline at the end of the last entry of `bytes`.
138 //
139 // We didn’t find a newline, so this manifest is not
140 // well-formed.
141 return Err(HgError::corrupted(
142 "manifest entry without \\n delimiter",
143 ));
144 }
145 };
146 Ok(Some(entry_start..entry_end))
147 } else {
148 // len == 0
149 Ok(None)
150 }
151 }
103 }
152 }
104
153
105 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
154 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
@@ -112,7 +161,32 b" pub struct ManifestEntry<'manifest> {"
112 pub flags: Option<u8>,
161 pub flags: Option<u8>,
113 }
162 }
114
163
115 impl ManifestEntry<'_> {
164 impl<'a> ManifestEntry<'a> {
165 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
166 bytes.split_2(b'\0').ok_or_else(|| {
167 HgError::corrupted("manifest entry without \\0 delimiter")
168 })
169 }
170
171 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
172 let (hex_node_id, flags) = match rest.split_last() {
173 Some((&b'x', rest)) => (rest, Some(b'x')),
174 Some((&b'l', rest)) => (rest, Some(b'l')),
175 Some((&b't', rest)) => (rest, Some(b't')),
176 _ => (rest, None),
177 };
178 Self {
179 path: HgPath::new(path),
180 hex_node_id,
181 flags,
182 }
183 }
184
185 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
186 let (path, rest) = Self::split_path(bytes)?;
187 Ok(Self::from_path_and_rest(path, rest))
188 }
189
116 pub fn node_id(&self) -> Result<Node, HgError> {
190 pub fn node_id(&self) -> Result<Node, HgError> {
117 Node::from_hex_for_repo(self.hex_node_id)
191 Node::from_hex_for_repo(self.hex_node_id)
118 }
192 }
@@ -473,7 +473,7 b' fn unsure_is_modified('
473 };
473 };
474
474
475 let entry = manifest
475 let entry = manifest
476 .find_file(hg_path)?
476 .find_by_path(hg_path)?
477 .expect("ambgious file not in p1");
477 .expect("ambgious file not in p1");
478 if entry.flags != fs_flags {
478 if entry.flags != fs_flags {
479 return Ok(true);
479 return Ok(true);
General Comments 0
You need to be logged in to leave comments. Login now