##// END OF EJS Templates
rust: run a clippy pass with the latest stable version...
Raphaël Gomès -
r52013:532e74ad default
parent child Browse files
Show More
@@ -1,349 +1,349 b''
1 // layer.rs
1 // layer.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Valentin Gatien-Baron,
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
14 use lazy_static::lazy_static;
14 use lazy_static::lazy_static;
15 use regex::bytes::Regex;
15 use regex::bytes::Regex;
16 use std::collections::HashMap;
16 use std::collections::HashMap;
17 use std::path::{Path, PathBuf};
17 use std::path::{Path, PathBuf};
18
18
19 lazy_static! {
19 lazy_static! {
20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
22 /// Continuation whitespace
22 /// Continuation whitespace
23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
26 /// A directive that allows for removing previous entries
26 /// A directive that allows for removing previous entries
27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
28 /// A directive that allows for including other config files
28 /// A directive that allows for including other config files
29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
30 }
30 }
31
31
32 /// All config values separated by layers of precedence.
32 /// All config values separated by layers of precedence.
33 /// Each config source may be split in multiple layers if `%include` directives
33 /// Each config source may be split in multiple layers if `%include` directives
34 /// are used.
34 /// are used.
35 /// TODO detail the general precedence
35 /// TODO detail the general precedence
36 #[derive(Clone)]
36 #[derive(Clone)]
37 pub struct ConfigLayer {
37 pub struct ConfigLayer {
38 /// Mapping of the sections to their items
38 /// Mapping of the sections to their items
39 sections: HashMap<Vec<u8>, ConfigItem>,
39 sections: HashMap<Vec<u8>, ConfigItem>,
40 /// All sections (and their items/values) in a layer share the same origin
40 /// All sections (and their items/values) in a layer share the same origin
41 pub origin: ConfigOrigin,
41 pub origin: ConfigOrigin,
42 /// Whether this layer comes from a trusted user or group
42 /// Whether this layer comes from a trusted user or group
43 pub trusted: bool,
43 pub trusted: bool,
44 }
44 }
45
45
46 impl ConfigLayer {
46 impl ConfigLayer {
47 pub fn new(origin: ConfigOrigin) -> Self {
47 pub fn new(origin: ConfigOrigin) -> Self {
48 ConfigLayer {
48 ConfigLayer {
49 sections: HashMap::new(),
49 sections: HashMap::new(),
50 trusted: true, // TODO check
50 trusted: true, // TODO check
51 origin,
51 origin,
52 }
52 }
53 }
53 }
54
54
55 /// Parse `--config` CLI arguments and return a layer if there’s any
55 /// Parse `--config` CLI arguments and return a layer if there’s any
56 pub(crate) fn parse_cli_args(
56 pub(crate) fn parse_cli_args(
57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
58 ) -> Result<Option<Self>, ConfigError> {
58 ) -> Result<Option<Self>, ConfigError> {
59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
60 use crate::utils::SliceExt;
60 use crate::utils::SliceExt;
61
61
62 let (section_and_item, value) = arg.split_2(b'=')?;
62 let (section_and_item, value) = arg.split_2(b'=')?;
63 let (section, item) = section_and_item.trim().split_2(b'.')?;
63 let (section, item) = section_and_item.trim().split_2(b'.')?;
64 Some((
64 Some((
65 section.to_owned(),
65 section.to_owned(),
66 item.to_owned(),
66 item.to_owned(),
67 value.trim().to_owned(),
67 value.trim().to_owned(),
68 ))
68 ))
69 }
69 }
70
70
71 let mut layer = Self::new(ConfigOrigin::CommandLine);
71 let mut layer = Self::new(ConfigOrigin::CommandLine);
72 for arg in cli_config_args {
72 for arg in cli_config_args {
73 let arg = arg.as_ref();
73 let arg = arg.as_ref();
74 if let Some((section, item, value)) = parse_one(arg) {
74 if let Some((section, item, value)) = parse_one(arg) {
75 layer.add(section, item, value, None);
75 layer.add(section, item, value, None);
76 } else {
76 } else {
77 Err(HgError::abort(
77 Err(HgError::abort(
78 format!(
78 format!(
79 "abort: malformed --config option: '{}' \
79 "abort: malformed --config option: '{}' \
80 (use --config section.name=value)",
80 (use --config section.name=value)",
81 String::from_utf8_lossy(arg),
81 String::from_utf8_lossy(arg),
82 ),
82 ),
83 CONFIG_PARSE_ERROR_ABORT,
83 CONFIG_PARSE_ERROR_ABORT,
84 None,
84 None,
85 ))?
85 ))?
86 }
86 }
87 }
87 }
88 if layer.sections.is_empty() {
88 if layer.sections.is_empty() {
89 Ok(None)
89 Ok(None)
90 } else {
90 } else {
91 Ok(Some(layer))
91 Ok(Some(layer))
92 }
92 }
93 }
93 }
94
94
95 /// Returns whether this layer comes from `--config` CLI arguments
95 /// Returns whether this layer comes from `--config` CLI arguments
96 pub(crate) fn is_from_command_line(&self) -> bool {
96 pub(crate) fn is_from_command_line(&self) -> bool {
97 matches!(self.origin, ConfigOrigin::CommandLine)
97 matches!(self.origin, ConfigOrigin::CommandLine)
98 }
98 }
99
99
100 /// Add an entry to the config, overwriting the old one if already present.
100 /// Add an entry to the config, overwriting the old one if already present.
101 pub fn add(
101 pub fn add(
102 &mut self,
102 &mut self,
103 section: Vec<u8>,
103 section: Vec<u8>,
104 item: Vec<u8>,
104 item: Vec<u8>,
105 value: Vec<u8>,
105 value: Vec<u8>,
106 line: Option<usize>,
106 line: Option<usize>,
107 ) {
107 ) {
108 self.sections
108 self.sections
109 .entry(section)
109 .entry(section)
110 .or_insert_with(HashMap::new)
110 .or_default()
111 .insert(item, ConfigValue { bytes: value, line });
111 .insert(item, ConfigValue { bytes: value, line });
112 }
112 }
113
113
114 /// Returns the config value in `<section>.<item>` if it exists
114 /// Returns the config value in `<section>.<item>` if it exists
115 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
115 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
116 self.sections.get(section)?.get(item)
116 self.sections.get(section)?.get(item)
117 }
117 }
118
118
119 /// Returns the keys defined in the given section
119 /// Returns the keys defined in the given section
120 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
120 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
121 self.sections
121 self.sections
122 .get(section)
122 .get(section)
123 .into_iter()
123 .into_iter()
124 .flat_map(|section| section.keys().map(|vec| &**vec))
124 .flat_map(|section| section.keys().map(|vec| &**vec))
125 }
125 }
126
126
127 /// Returns the (key, value) pairs defined in the given section
127 /// Returns the (key, value) pairs defined in the given section
128 pub fn iter_section<'layer>(
128 pub fn iter_section<'layer>(
129 &'layer self,
129 &'layer self,
130 section: &[u8],
130 section: &[u8],
131 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
131 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
132 self.sections
132 self.sections
133 .get(section)
133 .get(section)
134 .into_iter()
134 .into_iter()
135 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
135 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
136 }
136 }
137
137
138 /// Returns whether any key is defined in the given section
138 /// Returns whether any key is defined in the given section
139 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
139 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
140 self.sections
140 self.sections
141 .get(section)
141 .get(section)
142 .map_or(false, |section| !section.is_empty())
142 .map_or(false, |section| !section.is_empty())
143 }
143 }
144
144
145 pub fn is_empty(&self) -> bool {
145 pub fn is_empty(&self) -> bool {
146 self.sections.is_empty()
146 self.sections.is_empty()
147 }
147 }
148
148
149 /// Returns a `Vec` of layers in order of precedence (so, in read order),
149 /// Returns a `Vec` of layers in order of precedence (so, in read order),
150 /// recursively parsing the `%include` directives if any.
150 /// recursively parsing the `%include` directives if any.
151 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
151 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
152 let mut layers = vec![];
152 let mut layers = vec![];
153
153
154 // Discard byte order mark if any
154 // Discard byte order mark if any
155 let data = if data.starts_with(b"\xef\xbb\xbf") {
155 let data = if data.starts_with(b"\xef\xbb\xbf") {
156 &data[3..]
156 &data[3..]
157 } else {
157 } else {
158 data
158 data
159 };
159 };
160
160
161 // TODO check if it's trusted
161 // TODO check if it's trusted
162 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
162 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
163
163
164 let mut lines_iter =
164 let mut lines_iter =
165 data.split(|b| *b == b'\n').enumerate().peekable();
165 data.split(|b| *b == b'\n').enumerate().peekable();
166 let mut section = b"".to_vec();
166 let mut section = b"".to_vec();
167
167
168 while let Some((index, bytes)) = lines_iter.next() {
168 while let Some((index, bytes)) = lines_iter.next() {
169 let line = Some(index + 1);
169 let line = Some(index + 1);
170 if let Some(m) = INCLUDE_RE.captures(bytes) {
170 if let Some(m) = INCLUDE_RE.captures(bytes) {
171 let filename_bytes = &m[1];
171 let filename_bytes = &m[1];
172 let filename_bytes = crate::utils::expand_vars(filename_bytes);
172 let filename_bytes = crate::utils::expand_vars(filename_bytes);
173 // `Path::parent` only fails for the root directory,
173 // `Path::parent` only fails for the root directory,
174 // which `src` can’t be since we’ve managed to open it as a
174 // which `src` can’t be since we’ve managed to open it as a
175 // file.
175 // file.
176 let dir = src
176 let dir = src
177 .parent()
177 .parent()
178 .expect("Path::parent fail on a file we’ve read");
178 .expect("Path::parent fail on a file we’ve read");
179 // `Path::join` with an absolute argument correctly ignores the
179 // `Path::join` with an absolute argument correctly ignores the
180 // base path
180 // base path
181 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
181 let filename = dir.join(get_path_from_bytes(&filename_bytes));
182 match std::fs::read(&filename) {
182 match std::fs::read(&filename) {
183 Ok(data) => {
183 Ok(data) => {
184 layers.push(current_layer);
184 layers.push(current_layer);
185 layers.extend(Self::parse(&filename, &data)?);
185 layers.extend(Self::parse(&filename, &data)?);
186 current_layer =
186 current_layer =
187 Self::new(ConfigOrigin::File(src.to_owned()));
187 Self::new(ConfigOrigin::File(src.to_owned()));
188 }
188 }
189 Err(error) => {
189 Err(error) => {
190 if error.kind() != std::io::ErrorKind::NotFound {
190 if error.kind() != std::io::ErrorKind::NotFound {
191 return Err(ConfigParseError {
191 return Err(ConfigParseError {
192 origin: ConfigOrigin::File(src.to_owned()),
192 origin: ConfigOrigin::File(src.to_owned()),
193 line,
193 line,
194 message: format_bytes!(
194 message: format_bytes!(
195 b"cannot include {} ({})",
195 b"cannot include {} ({})",
196 filename_bytes,
196 filename_bytes,
197 format_bytes::Utf8(error)
197 format_bytes::Utf8(error)
198 ),
198 ),
199 }
199 }
200 .into());
200 .into());
201 }
201 }
202 }
202 }
203 }
203 }
204 } else if EMPTY_RE.captures(bytes).is_some() {
204 } else if EMPTY_RE.captures(bytes).is_some() {
205 } else if let Some(m) = SECTION_RE.captures(bytes) {
205 } else if let Some(m) = SECTION_RE.captures(bytes) {
206 section = m[1].to_vec();
206 section = m[1].to_vec();
207 } else if let Some(m) = ITEM_RE.captures(bytes) {
207 } else if let Some(m) = ITEM_RE.captures(bytes) {
208 let item = m[1].to_vec();
208 let item = m[1].to_vec();
209 let mut value = m[2].to_vec();
209 let mut value = m[2].to_vec();
210 loop {
210 loop {
211 match lines_iter.peek() {
211 match lines_iter.peek() {
212 None => break,
212 None => break,
213 Some((_, v)) => {
213 Some((_, v)) => {
214 if COMMENT_RE.captures(v).is_some() {
214 if COMMENT_RE.captures(v).is_some() {
215 } else if CONT_RE.captures(v).is_some() {
215 } else if CONT_RE.captures(v).is_some() {
216 value.extend(b"\n");
216 value.extend(b"\n");
217 value.extend(&m[1]);
217 value.extend(&m[1]);
218 } else {
218 } else {
219 break;
219 break;
220 }
220 }
221 }
221 }
222 };
222 };
223 lines_iter.next();
223 lines_iter.next();
224 }
224 }
225 current_layer.add(section.clone(), item, value, line);
225 current_layer.add(section.clone(), item, value, line);
226 } else if let Some(m) = UNSET_RE.captures(bytes) {
226 } else if let Some(m) = UNSET_RE.captures(bytes) {
227 if let Some(map) = current_layer.sections.get_mut(&section) {
227 if let Some(map) = current_layer.sections.get_mut(&section) {
228 map.remove(&m[1]);
228 map.remove(&m[1]);
229 }
229 }
230 } else {
230 } else {
231 let message = if bytes.starts_with(b" ") {
231 let message = if bytes.starts_with(b" ") {
232 format_bytes!(b"unexpected leading whitespace: {}", bytes)
232 format_bytes!(b"unexpected leading whitespace: {}", bytes)
233 } else {
233 } else {
234 bytes.to_owned()
234 bytes.to_owned()
235 };
235 };
236 return Err(ConfigParseError {
236 return Err(ConfigParseError {
237 origin: ConfigOrigin::File(src.to_owned()),
237 origin: ConfigOrigin::File(src.to_owned()),
238 line,
238 line,
239 message,
239 message,
240 }
240 }
241 .into());
241 .into());
242 }
242 }
243 }
243 }
244 if !current_layer.is_empty() {
244 if !current_layer.is_empty() {
245 layers.push(current_layer);
245 layers.push(current_layer);
246 }
246 }
247 Ok(layers)
247 Ok(layers)
248 }
248 }
249 }
249 }
250
250
251 impl DisplayBytes for ConfigLayer {
251 impl DisplayBytes for ConfigLayer {
252 fn display_bytes(
252 fn display_bytes(
253 &self,
253 &self,
254 out: &mut dyn std::io::Write,
254 out: &mut dyn std::io::Write,
255 ) -> std::io::Result<()> {
255 ) -> std::io::Result<()> {
256 let mut sections: Vec<_> = self.sections.iter().collect();
256 let mut sections: Vec<_> = self.sections.iter().collect();
257 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
257 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
258
258
259 for (section, items) in sections.into_iter() {
259 for (section, items) in sections.into_iter() {
260 let mut items: Vec<_> = items.iter().collect();
260 let mut items: Vec<_> = items.iter().collect();
261 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
261 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
262
262
263 for (item, config_entry) in items {
263 for (item, config_entry) in items {
264 write_bytes!(
264 write_bytes!(
265 out,
265 out,
266 b"{}.{}={} # {}\n",
266 b"{}.{}={} # {}\n",
267 section,
267 section,
268 item,
268 item,
269 &config_entry.bytes,
269 &config_entry.bytes,
270 &self.origin,
270 &self.origin,
271 )?
271 )?
272 }
272 }
273 }
273 }
274 Ok(())
274 Ok(())
275 }
275 }
276 }
276 }
277
277
278 /// Mapping of section item to value.
278 /// Mapping of section item to value.
279 /// In the following:
279 /// In the following:
280 /// ```text
280 /// ```text
281 /// [ui]
281 /// [ui]
282 /// paginate=no
282 /// paginate=no
283 /// ```
283 /// ```
284 /// "paginate" is the section item and "no" the value.
284 /// "paginate" is the section item and "no" the value.
285 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
285 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
286
286
287 #[derive(Clone, Debug, PartialEq)]
287 #[derive(Clone, Debug, PartialEq)]
288 pub struct ConfigValue {
288 pub struct ConfigValue {
289 /// The raw bytes of the value (be it from the CLI, env or from a file)
289 /// The raw bytes of the value (be it from the CLI, env or from a file)
290 pub bytes: Vec<u8>,
290 pub bytes: Vec<u8>,
291 /// Only present if the value comes from a file, 1-indexed.
291 /// Only present if the value comes from a file, 1-indexed.
292 pub line: Option<usize>,
292 pub line: Option<usize>,
293 }
293 }
294
294
295 #[derive(Clone, Debug, PartialEq, Eq)]
295 #[derive(Clone, Debug, PartialEq, Eq)]
296 pub enum ConfigOrigin {
296 pub enum ConfigOrigin {
297 /// From a configuration file
297 /// From a configuration file
298 File(PathBuf),
298 File(PathBuf),
299 /// From [ui.tweakdefaults]
299 /// From [ui.tweakdefaults]
300 Tweakdefaults,
300 Tweakdefaults,
301 /// From a `--config` CLI argument
301 /// From a `--config` CLI argument
302 CommandLine,
302 CommandLine,
303 /// From a `--color` CLI argument
303 /// From a `--color` CLI argument
304 CommandLineColor,
304 CommandLineColor,
305 /// From environment variables like `$PAGER` or `$EDITOR`
305 /// From environment variables like `$PAGER` or `$EDITOR`
306 Environment(Vec<u8>),
306 Environment(Vec<u8>),
307 /// From configitems.toml
307 /// From configitems.toml
308 Defaults,
308 Defaults,
309 /* TODO extensions
309 /* TODO extensions
310 * TODO Python resources?
310 * TODO Python resources?
311 * Others? */
311 * Others? */
312 }
312 }
313
313
314 impl DisplayBytes for ConfigOrigin {
314 impl DisplayBytes for ConfigOrigin {
315 fn display_bytes(
315 fn display_bytes(
316 &self,
316 &self,
317 out: &mut dyn std::io::Write,
317 out: &mut dyn std::io::Write,
318 ) -> std::io::Result<()> {
318 ) -> std::io::Result<()> {
319 match self {
319 match self {
320 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
320 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
321 ConfigOrigin::CommandLine => out.write_all(b"--config"),
321 ConfigOrigin::CommandLine => out.write_all(b"--config"),
322 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
322 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
323 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
323 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
324 ConfigOrigin::Tweakdefaults => {
324 ConfigOrigin::Tweakdefaults => {
325 write_bytes!(out, b"ui.tweakdefaults")
325 write_bytes!(out, b"ui.tweakdefaults")
326 }
326 }
327 ConfigOrigin::Defaults => {
327 ConfigOrigin::Defaults => {
328 write_bytes!(out, b"configitems.toml")
328 write_bytes!(out, b"configitems.toml")
329 }
329 }
330 }
330 }
331 }
331 }
332 }
332 }
333
333
334 #[derive(Debug)]
334 #[derive(Debug)]
335 pub struct ConfigParseError {
335 pub struct ConfigParseError {
336 pub origin: ConfigOrigin,
336 pub origin: ConfigOrigin,
337 pub line: Option<usize>,
337 pub line: Option<usize>,
338 pub message: Vec<u8>,
338 pub message: Vec<u8>,
339 }
339 }
340
340
341 #[derive(Debug, derive_more::From)]
341 #[derive(Debug, derive_more::From)]
342 pub enum ConfigError {
342 pub enum ConfigError {
343 Parse(ConfigParseError),
343 Parse(ConfigParseError),
344 Other(HgError),
344 Other(HgError),
345 }
345 }
346
346
347 fn make_regex(pattern: &'static str) -> Regex {
347 fn make_regex(pattern: &'static str) -> Regex {
348 Regex::new(pattern).expect("expected a valid regex")
348 Regex::new(pattern).expect("expected a valid regex")
349 }
349 }
@@ -1,810 +1,810 b''
1 // config.rs
1 // config.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Valentin Gatien-Baron,
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 //! Mercurial config parsing and interfaces.
10 //! Mercurial config parsing and interfaces.
11
11
12 pub mod config_items;
12 pub mod config_items;
13 mod layer;
13 mod layer;
14 mod plain_info;
14 mod plain_info;
15 mod values;
15 mod values;
16 pub use layer::{ConfigError, ConfigOrigin, ConfigParseError};
16 pub use layer::{ConfigError, ConfigOrigin, ConfigParseError};
17 use lazy_static::lazy_static;
17 use lazy_static::lazy_static;
18 pub use plain_info::PlainInfo;
18 pub use plain_info::PlainInfo;
19
19
20 use self::config_items::DefaultConfig;
20 use self::config_items::DefaultConfig;
21 use self::config_items::DefaultConfigItem;
21 use self::config_items::DefaultConfigItem;
22 use self::layer::ConfigLayer;
22 use self::layer::ConfigLayer;
23 use self::layer::ConfigValue;
23 use self::layer::ConfigValue;
24 use crate::errors::HgError;
24 use crate::errors::HgError;
25 use crate::errors::{HgResultExt, IoResultExt};
25 use crate::errors::{HgResultExt, IoResultExt};
26 use crate::utils::files::get_bytes_from_os_str;
26 use crate::utils::files::get_bytes_from_os_str;
27 use format_bytes::{write_bytes, DisplayBytes};
27 use format_bytes::{write_bytes, DisplayBytes};
28 use std::collections::HashSet;
28 use std::collections::HashSet;
29 use std::env;
29 use std::env;
30 use std::fmt;
30 use std::fmt;
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32 use std::str;
32 use std::str;
33
33
34 lazy_static! {
34 lazy_static! {
35 static ref DEFAULT_CONFIG: Result<DefaultConfig, HgError> = {
35 static ref DEFAULT_CONFIG: Result<DefaultConfig, HgError> = {
36 DefaultConfig::from_contents(include_str!(
36 DefaultConfig::from_contents(include_str!(
37 "../../../../mercurial/configitems.toml"
37 "../../../../mercurial/configitems.toml"
38 ))
38 ))
39 };
39 };
40 }
40 }
41
41
42 /// Holds the config values for the current repository
42 /// Holds the config values for the current repository
43 /// TODO update this docstring once we support more sources
43 /// TODO update this docstring once we support more sources
44 #[derive(Clone)]
44 #[derive(Clone)]
45 pub struct Config {
45 pub struct Config {
46 layers: Vec<layer::ConfigLayer>,
46 layers: Vec<layer::ConfigLayer>,
47 plain: PlainInfo,
47 plain: PlainInfo,
48 }
48 }
49
49
50 impl DisplayBytes for Config {
50 impl DisplayBytes for Config {
51 fn display_bytes(
51 fn display_bytes(
52 &self,
52 &self,
53 out: &mut dyn std::io::Write,
53 out: &mut dyn std::io::Write,
54 ) -> std::io::Result<()> {
54 ) -> std::io::Result<()> {
55 for (index, layer) in self.layers.iter().rev().enumerate() {
55 for (index, layer) in self.layers.iter().rev().enumerate() {
56 write_bytes!(
56 write_bytes!(
57 out,
57 out,
58 b"==== Layer {} (trusted: {}) ====\n{}",
58 b"==== Layer {} (trusted: {}) ====\n{}",
59 index,
59 index,
60 if layer.trusted {
60 if layer.trusted {
61 &b"yes"[..]
61 &b"yes"[..]
62 } else {
62 } else {
63 &b"no"[..]
63 &b"no"[..]
64 },
64 },
65 layer
65 layer
66 )?;
66 )?;
67 }
67 }
68 Ok(())
68 Ok(())
69 }
69 }
70 }
70 }
71
71
72 pub enum ConfigSource {
72 pub enum ConfigSource {
73 /// Absolute path to a config file
73 /// Absolute path to a config file
74 AbsPath(PathBuf),
74 AbsPath(PathBuf),
75 /// Already parsed (from the CLI, env, Python resources, etc.)
75 /// Already parsed (from the CLI, env, Python resources, etc.)
76 Parsed(layer::ConfigLayer),
76 Parsed(layer::ConfigLayer),
77 }
77 }
78
78
79 #[derive(Debug)]
79 #[derive(Debug)]
80 pub struct ConfigValueParseErrorDetails {
80 pub struct ConfigValueParseErrorDetails {
81 pub origin: ConfigOrigin,
81 pub origin: ConfigOrigin,
82 pub line: Option<usize>,
82 pub line: Option<usize>,
83 pub section: Vec<u8>,
83 pub section: Vec<u8>,
84 pub item: Vec<u8>,
84 pub item: Vec<u8>,
85 pub value: Vec<u8>,
85 pub value: Vec<u8>,
86 pub expected_type: &'static str,
86 pub expected_type: &'static str,
87 }
87 }
88
88
89 // boxed to avoid very large Result types
89 // boxed to avoid very large Result types
90 pub type ConfigValueParseError = Box<ConfigValueParseErrorDetails>;
90 pub type ConfigValueParseError = Box<ConfigValueParseErrorDetails>;
91
91
92 impl fmt::Display for ConfigValueParseError {
92 impl fmt::Display for ConfigValueParseError {
93 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
93 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
94 // TODO: add origin and line number information, here and in
94 // TODO: add origin and line number information, here and in
95 // corresponding python code
95 // corresponding python code
96 write!(
96 write!(
97 f,
97 f,
98 "config error: {}.{} is not a {} ('{}')",
98 "config error: {}.{} is not a {} ('{}')",
99 String::from_utf8_lossy(&self.section),
99 String::from_utf8_lossy(&self.section),
100 String::from_utf8_lossy(&self.item),
100 String::from_utf8_lossy(&self.item),
101 self.expected_type,
101 self.expected_type,
102 String::from_utf8_lossy(&self.value)
102 String::from_utf8_lossy(&self.value)
103 )
103 )
104 }
104 }
105 }
105 }
106
106
107 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
107 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
108 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
108 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
109 // duplication with [_applyconfig] in [ui.py],
109 // duplication with [_applyconfig] in [ui.py],
110 if !plain.is_plain() {
110 if !plain.is_plain() {
111 return false;
111 return false;
112 }
112 }
113 if section == b"alias" {
113 if section == b"alias" {
114 return plain.plainalias();
114 return plain.plainalias();
115 }
115 }
116 if section == b"revsetalias" {
116 if section == b"revsetalias" {
117 return plain.plainrevsetalias();
117 return plain.plainrevsetalias();
118 }
118 }
119 if section == b"templatealias" {
119 if section == b"templatealias" {
120 return plain.plaintemplatealias();
120 return plain.plaintemplatealias();
121 }
121 }
122 if section == b"ui" {
122 if section == b"ui" {
123 let to_delete: &[&[u8]] = &[
123 let to_delete: &[&[u8]] = &[
124 b"debug",
124 b"debug",
125 b"fallbackencoding",
125 b"fallbackencoding",
126 b"quiet",
126 b"quiet",
127 b"slash",
127 b"slash",
128 b"logtemplate",
128 b"logtemplate",
129 b"message-output",
129 b"message-output",
130 b"statuscopies",
130 b"statuscopies",
131 b"style",
131 b"style",
132 b"traceback",
132 b"traceback",
133 b"verbose",
133 b"verbose",
134 ];
134 ];
135 return to_delete.contains(&item);
135 return to_delete.contains(&item);
136 }
136 }
137 let sections_to_delete: &[&[u8]] =
137 let sections_to_delete: &[&[u8]] =
138 &[b"defaults", b"commands", b"command-templates"];
138 &[b"defaults", b"commands", b"command-templates"];
139 sections_to_delete.contains(&section)
139 sections_to_delete.contains(&section)
140 }
140 }
141
141
142 impl Config {
142 impl Config {
143 /// The configuration to use when printing configuration-loading errors
143 /// The configuration to use when printing configuration-loading errors
144 pub fn empty() -> Self {
144 pub fn empty() -> Self {
145 Self {
145 Self {
146 layers: Vec::new(),
146 layers: Vec::new(),
147 plain: PlainInfo::empty(),
147 plain: PlainInfo::empty(),
148 }
148 }
149 }
149 }
150
150
151 /// Load system and user configuration from various files.
151 /// Load system and user configuration from various files.
152 ///
152 ///
153 /// This is also affected by some environment variables.
153 /// This is also affected by some environment variables.
154 pub fn load_non_repo() -> Result<Self, ConfigError> {
154 pub fn load_non_repo() -> Result<Self, ConfigError> {
155 let mut config = Self::empty();
155 let mut config = Self::empty();
156 let opt_rc_path = env::var_os("HGRCPATH");
156 let opt_rc_path = env::var_os("HGRCPATH");
157 // HGRCPATH replaces system config
157 // HGRCPATH replaces system config
158 if opt_rc_path.is_none() {
158 if opt_rc_path.is_none() {
159 config.add_system_config()?
159 config.add_system_config()?
160 }
160 }
161
161
162 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
162 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
163 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
163 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
164 config.add_for_environment_variable("PAGER", b"pager", b"pager");
164 config.add_for_environment_variable("PAGER", b"pager", b"pager");
165
165
166 // These are set by `run-tests.py --rhg` to enable fallback for the
166 // These are set by `run-tests.py --rhg` to enable fallback for the
167 // entire test suite. Alternatives would be setting configuration
167 // entire test suite. Alternatives would be setting configuration
168 // through `$HGRCPATH` but some tests override that, or changing the
168 // through `$HGRCPATH` but some tests override that, or changing the
169 // `hg` shell alias to include `--config` but that disrupts tests that
169 // `hg` shell alias to include `--config` but that disrupts tests that
170 // print command lines and check expected output.
170 // print command lines and check expected output.
171 config.add_for_environment_variable(
171 config.add_for_environment_variable(
172 "RHG_ON_UNSUPPORTED",
172 "RHG_ON_UNSUPPORTED",
173 b"rhg",
173 b"rhg",
174 b"on-unsupported",
174 b"on-unsupported",
175 );
175 );
176 config.add_for_environment_variable(
176 config.add_for_environment_variable(
177 "RHG_FALLBACK_EXECUTABLE",
177 "RHG_FALLBACK_EXECUTABLE",
178 b"rhg",
178 b"rhg",
179 b"fallback-executable",
179 b"fallback-executable",
180 );
180 );
181
181
182 // HGRCPATH replaces user config
182 // HGRCPATH replaces user config
183 if opt_rc_path.is_none() {
183 if opt_rc_path.is_none() {
184 config.add_user_config()?
184 config.add_user_config()?
185 }
185 }
186 if let Some(rc_path) = &opt_rc_path {
186 if let Some(rc_path) = &opt_rc_path {
187 for path in env::split_paths(rc_path) {
187 for path in env::split_paths(rc_path) {
188 if !path.as_os_str().is_empty() {
188 if !path.as_os_str().is_empty() {
189 if path.is_dir() {
189 if path.is_dir() {
190 config.add_trusted_dir(&path)?
190 config.add_trusted_dir(&path)?
191 } else {
191 } else {
192 config.add_trusted_file(&path)?
192 config.add_trusted_file(&path)?
193 }
193 }
194 }
194 }
195 }
195 }
196 }
196 }
197 Ok(config)
197 Ok(config)
198 }
198 }
199
199
200 pub fn load_cli_args(
200 pub fn load_cli_args(
201 &mut self,
201 &mut self,
202 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
202 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
203 color_arg: Option<Vec<u8>>,
203 color_arg: Option<Vec<u8>>,
204 ) -> Result<(), ConfigError> {
204 ) -> Result<(), ConfigError> {
205 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
205 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
206 self.layers.push(layer)
206 self.layers.push(layer)
207 }
207 }
208 if let Some(arg) = color_arg {
208 if let Some(arg) = color_arg {
209 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
209 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
210 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
210 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
211 self.layers.push(layer)
211 self.layers.push(layer)
212 }
212 }
213 Ok(())
213 Ok(())
214 }
214 }
215
215
216 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
216 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
217 if let Some(entries) = std::fs::read_dir(path)
217 if let Some(entries) = std::fs::read_dir(path)
218 .when_reading_file(path)
218 .when_reading_file(path)
219 .io_not_found_as_none()?
219 .io_not_found_as_none()?
220 {
220 {
221 let mut file_paths = entries
221 let mut file_paths = entries
222 .map(|result| {
222 .map(|result| {
223 result.when_reading_file(path).map(|entry| entry.path())
223 result.when_reading_file(path).map(|entry| entry.path())
224 })
224 })
225 .collect::<Result<Vec<_>, _>>()?;
225 .collect::<Result<Vec<_>, _>>()?;
226 file_paths.sort();
226 file_paths.sort();
227 for file_path in &file_paths {
227 for file_path in &file_paths {
228 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
228 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
229 self.add_trusted_file(file_path)?
229 self.add_trusted_file(file_path)?
230 }
230 }
231 }
231 }
232 }
232 }
233 Ok(())
233 Ok(())
234 }
234 }
235
235
236 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
236 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
237 if let Some(data) = std::fs::read(path)
237 if let Some(data) = std::fs::read(path)
238 .when_reading_file(path)
238 .when_reading_file(path)
239 .io_not_found_as_none()?
239 .io_not_found_as_none()?
240 {
240 {
241 self.layers.extend(ConfigLayer::parse(path, &data)?)
241 self.layers.extend(ConfigLayer::parse(path, &data)?)
242 }
242 }
243 Ok(())
243 Ok(())
244 }
244 }
245
245
246 fn add_for_environment_variable(
246 fn add_for_environment_variable(
247 &mut self,
247 &mut self,
248 var: &str,
248 var: &str,
249 section: &[u8],
249 section: &[u8],
250 key: &[u8],
250 key: &[u8],
251 ) {
251 ) {
252 if let Some(value) = env::var_os(var) {
252 if let Some(value) = env::var_os(var) {
253 let origin = layer::ConfigOrigin::Environment(var.into());
253 let origin = layer::ConfigOrigin::Environment(var.into());
254 let mut layer = ConfigLayer::new(origin);
254 let mut layer = ConfigLayer::new(origin);
255 layer.add(
255 layer.add(
256 section.to_owned(),
256 section.to_owned(),
257 key.to_owned(),
257 key.to_owned(),
258 get_bytes_from_os_str(value),
258 get_bytes_from_os_str(value),
259 None,
259 None,
260 );
260 );
261 self.layers.push(layer)
261 self.layers.push(layer)
262 }
262 }
263 }
263 }
264
264
    #[cfg(unix)] // TODO: other platforms
    /// Load the system-wide configuration: per-installation
    /// (`<prefix>/etc/mercurial/…`) then per-system (`/etc/mercurial/…`).
    fn add_system_config(&mut self) -> Result<(), ConfigError> {
        // For a given prefix, load `etc/mercurial/hgrc` then every `*.rc`
        // file in `etc/mercurial/hgrc.d/`.
        let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
            let etc = prefix.join("etc").join("mercurial");
            self.add_trusted_file(&etc.join("hgrc"))?;
            self.add_trusted_dir(&etc.join("hgrc.d"))
        };
        let root = Path::new("/");
        // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
        // instead? TODO: can this be a relative path?
        let hg = crate::utils::current_exe()?;
        // TODO: this order (per-installation then per-system) matches
        // `systemrcpath()` in `mercurial/scmposix.py`, but
        // `mercurial/helptext/config.txt` suggests it should be reversed
        if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
            // When the executable lives directly under `/`, the
            // per-installation prefix would duplicate the per-system one.
            if installation_prefix != root {
                add_for_prefix(installation_prefix)?
            }
        }
        add_for_prefix(root)?;
        Ok(())
    }
287
287
    #[cfg(unix)] // TODO: other platforms
    /// Load the per-user configuration: `~/.hgrc`, and (outside of Darwin)
    /// the XDG location `$XDG_CONFIG_HOME/hg/hgrc`, defaulting to
    /// `~/.config/hg/hgrc`.
    fn add_user_config(&mut self) -> Result<(), ConfigError> {
        let opt_home = home::home_dir();
        if let Some(home) = &opt_home {
            self.add_trusted_file(&home.join(".hgrc"))?
        }
        let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
        if !darwin {
            // XDG base-directory lookup, with the standard fallback to
            // `~/.config` when `XDG_CONFIG_HOME` is unset.
            if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
                .map(PathBuf::from)
                .or_else(|| opt_home.map(|home| home.join(".config")))
            {
                self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
            }
        }
        Ok(())
    }
305
305
306 /// Loads in order, which means that the precedence is the same
306 /// Loads in order, which means that the precedence is the same
307 /// as the order of `sources`.
307 /// as the order of `sources`.
308 pub fn load_from_explicit_sources(
308 pub fn load_from_explicit_sources(
309 sources: Vec<ConfigSource>,
309 sources: Vec<ConfigSource>,
310 ) -> Result<Self, ConfigError> {
310 ) -> Result<Self, ConfigError> {
311 let mut layers = vec![];
311 let mut layers = vec![];
312
312
313 for source in sources.into_iter() {
313 for source in sources.into_iter() {
314 match source {
314 match source {
315 ConfigSource::Parsed(c) => layers.push(c),
315 ConfigSource::Parsed(c) => layers.push(c),
316 ConfigSource::AbsPath(c) => {
316 ConfigSource::AbsPath(c) => {
317 // TODO check if it should be trusted
317 // TODO check if it should be trusted
318 // mercurial/ui.py:427
318 // mercurial/ui.py:427
319 let data = match std::fs::read(&c) {
319 let data = match std::fs::read(&c) {
320 Err(_) => continue, // same as the python code
320 Err(_) => continue, // same as the python code
321 Ok(data) => data,
321 Ok(data) => data,
322 };
322 };
323 layers.extend(ConfigLayer::parse(&c, &data)?)
323 layers.extend(ConfigLayer::parse(&c, &data)?)
324 }
324 }
325 }
325 }
326 }
326 }
327
327
328 Ok(Config {
328 Ok(Config {
329 layers,
329 layers,
330 plain: PlainInfo::empty(),
330 plain: PlainInfo::empty(),
331 })
331 })
332 }
332 }
333
333
    /// Loads the per-repository config into a new `Config` which is combined
    /// with `self`.
    pub(crate) fn combine_with_repo(
        &self,
        repo_config_files: &[PathBuf],
    ) -> Result<Self, ConfigError> {
        // Split off the command-line layers so they can be re-appended
        // last: CLI configuration must keep precedence over the repo
        // config (later layers win in `get_inner`).
        let (cli_layers, other_layers) = self
            .layers
            .iter()
            .cloned()
            .partition(ConfigLayer::is_from_command_line);

        let mut repo_config = Self {
            layers: other_layers,
            plain: PlainInfo::empty(),
        };
        for path in repo_config_files {
            // TODO: check if this file should be trusted:
            // `mercurial/ui.py:427`
            repo_config.add_trusted_file(path)?;
        }
        // Re-append the CLI layers on top (highest precedence).
        repo_config.layers.extend(cli_layers);
        Ok(repo_config)
    }
358
358
    /// Set the `[PLAIN]` information (HGPLAIN/HGPLAINEXCEPT), which makes
    /// `get_inner` ignore the user configuration it covers.
    pub fn apply_plain(&mut self, plain: PlainInfo) {
        self.plain = plain;
    }
362
362
363 /// Returns the default value for the given config item, if any.
363 /// Returns the default value for the given config item, if any.
364 pub fn get_default(
364 pub fn get_default(
365 &self,
365 &self,
366 section: &[u8],
366 section: &[u8],
367 item: &[u8],
367 item: &[u8],
368 ) -> Result<Option<&DefaultConfigItem>, HgError> {
368 ) -> Result<Option<&DefaultConfigItem>, HgError> {
369 let default_config = DEFAULT_CONFIG.as_ref().map_err(|e| {
369 let default_config = DEFAULT_CONFIG.as_ref().map_err(|e| {
370 HgError::abort(
370 HgError::abort(
371 e.to_string(),
371 e.to_string(),
372 crate::exit_codes::ABORT,
372 crate::exit_codes::ABORT,
373 Some("`mercurial/configitems.toml` is not valid".into()),
373 Some("`mercurial/configitems.toml` is not valid".into()),
374 )
374 )
375 })?;
375 })?;
376 let default_opt = default_config.get(section, item);
376 let default_opt = default_config.get(section, item);
377 Ok(default_opt.filter(|default| {
377 Ok(default_opt.filter(|default| {
378 default
378 default
379 .in_core_extension()
379 .in_core_extension()
380 .map(|extension| {
380 .map(|extension| {
381 // Only return the default for an in-core extension item
381 // Only return the default for an in-core extension item
382 // if said extension is enabled
382 // if said extension is enabled
383 self.is_extension_enabled(extension.as_bytes())
383 self.is_extension_enabled(extension.as_bytes())
384 })
384 })
385 .unwrap_or(true)
385 .unwrap_or(true)
386 }))
386 }))
387 }
387 }
388
388
    /// Return the config item that corresponds to a section + item, a function
    /// to parse from the raw bytes to the expected type (which is passed as
    /// a string only to make debugging easier).
    /// Used by higher-level methods like `get_bool`.
    ///
    /// `fallback_to_default` controls whether the default value (if any) is
    /// returned if nothing is found.
    ///
    /// Errors if a value is present but fails to parse as `expected_type`,
    /// pointing at the layer and line the value came from.
    fn get_parse<'config, T: 'config>(
        &'config self,
        section: &[u8],
        item: &[u8],
        expected_type: &'static str,
        parse: impl Fn(&'config [u8]) -> Option<T>,
        fallback_to_default: bool,
    ) -> Result<Option<T>, HgError>
    where
        Option<T>: TryFrom<&'config DefaultConfigItem, Error = HgError>,
    {
        match self.get_inner(section, item) {
            // A user-supplied value exists: parse it or report where the
            // bad value was defined.
            Some((layer, v)) => match parse(&v.bytes) {
                Some(b) => Ok(Some(b)),
                None => Err(Box::new(ConfigValueParseErrorDetails {
                    origin: layer.origin.to_owned(),
                    line: v.line,
                    value: v.bytes.to_owned(),
                    section: section.to_owned(),
                    item: item.to_owned(),
                    expected_type,
                })
                .into()),
            },
            None => {
                if !fallback_to_default {
                    return Ok(None);
                }
                match self.get_default(section, item)? {
                    Some(default) => {
                        // Defaults are TOML values, so they're not in the same
                        // shape as in the config files.
                        // First try to convert directly to the expected type
                        let as_t = default.try_into();
                        match as_t {
                            Ok(t) => Ok(t),
                            Err(e) => {
                                // If it fails, it means that...
                                let as_bytes: Result<Option<&[u8]>, _> =
                                    default.try_into();
                                match as_bytes {
                                    Ok(bytes_opt) => {
                                        if let Some(bytes) = bytes_opt {
                                            // ...we should be able to parse it
                                            // with the same parser used for
                                            // user-supplied values.
                                            return Ok(parse(bytes));
                                        }
                                        // No bytes either: surface the
                                        // original conversion error.
                                        Err(e)
                                    }
                                    Err(_) => Err(e),
                                }
                            }
                        }
                    }
                    None => {
                        // Unregistered item: warn (when devel warnings are
                        // configured) instead of failing.
                        self.print_devel_warning(section, item)?;
                        Ok(None)
                    }
                }
            }
        }
    }
457
457
458 fn print_devel_warning(
458 fn print_devel_warning(
459 &self,
459 &self,
460 section: &[u8],
460 section: &[u8],
461 item: &[u8],
461 item: &[u8],
462 ) -> Result<(), HgError> {
462 ) -> Result<(), HgError> {
463 let warn_all = self.get_bool(b"devel", b"all-warnings")?;
463 let warn_all = self.get_bool(b"devel", b"all-warnings")?;
464 let warn_specific = self.get_bool(b"devel", b"warn-config-unknown")?;
464 let warn_specific = self.get_bool(b"devel", b"warn-config-unknown")?;
465 if !warn_all || !warn_specific {
465 if !warn_all || !warn_specific {
466 // We technically shouldn't print anything here since it's not
466 // We technically shouldn't print anything here since it's not
467 // the concern of `hg-core`.
467 // the concern of `hg-core`.
468 //
468 //
469 // We're printing directly to stderr since development warnings
469 // We're printing directly to stderr since development warnings
470 // are not on by default and surfacing this to consumer crates
470 // are not on by default and surfacing this to consumer crates
471 // (like `rhg`) would be more difficult, probably requiring
471 // (like `rhg`) would be more difficult, probably requiring
472 // something à la `log` crate.
472 // something à la `log` crate.
473 //
473 //
474 // TODO maybe figure out a way of exposing a "warnings" channel
474 // TODO maybe figure out a way of exposing a "warnings" channel
475 // that consumer crates can hook into. It would be useful for
475 // that consumer crates can hook into. It would be useful for
476 // all other warnings that `hg-core` could expose.
476 // all other warnings that `hg-core` could expose.
477 eprintln!(
477 eprintln!(
478 "devel-warn: accessing unregistered config item: '{}.{}'",
478 "devel-warn: accessing unregistered config item: '{}.{}'",
479 String::from_utf8_lossy(section),
479 String::from_utf8_lossy(section),
480 String::from_utf8_lossy(item),
480 String::from_utf8_lossy(item),
481 );
481 );
482 }
482 }
483 Ok(())
483 Ok(())
484 }
484 }
485
485
    /// Returns an `Err` if the first value found is not a valid UTF-8 string.
    /// Otherwise, returns an `Ok(value)` if found, or `None`.
    pub fn get_str(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<&str>, HgError> {
        self.get_parse(
            section,
            item,
            "ASCII or UTF-8 string",
            |value| str::from_utf8(value).ok(),
            // Fall back to the registered default when the item is unset.
            true,
        )
    }
501
501
    /// Same as `get_str`, but doesn't fall back to the default `configitem`
    /// if not defined in the user config.
    pub fn get_str_no_default(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<&str>, HgError> {
        self.get_parse(
            section,
            item,
            "ASCII or UTF-8 string",
            |value| str::from_utf8(value).ok(),
            // No fallback: `None` when absent from the user config.
            false,
        )
    }
517
517
    /// Returns an `Err` if the first value found is not a valid unsigned
    /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
    pub fn get_u32(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<u32>, HgError> {
        self.get_parse(
            section,
            item,
            "valid integer",
            // Must be valid UTF-8 and parse as a `u32`.
            |value| str::from_utf8(value).ok()?.parse().ok(),
            // Fall back to the registered default when the item is unset.
            true,
        )
    }
533
533
    /// Returns an `Err` if the first value found is not a valid file size
    /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
    /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
    pub fn get_byte_size(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<u64>, HgError> {
        self.get_parse(
            section,
            item,
            "byte quantity",
            values::parse_byte_size,
            // Fall back to the registered default when the item is unset.
            true,
        )
    }
550
550
    /// Returns an `Err` if the first value found is not a valid boolean.
    /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
    /// found, or `None`.
    pub fn get_option(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<bool>, HgError> {
        // `true`: fall back to the registered default when the item is unset.
        self.get_parse(section, item, "boolean", values::parse_bool, true)
    }
561
561
    /// Same as `get_option`, but doesn't fall back to the default `configitem`
    /// if not defined in the user config.
    pub fn get_option_no_default(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<Option<bool>, HgError> {
        // `false`: no fallback, `None` when absent from the user config.
        self.get_parse(section, item, "boolean", values::parse_bool, false)
    }
571
571
572 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
572 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
573 /// if the value is not found, an `Err` if it's not a valid boolean.
573 /// if the value is not found, an `Err` if it's not a valid boolean.
574 pub fn get_bool(
574 pub fn get_bool(
575 &self,
575 &self,
576 section: &[u8],
576 section: &[u8],
577 item: &[u8],
577 item: &[u8],
578 ) -> Result<bool, HgError> {
578 ) -> Result<bool, HgError> {
579 Ok(self.get_option(section, item)?.unwrap_or(false))
579 Ok(self.get_option(section, item)?.unwrap_or(false))
580 }
580 }
581
581
    /// Same as `get_bool`, but doesn't fall back to the default `configitem`
    /// if not defined in the user config.
    pub fn get_bool_no_default(
        &self,
        section: &[u8],
        item: &[u8],
    ) -> Result<bool, HgError> {
        // Absent means disabled.
        Ok(self.get_option_no_default(section, item)?.unwrap_or(false))
    }
591
591
592 /// Returns `true` if the extension is enabled, `false` otherwise
592 /// Returns `true` if the extension is enabled, `false` otherwise
593 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
593 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
594 let value = self.get(b"extensions", extension);
594 let value = self.get(b"extensions", extension);
595 match value {
595 match value {
596 Some(c) => !c.starts_with(b"!"),
596 Some(c) => !c.starts_with(b"!"),
597 None => false,
597 None => false,
598 }
598 }
599 }
599 }
600
600
601 /// If there is an `item` value in `section`, parse and return a list of
601 /// If there is an `item` value in `section`, parse and return a list of
602 /// byte strings.
602 /// byte strings.
603 pub fn get_list(
603 pub fn get_list(
604 &self,
604 &self,
605 section: &[u8],
605 section: &[u8],
606 item: &[u8],
606 item: &[u8],
607 ) -> Option<Vec<Vec<u8>>> {
607 ) -> Option<Vec<Vec<u8>>> {
608 self.get(section, item).map(values::parse_list)
608 self.get(section, item).map(values::parse_list)
609 }
609 }
610
610
611 /// Returns the raw value bytes of the first one found, or `None`.
611 /// Returns the raw value bytes of the first one found, or `None`.
612 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
612 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
613 self.get_inner(section, item)
613 self.get_inner(section, item)
614 .map(|(_, value)| value.bytes.as_ref())
614 .map(|(_, value)| value.bytes.as_ref())
615 }
615 }
616
616
617 /// Returns the raw value bytes of the first one found, or `None`.
617 /// Returns the raw value bytes of the first one found, or `None`.
618 pub fn get_with_origin(
618 pub fn get_with_origin(
619 &self,
619 &self,
620 section: &[u8],
620 section: &[u8],
621 item: &[u8],
621 item: &[u8],
622 ) -> Option<(&[u8], &ConfigOrigin)> {
622 ) -> Option<(&[u8], &ConfigOrigin)> {
623 self.get_inner(section, item)
623 self.get_inner(section, item)
624 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
624 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
625 }
625 }
626
626
627 /// Returns the layer and the value of the first one found, or `None`.
627 /// Returns the layer and the value of the first one found, or `None`.
628 fn get_inner(
628 fn get_inner(
629 &self,
629 &self,
630 section: &[u8],
630 section: &[u8],
631 item: &[u8],
631 item: &[u8],
632 ) -> Option<(&ConfigLayer, &ConfigValue)> {
632 ) -> Option<(&ConfigLayer, &ConfigValue)> {
633 // Filter out the config items that are hidden by [PLAIN].
633 // Filter out the config items that are hidden by [PLAIN].
634 // This differs from python hg where we delete them from the config.
634 // This differs from python hg where we delete them from the config.
635 let should_ignore = should_ignore(&self.plain, section, item);
635 let should_ignore = should_ignore(&self.plain, section, item);
636 for layer in self.layers.iter().rev() {
636 for layer in self.layers.iter().rev() {
637 if !layer.trusted {
637 if !layer.trusted {
638 continue;
638 continue;
639 }
639 }
640 //The [PLAIN] config should not affect the defaults.
640 //The [PLAIN] config should not affect the defaults.
641 //
641 //
642 // However, PLAIN should also affect the "tweaked" defaults (unless
642 // However, PLAIN should also affect the "tweaked" defaults (unless
643 // "tweakdefault" is part of "HGPLAINEXCEPT").
643 // "tweakdefault" is part of "HGPLAINEXCEPT").
644 //
644 //
645 // In practice the tweak-default layer is only added when it is
645 // In practice the tweak-default layer is only added when it is
646 // relevant, so we can safely always take it into
646 // relevant, so we can safely always take it into
647 // account here.
647 // account here.
648 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
648 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
649 {
649 {
650 continue;
650 continue;
651 }
651 }
652 if let Some(v) = layer.get(section, item) {
652 if let Some(v) = layer.get(section, item) {
653 return Some((layer, v));
653 return Some((layer, v));
654 }
654 }
655 }
655 }
656 None
656 None
657 }
657 }
658
658
659 /// Return all keys defined for the given section
659 /// Return all keys defined for the given section
660 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
660 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
661 self.layers
661 self.layers
662 .iter()
662 .iter()
663 .flat_map(|layer| layer.iter_keys(section))
663 .flat_map(|layer| layer.iter_keys(section))
664 .collect()
664 .collect()
665 }
665 }
666
666
667 /// Returns whether any key is defined in the given section
667 /// Returns whether any key is defined in the given section
668 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
668 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
669 self.layers
669 self.layers
670 .iter()
670 .iter()
671 .any(|layer| layer.has_non_empty_section(section))
671 .any(|layer| layer.has_non_empty_section(section))
672 }
672 }
673
673
    /// Yields (key, value) pairs for everything in the given section
    ///
    /// Pairs come from the highest-precedence layer that defines each key;
    /// keys redefined in lower layers are skipped.
    pub fn iter_section<'a>(
        &'a self,
        section: &'a [u8],
    ) -> impl Iterator<Item = (&[u8], &[u8])> + 'a {
        // Deduplicate keys redefined in multiple layers
        let mut keys_already_seen = HashSet::new();
        // `insert` returns `true` only the first time a key is seen.
        let mut key_is_new =
            move |&(key, _value): &(&'a [u8], &'a [u8])| -> bool {
                keys_already_seen.insert(key)
            };
        // This is similar to `flat_map` + `filter_map`, except with a single
        // closure that owns `key_is_new` (and therefore the
        // `keys_already_seen` set):
        let mut layer_iters = self
            .layers
            .iter()
            .rev()
            .map(move |layer| layer.iter_section(section))
            .peekable();
        // Drain each per-layer iterator in turn, advancing to the next
        // layer once the current one is exhausted; `peek_mut()?` ends the
        // whole iterator when no layers remain.
        std::iter::from_fn(move || loop {
            if let Some(pair) = layer_iters.peek_mut()?.find(&mut key_is_new) {
                return Some(pair);
            } else {
                layer_iters.next();
            }
        })
    }
702
702
703 /// Get raw values bytes from all layers (even untrusted ones) in order
703 /// Get raw values bytes from all layers (even untrusted ones) in order
704 /// of precedence.
704 /// of precedence.
705 #[cfg(test)]
705 #[cfg(test)]
706 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
706 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
707 let mut res = vec![];
707 let mut res = vec![];
708 for layer in self.layers.iter().rev() {
708 for layer in self.layers.iter().rev() {
709 if let Some(v) = layer.get(section, item) {
709 if let Some(v) = layer.get(section, item) {
710 res.push(v.bytes.as_ref());
710 res.push(v.bytes.as_ref());
711 }
711 }
712 }
712 }
713 res
713 res
714 }
714 }
715
715
716 // a config layer that's introduced by ui.tweakdefaults
716 // a config layer that's introduced by ui.tweakdefaults
717 fn tweakdefaults_layer() -> ConfigLayer {
717 fn tweakdefaults_layer() -> ConfigLayer {
718 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
718 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
719
719
720 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
720 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
721 layer.add(
721 layer.add(
722 section[..].into(),
722 section[..].into(),
723 item[..].into(),
723 item[..].into(),
724 value[..].into(),
724 value[..].into(),
725 None,
725 None,
726 );
726 );
727 };
727 };
728 // duplication of [tweakrc] from [ui.py]
728 // duplication of [tweakrc] from [ui.py]
729 add(b"ui", b"rollback", b"False");
729 add(b"ui", b"rollback", b"False");
730 add(b"ui", b"statuscopies", b"yes");
730 add(b"ui", b"statuscopies", b"yes");
731 add(b"ui", b"interface", b"curses");
731 add(b"ui", b"interface", b"curses");
732 add(b"ui", b"relative-paths", b"yes");
732 add(b"ui", b"relative-paths", b"yes");
733 add(b"commands", b"grep.all-files", b"True");
733 add(b"commands", b"grep.all-files", b"True");
734 add(b"commands", b"update.check", b"noconflict");
734 add(b"commands", b"update.check", b"noconflict");
735 add(b"commands", b"status.verbose", b"True");
735 add(b"commands", b"status.verbose", b"True");
736 add(b"commands", b"resolve.explicit-re-merge", b"True");
736 add(b"commands", b"resolve.explicit-re-merge", b"True");
737 add(b"git", b"git", b"1");
737 add(b"git", b"git", b"1");
738 add(b"git", b"showfunc", b"1");
738 add(b"git", b"showfunc", b"1");
739 add(b"git", b"word-diff", b"1");
739 add(b"git", b"word-diff", b"1");
740 layer
740 layer
741 }
741 }
742
742
    // introduce the tweaked defaults as implied by ui.tweakdefaults
    //
    // Inserted at index 0 (lowest precedence) so that every other layer,
    // including the user configuration, can override these values.
    pub fn tweakdefaults(&mut self) {
        self.layers.insert(0, Config::tweakdefaults_layer());
    }
747 }
747 }
748
748
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::fs::File;
    use std::io::Write;

    // Checks that `%include` splices layers at the right position:
    // a value set *after* the `%include` overrides the included one,
    // and `get_all` reports all definitions in precedence order.
    #[test]
    fn test_include_layer_ordering() {
        let tmpdir = tempfile::tempdir().unwrap();
        let tmpdir_path = tmpdir.path();
        let mut included_file =
            File::create(tmpdir_path.join("included.rc")).unwrap();

        included_file.write_all(b"[section]\nitem=value1").unwrap();
        let base_config_path = tmpdir_path.join("base.rc");
        let mut config_file = File::create(&base_config_path).unwrap();
        let data =
            b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
              [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
        config_file.write_all(data).unwrap();

        let sources = vec![ConfigSource::AbsPath(base_config_path)];
        let config = Config::load_from_explicit_sources(sources)
            .expect("expected valid config");

        // The last definition (line 4 of base.rc) wins.
        let (_, value) = config.get_inner(b"section", b"item").unwrap();
        assert_eq!(
            value,
            &ConfigValue {
                bytes: b"value2".to_vec(),
                line: Some(4)
            }
        );

        let value = config.get(b"section", b"item").unwrap();
        assert_eq!(value, b"value2",);
        assert_eq!(
            config.get_all(b"section", b"item"),
            [b"value2", b"value1", b"value0"]
        );

        // Typed getters: valid values parse, invalid ones error out.
        assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
        assert_eq!(
            config.get_byte_size(b"section2", b"size").unwrap(),
            Some(1024 + 512)
        );
        assert!(config.get_u32(b"section2", b"not-count").is_err());
        assert!(config.get_byte_size(b"section2", b"not-size").is_err());
    }

    // Checks that registered defaults parse under the typed getters even
    // with an empty user configuration.
    #[test]
    fn test_default_parse() {
        let config = Config::load_from_explicit_sources(vec![])
            .expect("expected valid config");
        let ret = config.get_byte_size(b"cmdserver", b"max-log-size");
        assert!(ret.is_ok(), "{:?}", ret);

        let ret = config.get_byte_size(b"ui", b"formatted");
        assert!(ret.unwrap().is_none());
    }
}
@@ -1,286 +1,285 b''
1 // dagops.rs
1 // dagops.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Miscellaneous DAG operations
8 //! Miscellaneous DAG operations
9 //!
9 //!
10 //! # Terminology
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //! mean those revisions that have no children among the collection.
12 //! mean those revisions that have no children among the collection.
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 //! whose parents, if any, don't belong to the collection.
14 //! whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
17 use std::collections::{BTreeSet, HashSet};
18
18
19 fn remove_parents<S: std::hash::BuildHasher>(
19 fn remove_parents<S: std::hash::BuildHasher>(
20 graph: &impl Graph,
20 graph: &impl Graph,
21 rev: Revision,
21 rev: Revision,
22 set: &mut HashSet<Revision, S>,
22 set: &mut HashSet<Revision, S>,
23 ) -> Result<(), GraphError> {
23 ) -> Result<(), GraphError> {
24 for parent in graph.parents(rev)?.iter() {
24 for parent in graph.parents(rev)?.iter() {
25 if *parent != NULL_REVISION {
25 if *parent != NULL_REVISION {
26 set.remove(parent);
26 set.remove(parent);
27 }
27 }
28 }
28 }
29 Ok(())
29 Ok(())
30 }
30 }
31
31
32 /// Relative heads out of some revisions, passed as an iterator.
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
33 ///
34 /// These heads are defined as those revisions that have no children
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
35 /// among those emitted by the iterator.
36 ///
36 ///
37 /// # Performance notes
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
42 /// construct it comes from.
43 pub fn heads<'a>(
43 pub fn heads<'a>(
44 graph: &impl Graph,
44 graph: &impl Graph,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
46 ) -> Result<HashSet<Revision>, GraphError> {
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 heads.remove(&NULL_REVISION);
48 heads.remove(&NULL_REVISION);
49 for rev in iter_revs {
49 for rev in iter_revs {
50 if *rev != NULL_REVISION {
50 if *rev != NULL_REVISION {
51 remove_parents(graph, *rev, &mut heads)?;
51 remove_parents(graph, *rev, &mut heads)?;
52 }
52 }
53 }
53 }
54 Ok(heads)
54 Ok(heads)
55 }
55 }
56
56
57 /// Retain in `revs` only its relative heads.
57 /// Retain in `revs` only its relative heads.
58 ///
58 ///
59 /// This is an in-place operation, so that control of the incoming
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 /// from an incoming iterable, even if its sole purpose is to extract the
62 /// from an incoming iterable, even if its sole purpose is to extract the
63 /// heads.
63 /// heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
65 ///
66 /// # Performance notes
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads<S: std::hash::BuildHasher>(
68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 graph: &impl Graph,
69 graph: &impl Graph,
70 revs: &mut HashSet<Revision, S>,
70 revs: &mut HashSet<Revision, S>,
71 ) -> Result<(), GraphError> {
71 ) -> Result<(), GraphError> {
72 revs.remove(&NULL_REVISION);
72 revs.remove(&NULL_REVISION);
73 // we need to construct an iterable copy of revs to avoid itering while
73 // we need to construct an iterable copy of revs to avoid itering while
74 // mutating
74 // mutating
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 for rev in as_vec {
76 for rev in as_vec {
77 if rev != NULL_REVISION {
77 if rev != NULL_REVISION {
78 remove_parents(graph, rev, revs)?;
78 remove_parents(graph, rev, revs)?;
79 }
79 }
80 }
80 }
81 Ok(())
81 Ok(())
82 }
82 }
83
83
84 /// Roots of `revs`, passed as a `HashSet`
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
85 ///
86 /// They are returned in arbitrary order
86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 graph: &G,
88 graph: &G,
89 revs: &HashSet<Revision, S>,
89 revs: &HashSet<Revision, S>,
90 ) -> Result<Vec<Revision>, GraphError> {
90 ) -> Result<Vec<Revision>, GraphError> {
91 let mut roots: Vec<Revision> = Vec::new();
91 let mut roots: Vec<Revision> = Vec::new();
92 for rev in revs {
92 for rev in revs {
93 if graph
93 if graph
94 .parents(*rev)?
94 .parents(*rev)?
95 .iter()
95 .iter()
96 .filter(|p| **p != NULL_REVISION)
96 .filter(|p| **p != NULL_REVISION)
97 .all(|p| !revs.contains(p))
97 .all(|p| !revs.contains(p))
98 {
98 {
99 roots.push(*rev);
99 roots.push(*rev);
100 }
100 }
101 }
101 }
102 Ok(roots)
102 Ok(roots)
103 }
103 }
104
104
105 /// Compute the topological range between two collections of revisions
105 /// Compute the topological range between two collections of revisions
106 ///
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
112 /// use of `AncestorsIterator`
113 ///
113 ///
114 /// # Algorithmic details
114 /// # Algorithmic details
115 ///
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 /// them in the vector `heads_ancestors` vector, and adding any element of
120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 /// `roots` we find among them to the resulting range.
121 /// `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 /// emitted after its parents and add all revisions whose parents are already
123 /// emitted after its parents and add all revisions whose parents are already
124 /// in the range to the results.
124 /// in the range to the results.
125 ///
125 ///
126 /// # Performance notes
126 /// # Performance notes
127 ///
127 ///
128 /// The main difference with the C implementation is that
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
132 /// for very large repositories with many branches.
133 pub fn range(
133 pub fn range(
134 graph: &(impl Graph + Clone),
134 graph: &(impl Graph + Clone),
135 roots: impl IntoIterator<Item = Revision>,
135 roots: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 let mut range = BTreeSet::new();
138 let mut range = BTreeSet::new();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 let min_root: Revision = match roots.iter().cloned().min() {
140 let min_root: Revision = match roots.iter().cloned().min() {
141 None => {
141 None => {
142 return Ok(range);
142 return Ok(range);
143 }
143 }
144 Some(r) => r,
144 Some(r) => r,
145 };
145 };
146
146
147 // Internally, AncestorsIterator currently maintains a `HashSet`
147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 // of all seen revision, which is also what we record, albeit in an ordered
148 // of all seen revision, which is also what we record, albeit in an ordered
149 // way. There's room for improvement on this duplication.
149 // way. There's room for improvement on this duplication.
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 for revres in ait {
152 for revres in ait {
153 let rev = revres?;
153 let rev = revres?;
154 if roots.contains(&rev) {
154 if roots.contains(&rev) {
155 range.insert(rev);
155 range.insert(rev);
156 }
156 }
157 heads_ancestors.push(rev);
157 heads_ancestors.push(rev);
158 }
158 }
159
159
160 for rev in heads_ancestors.into_iter().rev() {
160 for rev in heads_ancestors.into_iter().rev() {
161 for parent in graph.parents(rev)?.iter() {
161 for parent in graph.parents(rev)?.iter() {
162 if *parent != NULL_REVISION && range.contains(parent) {
162 if *parent != NULL_REVISION && range.contains(parent) {
163 range.insert(rev);
163 range.insert(rev);
164 }
164 }
165 }
165 }
166 }
166 }
167 Ok(range)
167 Ok(range)
168 }
168 }
169
169
170 #[cfg(test)]
170 #[cfg(test)]
171 mod tests {
171 mod tests {
172
172
173 use super::*;
173 use super::*;
174 use crate::{testing::SampleGraph, BaseRevision};
174 use crate::{testing::SampleGraph, BaseRevision};
175
175
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 fn retain_heads_sorted(
177 fn retain_heads_sorted(
178 graph: &impl Graph,
178 graph: &impl Graph,
179 revs: &[BaseRevision],
179 revs: &[BaseRevision],
180 ) -> Result<Vec<Revision>, GraphError> {
180 ) -> Result<Vec<Revision>, GraphError> {
181 let mut revs: HashSet<Revision> =
181 let mut revs: HashSet<Revision> =
182 revs.iter().cloned().map(Revision).collect();
182 revs.iter().cloned().map(Revision).collect();
183 retain_heads(graph, &mut revs)?;
183 retain_heads(graph, &mut revs)?;
184 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
185 as_vec.sort_unstable();
185 as_vec.sort_unstable();
186 Ok(as_vec)
186 Ok(as_vec)
187 }
187 }
188
188
189 #[test]
189 #[test]
190 fn test_retain_heads() -> Result<(), GraphError> {
190 fn test_retain_heads() -> Result<(), GraphError> {
191 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
192 assert_eq!(
192 assert_eq!(
193 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
194 vec![1, 6, 12]
194 vec![1, 6, 12]
195 );
195 );
196 assert_eq!(
196 assert_eq!(
197 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
198 vec![3, 5, 8, 9]
198 vec![3, 5, 8, 9]
199 );
199 );
200 Ok(())
200 Ok(())
201 }
201 }
202
202
203 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 /// Apply `heads()` to the given slice and return as a sorted `Vec`
204 fn heads_sorted(
204 fn heads_sorted(
205 graph: &impl Graph,
205 graph: &impl Graph,
206 revs: &[BaseRevision],
206 revs: &[BaseRevision],
207 ) -> Result<Vec<Revision>, GraphError> {
207 ) -> Result<Vec<Revision>, GraphError> {
208 let iter_revs: Vec<_> =
208 let iter_revs: Vec<_> = revs.iter().cloned().map(Revision).collect();
209 revs.into_iter().cloned().map(Revision).collect();
210 let heads = heads(graph, iter_revs.iter())?;
209 let heads = heads(graph, iter_revs.iter())?;
211 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
210 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
212 as_vec.sort_unstable();
211 as_vec.sort_unstable();
213 Ok(as_vec)
212 Ok(as_vec)
214 }
213 }
215
214
216 #[test]
215 #[test]
217 fn test_heads() -> Result<(), GraphError> {
216 fn test_heads() -> Result<(), GraphError> {
218 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
217 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
219 assert_eq!(
218 assert_eq!(
220 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
219 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
221 vec![1, 6, 12]
220 vec![1, 6, 12]
222 );
221 );
223 assert_eq!(
222 assert_eq!(
224 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
223 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
225 vec![3, 5, 8, 9]
224 vec![3, 5, 8, 9]
226 );
225 );
227 Ok(())
226 Ok(())
228 }
227 }
229
228
230 /// Apply `roots()` and sort the result for easier comparison
229 /// Apply `roots()` and sort the result for easier comparison
231 fn roots_sorted(
230 fn roots_sorted(
232 graph: &impl Graph,
231 graph: &impl Graph,
233 revs: &[BaseRevision],
232 revs: &[BaseRevision],
234 ) -> Result<Vec<Revision>, GraphError> {
233 ) -> Result<Vec<Revision>, GraphError> {
235 let set: HashSet<_> = revs.iter().cloned().map(Revision).collect();
234 let set: HashSet<_> = revs.iter().cloned().map(Revision).collect();
236 let mut as_vec = roots(graph, &set)?;
235 let mut as_vec = roots(graph, &set)?;
237 as_vec.sort_unstable();
236 as_vec.sort_unstable();
238 Ok(as_vec)
237 Ok(as_vec)
239 }
238 }
240
239
241 #[test]
240 #[test]
242 fn test_roots() -> Result<(), GraphError> {
241 fn test_roots() -> Result<(), GraphError> {
243 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
242 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
244 assert_eq!(
243 assert_eq!(
245 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
244 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
246 vec![0, 4, 12]
245 vec![0, 4, 12]
247 );
246 );
248 assert_eq!(
247 assert_eq!(
249 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
248 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
250 vec![1, 8]
249 vec![1, 8]
251 );
250 );
252 Ok(())
251 Ok(())
253 }
252 }
254
253
255 /// Apply `range()` and convert the result into a Vec for easier comparison
254 /// Apply `range()` and convert the result into a Vec for easier comparison
256 fn range_vec(
255 fn range_vec(
257 graph: impl Graph + Clone,
256 graph: impl Graph + Clone,
258 roots: &[BaseRevision],
257 roots: &[BaseRevision],
259 heads: &[BaseRevision],
258 heads: &[BaseRevision],
260 ) -> Result<Vec<Revision>, GraphError> {
259 ) -> Result<Vec<Revision>, GraphError> {
261 range(
260 range(
262 &graph,
261 &graph,
263 roots.iter().cloned().map(Revision),
262 roots.iter().cloned().map(Revision),
264 heads.iter().cloned().map(Revision),
263 heads.iter().cloned().map(Revision),
265 )
264 )
266 .map(|bs| bs.into_iter().collect())
265 .map(|bs| bs.into_iter().collect())
267 }
266 }
268
267
269 #[test]
268 #[test]
270 fn test_range() -> Result<(), GraphError> {
269 fn test_range() -> Result<(), GraphError> {
271 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
270 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
272 assert_eq!(
271 assert_eq!(
273 range_vec(SampleGraph, &[0], &[8])?,
272 range_vec(SampleGraph, &[0], &[8])?,
274 Vec::<Revision>::new()
273 Vec::<Revision>::new()
275 );
274 );
276 assert_eq!(
275 assert_eq!(
277 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
276 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
278 vec![5, 10]
277 vec![5, 10]
279 );
278 );
280 assert_eq!(
279 assert_eq!(
281 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
280 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
282 vec![5, 6, 9, 10, 12]
281 vec![5, 6, 9, 10, 12]
283 );
282 );
284 Ok(())
283 Ok(())
285 }
284 }
286 }
285 }
@@ -1,1937 +1,1937 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::path::PathBuf;
3 use std::path::PathBuf;
4
4
5 use super::on_disk;
5 use super::on_disk;
6 use super::on_disk::DirstateV2ParseError;
6 use super::on_disk::DirstateV2ParseError;
7 use super::owning::OwningDirstateMap;
7 use super::owning::OwningDirstateMap;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::pack_entry;
9 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::packed_entry_size;
10 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::parse_dirstate_entries;
11 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::CopyMapIter;
12 use crate::dirstate::CopyMapIter;
13 use crate::dirstate::DirstateV2Data;
13 use crate::dirstate::DirstateV2Data;
14 use crate::dirstate::ParentFileData;
14 use crate::dirstate::ParentFileData;
15 use crate::dirstate::StateMapIter;
15 use crate::dirstate::StateMapIter;
16 use crate::dirstate::TruncatedTimestamp;
16 use crate::dirstate::TruncatedTimestamp;
17 use crate::matchers::Matcher;
17 use crate::matchers::Matcher;
18 use crate::utils::filter_map_results;
18 use crate::utils::filter_map_results;
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 use crate::DirstateEntry;
20 use crate::DirstateEntry;
21 use crate::DirstateError;
21 use crate::DirstateError;
22 use crate::DirstateMapError;
22 use crate::DirstateMapError;
23 use crate::DirstateParents;
23 use crate::DirstateParents;
24 use crate::DirstateStatus;
24 use crate::DirstateStatus;
25 use crate::FastHashbrownMap as FastHashMap;
25 use crate::FastHashbrownMap as FastHashMap;
26 use crate::PatternFileWarning;
26 use crate::PatternFileWarning;
27 use crate::StatusError;
27 use crate::StatusError;
28 use crate::StatusOptions;
28 use crate::StatusOptions;
29
29
30 /// Append to an existing data file if the amount of unreachable data (not used
30 /// Append to an existing data file if the amount of unreachable data (not used
31 /// anymore) is less than this fraction of the total amount of existing data.
31 /// anymore) is less than this fraction of the total amount of existing data.
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33
33
34 #[derive(Debug, PartialEq, Eq)]
34 #[derive(Debug, PartialEq, Eq)]
35 /// Version of the on-disk format
35 /// Version of the on-disk format
36 pub enum DirstateVersion {
36 pub enum DirstateVersion {
37 V1,
37 V1,
38 V2,
38 V2,
39 }
39 }
40
40
41 #[derive(Debug, PartialEq, Eq)]
41 #[derive(Debug, PartialEq, Eq)]
42 pub enum DirstateMapWriteMode {
42 pub enum DirstateMapWriteMode {
43 Auto,
43 Auto,
44 ForceNewDataFile,
44 ForceNewDataFile,
45 ForceAppend,
45 ForceAppend,
46 }
46 }
47
47
48 #[derive(Debug)]
48 #[derive(Debug)]
49 pub struct DirstateMap<'on_disk> {
49 pub struct DirstateMap<'on_disk> {
50 /// Contents of the `.hg/dirstate` file
50 /// Contents of the `.hg/dirstate` file
51 pub(super) on_disk: &'on_disk [u8],
51 pub(super) on_disk: &'on_disk [u8],
52
52
53 pub(super) root: ChildNodes<'on_disk>,
53 pub(super) root: ChildNodes<'on_disk>,
54
54
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 pub(super) nodes_with_entry_count: u32,
56 pub(super) nodes_with_entry_count: u32,
57
57
58 /// Number of nodes anywhere in the tree that have
58 /// Number of nodes anywhere in the tree that have
59 /// `.copy_source.is_some()`.
59 /// `.copy_source.is_some()`.
60 pub(super) nodes_with_copy_source_count: u32,
60 pub(super) nodes_with_copy_source_count: u32,
61
61
62 /// See on_disk::Header
62 /// See on_disk::Header
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64
64
65 /// How many bytes of `on_disk` are not used anymore
65 /// How many bytes of `on_disk` are not used anymore
66 pub(super) unreachable_bytes: u32,
66 pub(super) unreachable_bytes: u32,
67
67
68 /// Size of the data used to first load this `DirstateMap`. Used in case
68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 /// we need to write some new metadata, but no new data on disk,
69 /// we need to write some new metadata, but no new data on disk,
70 /// as well as to detect writes that have happened in another process
70 /// as well as to detect writes that have happened in another process
71 /// since first read.
71 /// since first read.
72 pub(super) old_data_size: usize,
72 pub(super) old_data_size: usize,
73
73
74 /// UUID used when first loading this `DirstateMap`. Used to check if
74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 /// the UUID has been changed by another process since first read.
75 /// the UUID has been changed by another process since first read.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 pub(super) old_uuid: Option<Vec<u8>>,
77 pub(super) old_uuid: Option<Vec<u8>>,
78
78
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 /// (v2). Used to detect if the file has changed from another process.
80 /// (v2). Used to detect if the file has changed from another process.
81 /// Since it's always written atomically, we can compare the inode to
81 /// Since it's always written atomically, we can compare the inode to
82 /// check the file identity.
82 /// check the file identity.
83 ///
83 ///
84 /// TODO On non-Unix systems, something like hashing is a possibility?
84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 pub(super) identity: Option<u64>,
85 pub(super) identity: Option<u64>,
86
86
87 pub(super) dirstate_version: DirstateVersion,
87 pub(super) dirstate_version: DirstateVersion,
88
88
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
90 pub(super) write_mode: DirstateMapWriteMode,
90 pub(super) write_mode: DirstateMapWriteMode,
91 }
91 }
92
92
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
94 /// map key would also work: all paths in a given map have the same parent
94 /// map key would also work: all paths in a given map have the same parent
95 /// path, so comparing full paths gives the same result as comparing base
95 /// path, so comparing full paths gives the same result as comparing base
96 /// names. However `HashMap` would waste time always re-hashing the same
96 /// names. However `HashMap` would waste time always re-hashing the same
97 /// string prefix.
97 /// string prefix.
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
99
99
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
102 #[derive(Debug)]
102 #[derive(Debug)]
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
104 InMemory(&'tree HgPathBuf),
104 InMemory(&'tree HgPathBuf),
105 OnDisk(&'on_disk HgPath),
105 OnDisk(&'on_disk HgPath),
106 }
106 }
107
107
108 #[derive(Debug)]
108 #[derive(Debug)]
109 pub(super) enum ChildNodes<'on_disk> {
109 pub(super) enum ChildNodes<'on_disk> {
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
111 OnDisk(&'on_disk [on_disk::Node]),
111 OnDisk(&'on_disk [on_disk::Node]),
112 }
112 }
113
113
114 #[derive(Debug)]
114 #[derive(Debug)]
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
117 OnDisk(&'on_disk [on_disk::Node]),
117 OnDisk(&'on_disk [on_disk::Node]),
118 }
118 }
119
119
120 #[derive(Debug)]
120 #[derive(Debug)]
121 pub(super) enum NodeRef<'tree, 'on_disk> {
121 pub(super) enum NodeRef<'tree, 'on_disk> {
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
123 OnDisk(&'on_disk on_disk::Node),
123 OnDisk(&'on_disk on_disk::Node),
124 }
124 }
125
125
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
128 match *self {
128 match *self {
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
131 }
131 }
132 }
132 }
133 }
133 }
134
134
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
136 type Target = HgPath;
136 type Target = HgPath;
137
137
138 fn deref(&self) -> &HgPath {
138 fn deref(&self) -> &HgPath {
139 match *self {
139 match *self {
140 BorrowedPath::InMemory(in_memory) => in_memory,
140 BorrowedPath::InMemory(in_memory) => in_memory,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
142 }
142 }
143 }
143 }
144 }
144 }
145
145
146 impl Default for ChildNodes<'_> {
146 impl Default for ChildNodes<'_> {
147 fn default() -> Self {
147 fn default() -> Self {
148 ChildNodes::InMemory(Default::default())
148 ChildNodes::InMemory(Default::default())
149 }
149 }
150 }
150 }
151
151
152 impl<'on_disk> ChildNodes<'on_disk> {
152 impl<'on_disk> ChildNodes<'on_disk> {
153 pub(super) fn as_ref<'tree>(
153 pub(super) fn as_ref<'tree>(
154 &'tree self,
154 &'tree self,
155 ) -> ChildNodesRef<'tree, 'on_disk> {
155 ) -> ChildNodesRef<'tree, 'on_disk> {
156 match self {
156 match self {
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
159 }
159 }
160 }
160 }
161
161
162 pub(super) fn is_empty(&self) -> bool {
162 pub(super) fn is_empty(&self) -> bool {
163 match self {
163 match self {
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
166 }
166 }
167 }
167 }
168
168
169 fn make_mut(
169 fn make_mut(
170 &mut self,
170 &mut self,
171 on_disk: &'on_disk [u8],
171 on_disk: &'on_disk [u8],
172 unreachable_bytes: &mut u32,
172 unreachable_bytes: &mut u32,
173 ) -> Result<
173 ) -> Result<
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
175 DirstateV2ParseError,
175 DirstateV2ParseError,
176 > {
176 > {
177 match self {
177 match self {
178 ChildNodes::InMemory(nodes) => Ok(nodes),
178 ChildNodes::InMemory(nodes) => Ok(nodes),
179 ChildNodes::OnDisk(nodes) => {
179 ChildNodes::OnDisk(nodes) => {
180 *unreachable_bytes +=
180 *unreachable_bytes +=
181 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
182 let nodes = nodes
182 let nodes = nodes
183 .iter()
183 .iter()
184 .map(|node| {
184 .map(|node| {
185 Ok((
185 Ok((
186 node.path(on_disk)?,
186 node.path(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
188 ))
188 ))
189 })
189 })
190 .collect::<Result<_, _>>()?;
190 .collect::<Result<_, _>>()?;
191 *self = ChildNodes::InMemory(nodes);
191 *self = ChildNodes::InMemory(nodes);
192 match self {
192 match self {
193 ChildNodes::InMemory(nodes) => Ok(nodes),
193 ChildNodes::InMemory(nodes) => Ok(nodes),
194 ChildNodes::OnDisk(_) => unreachable!(),
194 ChildNodes::OnDisk(_) => unreachable!(),
195 }
195 }
196 }
196 }
197 }
197 }
198 }
198 }
199 }
199 }
200
200
impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
    /// Look up the direct child whose base name is `base_name`.
    ///
    /// `on_disk` is the raw dirstate-v2 data file; it is needed to resolve
    /// the paths of `OnDisk` nodes. Returns `Ok(None)` when no child
    /// matches.
    pub(super) fn get(
        &self,
        base_name: &HgPath,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
        match self {
            ChildNodesRef::InMemory(nodes) => Ok(nodes
                .get_key_value(base_name)
                .map(|(k, v)| NodeRef::InMemory(k, v))),
            ChildNodesRef::OnDisk(nodes) => {
                // `binary_search_by` requires an infallible comparator, so
                // a parse error inside it is smuggled out through this
                // local variable instead of being returned directly.
                let mut parse_result = Ok(());
                let search_result = nodes.binary_search_by(|node| {
                    match node.base_name(on_disk) {
                        Ok(node_base_name) => node_base_name.cmp(base_name),
                        Err(e) => {
                            parse_result = Err(e);
                            // Dummy comparison result, `search_result` won’t
                            // be used since `parse_result` is an error
                            std::cmp::Ordering::Equal
                        }
                    }
                });
                parse_result.map(|()| {
                    search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
                })
            }
        }
    }

    /// Iterate in undefined order
    pub(super) fn iter(
        &self,
    ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
        // `itertools::Either` unifies the two different iterator types into
        // a single `impl Iterator` return type.
        match self {
            ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
                nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
            ),
            ChildNodesRef::OnDisk(nodes) => {
                itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
            }
        }
    }

    /// Iterate in parallel in undefined order
    pub(super) fn par_iter(
        &self,
    ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
    {
        use rayon::prelude::*;
        // Same shape as `iter`, with rayon's `Either` for parallel
        // iterators.
        match self {
            ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
                nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
            ),
            ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
                nodes.par_iter().map(NodeRef::OnDisk),
            ),
        }
    }

    /// Return all children sorted by base name.
    pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
        match self {
            ChildNodesRef::InMemory(nodes) => {
                let mut vec: Vec<_> = nodes
                    .iter()
                    .map(|(k, v)| NodeRef::InMemory(k, v))
                    .collect();
                // Only applied to the `InMemory` refs built just above, so
                // the `OnDisk` arm can never be reached here.
                fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
                    match node {
                        NodeRef::InMemory(path, _node) => path.base_name(),
                        NodeRef::OnDisk(_) => unreachable!(),
                    }
                }
                // `sort_unstable_by_key` doesn’t allow keys borrowing from the
                // value: https://github.com/rust-lang/rust/issues/34162
                vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
                vec
            }
            ChildNodesRef::OnDisk(nodes) => {
                // Nodes on disk are already sorted
                nodes.iter().map(NodeRef::OnDisk).collect()
            }
        }
    }
}
286
286
impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
    /// The node's full path; `InMemory` nodes store it directly, `OnDisk`
    /// nodes resolve it from the raw data file.
    pub(super) fn full_path(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.full_path()),
            NodeRef::OnDisk(node) => node.full_path(on_disk),
        }
    }

    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn full_path_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => match path.full_path() {
                Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
                Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
            },
            NodeRef::OnDisk(node) => {
                Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
            }
        }
    }

    /// The last component of the node's full path.
    pub(super) fn base_name(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.base_name()),
            NodeRef::OnDisk(node) => node.base_name(on_disk),
        }
    }

    /// This node's direct children, in whichever representation this node
    /// uses.
    pub(super) fn children(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
            NodeRef::OnDisk(node) => {
                Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
            }
        }
    }

    /// Whether a copy source is recorded for this node.
    pub(super) fn has_copy_source(&self) -> bool {
        match self {
            NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
            NodeRef::OnDisk(node) => node.has_copy_source(),
        }
    }

    /// The copy source path recorded for this node, if any.
    pub(super) fn copy_source(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
            NodeRef::OnDisk(node) => node.copy_source(on_disk),
        }
    }
    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn copy_source_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
    {
        Ok(match self {
            NodeRef::InMemory(_path, node) => {
                node.copy_source.as_ref().map(|source| match source {
                    Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
                    Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
                })
            }
            NodeRef::OnDisk(node) => {
                node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
            }
        })
    }

    /// A copy of this node's dirstate entry, if it has one.
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => {
                Ok(node.data.as_entry().copied())
            }
            NodeRef::OnDisk(node) => node.entry(),
        }
    }

    /// The cached mtime if this node is a directory with a cached mtime,
    /// `None` otherwise.
    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(match node.data {
                NodeData::CachedDirectory { mtime } => Some(mtime),
                _ => None,
            }),
            NodeRef::OnDisk(node) => node.cached_directory_mtime(),
        }
    }

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) fn descendants_with_entry_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => {
                node.descendants_with_entry_count
            }
            NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
        }
    }

    /// How many (non-inclusive) descendants of this node have an entry whose
    /// state is "tracked".
    pub(super) fn tracked_descendants_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
            NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
        }
    }
}
412
412
/// Represents a file or a directory
#[derive(Default, Debug)]
pub(super) struct Node<'on_disk> {
    /// The dirstate entry for this node, a cached directory mtime, or
    /// nothing (see `NodeData`).
    pub(super) data: NodeData,

    /// The path this file was copied from, if any.
    pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,

    /// Direct children of this node, keyed by base name.
    pub(super) children: ChildNodes<'on_disk>,

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) descendants_with_entry_count: u32,

    /// How many (non-inclusive) descendants of this node have an entry whose
    /// state is "tracked".
    pub(super) tracked_descendants_count: u32,
}
429
429
/// Payload attached to a `Node`.
#[derive(Debug)]
pub(super) enum NodeData {
    /// The node has an actual dirstate entry (it corresponds to a file).
    Entry(DirstateEntry),
    /// The node is a directory whose mtime has been cached.
    CachedDirectory { mtime: TruncatedTimestamp },
    /// Neither of the above.
    None,
}
436
436
437 impl Default for NodeData {
437 impl Default for NodeData {
438 fn default() -> Self {
438 fn default() -> Self {
439 NodeData::None
439 NodeData::None
440 }
440 }
441 }
441 }
442
442
443 impl NodeData {
443 impl NodeData {
444 fn has_entry(&self) -> bool {
444 fn has_entry(&self) -> bool {
445 matches!(self, NodeData::Entry(_))
445 matches!(self, NodeData::Entry(_))
446 }
446 }
447
447
448 fn as_entry(&self) -> Option<&DirstateEntry> {
448 fn as_entry(&self) -> Option<&DirstateEntry> {
449 match self {
449 match self {
450 NodeData::Entry(entry) => Some(entry),
450 NodeData::Entry(entry) => Some(entry),
451 _ => None,
451 _ => None,
452 }
452 }
453 }
453 }
454
454
455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
456 match self {
456 match self {
457 NodeData::Entry(entry) => Some(entry),
457 NodeData::Entry(entry) => Some(entry),
458 _ => None,
458 _ => None,
459 }
459 }
460 }
460 }
461 }
461 }
462
462
463 impl<'on_disk> DirstateMap<'on_disk> {
463 impl<'on_disk> DirstateMap<'on_disk> {
    /// An empty map borrowing `on_disk` (which may itself be empty): no
    /// nodes, all counters at zero, dirstate-v1 by default, and automatic
    /// write mode.
    pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
        Self {
            on_disk,
            root: ChildNodes::default(),
            nodes_with_entry_count: 0,
            nodes_with_copy_source_count: 0,
            ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
            unreachable_bytes: 0,
            old_data_size: 0,
            old_uuid: None,
            identity: None,
            dirstate_version: DirstateVersion::V1,
            write_mode: DirstateMapWriteMode::Auto,
        }
    }
479
479
480 #[logging_timer::time("trace")]
480 #[logging_timer::time("trace")]
481 pub fn new_v2(
481 pub fn new_v2(
482 on_disk: &'on_disk [u8],
482 on_disk: &'on_disk [u8],
483 data_size: usize,
483 data_size: usize,
484 metadata: &[u8],
484 metadata: &[u8],
485 uuid: Vec<u8>,
485 uuid: Vec<u8>,
486 identity: Option<u64>,
486 identity: Option<u64>,
487 ) -> Result<Self, DirstateError> {
487 ) -> Result<Self, DirstateError> {
488 if let Some(data) = on_disk.get(..data_size) {
488 if let Some(data) = on_disk.get(..data_size) {
489 Ok(on_disk::read(data, metadata, uuid, identity)?)
489 Ok(on_disk::read(data, metadata, uuid, identity)?)
490 } else {
490 } else {
491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
492 }
492 }
493 }
493 }
494
494
    #[logging_timer::time("trace")]
    /// Build a `DirstateMap` from dirstate-v1 bytes.
    ///
    /// Returns the map and the parents recorded in the v1 data, or `None`
    /// for the parents when `on_disk` is empty.
    pub fn new_v1(
        on_disk: &'on_disk [u8],
        identity: Option<u64>,
    ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
        let mut map = Self::empty(on_disk);
        if map.on_disk.is_empty() {
            return Ok((map, None));
        }

        let parents = parse_dirstate_entries(
            map.on_disk,
            |path, entry, copy_source| {
                let tracked = entry.tracked();
                // Create the node for `path` (and any missing ancestor),
                // updating the descendant counters on the way down. The
                // borrowing `to_cow_borrowed` avoids allocating paths that
                // already live in `on_disk`.
                let node = Self::get_or_insert_node_inner(
                    map.on_disk,
                    &mut map.unreachable_bytes,
                    &mut map.root,
                    path,
                    WithBasename::to_cow_borrowed,
                    |ancestor| {
                        if tracked {
                            ancestor.tracked_descendants_count += 1
                        }
                        ancestor.descendants_with_entry_count += 1
                    },
                )?;
                // A valid v1 dirstate never mentions the same file twice.
                assert!(
                    !node.data.has_entry(),
                    "duplicate dirstate entry in read"
                );
                assert!(
                    node.copy_source.is_none(),
                    "duplicate dirstate entry in read"
                );
                node.data = NodeData::Entry(*entry);
                node.copy_source = copy_source.map(Cow::Borrowed);
                map.nodes_with_entry_count += 1;
                if copy_source.is_some() {
                    map.nodes_with_copy_source_count += 1
                }
                Ok(())
            },
        )?;
        let parents = Some(*parents);
        map.identity = identity;

        Ok((map, parents))
    }
544
544
545 /// Assuming dirstate-v2 format, returns whether the next write should
545 /// Assuming dirstate-v2 format, returns whether the next write should
546 /// append to the existing data file that contains `self.on_disk` (true),
546 /// append to the existing data file that contains `self.on_disk` (true),
547 /// or create a new data file from scratch (false).
547 /// or create a new data file from scratch (false).
548 pub(super) fn write_should_append(&self) -> bool {
548 pub(super) fn write_should_append(&self) -> bool {
549 match self.write_mode {
549 match self.write_mode {
550 DirstateMapWriteMode::ForceAppend => true,
550 DirstateMapWriteMode::ForceAppend => true,
551 DirstateMapWriteMode::ForceNewDataFile => false,
551 DirstateMapWriteMode::ForceNewDataFile => false,
552 DirstateMapWriteMode::Auto => {
552 DirstateMapWriteMode::Auto => {
553 let ratio =
553 let ratio =
554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
556 }
556 }
557 }
557 }
558 }
558 }
559
559
560 fn get_node<'tree>(
560 fn get_node<'tree>(
561 &'tree self,
561 &'tree self,
562 path: &HgPath,
562 path: &HgPath,
563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
564 let mut children = self.root.as_ref();
564 let mut children = self.root.as_ref();
565 let mut components = path.components();
565 let mut components = path.components();
566 let mut component =
566 let mut component =
567 components.next().expect("expected at least one components");
567 components.next().expect("expected at least one components");
568 loop {
568 loop {
569 if let Some(child) = children.get(component, self.on_disk)? {
569 if let Some(child) = children.get(component, self.on_disk)? {
570 if let Some(next_component) = components.next() {
570 if let Some(next_component) = components.next() {
571 component = next_component;
571 component = next_component;
572 children = child.children(self.on_disk)?;
572 children = child.children(self.on_disk)?;
573 } else {
573 } else {
574 return Ok(Some(child));
574 return Ok(Some(child));
575 }
575 }
576 } else {
576 } else {
577 return Ok(None);
577 return Ok(None);
578 }
578 }
579 }
579 }
580 }
580 }
581
581
582 pub fn has_node(
582 pub fn has_node(
583 &self,
583 &self,
584 path: &HgPath,
584 path: &HgPath,
585 ) -> Result<bool, DirstateV2ParseError> {
585 ) -> Result<bool, DirstateV2ParseError> {
586 let node = self.get_node(path)?;
586 let node = self.get_node(path)?;
587 Ok(node.is_some())
587 Ok(node.is_some())
588 }
588 }
589
589
    /// Returns a mutable reference to the node at `path` if it exists
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    fn get_node_mut<'tree>(
        &'tree mut self,
        path: &HgPath,
        each_ancestor: impl FnMut(&mut Node),
    ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
        // Thin wrapper: the `_inner` variant takes the fields separately so
        // that `self.root` and `self.unreachable_bytes` can be borrowed
        // mutably at the same time.
        Self::get_node_mut_inner(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            each_ancestor,
        )
    }
608
608
    /// Lower-level version of `get_node_mut`.
    ///
    /// This takes `root` instead of `&mut self` so that callers can mutate
    /// other fields while the returned borrow is still valid.
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    fn get_node_mut_inner<'tree>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &HgPath,
        mut each_ancestor: impl FnMut(&mut Node),
    ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
        // Walk down one path component at a time. `make_mut` converts
        // `OnDisk` child nodes into their `InMemory` form, accounting for
        // the on-disk bytes that become unreachable in the process.
        let mut children = root;
        let mut components = path.components();
        let mut component =
            components.next().expect("expected at least one components");
        loop {
            if let Some(child) = children
                .make_mut(on_disk, unreachable_bytes)?
                .get_mut(component)
            {
                if let Some(next_component) = components.next() {
                    // Not the last component yet: report this ancestor and
                    // descend one level.
                    each_ancestor(child);
                    component = next_component;
                    children = &mut child.children;
                } else {
                    return Ok(Some(child));
                }
            } else {
                return Ok(None);
            }
        }
    }
645
645
    /// Get a mutable reference to the node at `path`, creating it if it does
    /// not exist.
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    fn get_or_insert_node<'tree, 'path>(
        &'tree mut self,
        path: &'path HgPath,
        each_ancestor: impl FnMut(&mut Node),
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        // Uses `to_cow_owned` (allocating owned path components for new
        // nodes), unlike the borrowing variant used when parsing v1 data.
        Self::get_or_insert_node_inner(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            WithBasename::to_cow_owned,
            each_ancestor,
        )
    }
666
666
    /// Lower-level version of `get_or_insert_node`, which is used when
    /// parsing disk data to remove allocations for new nodes.
    fn get_or_insert_node_inner<'tree, 'path>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &'path HgPath,
        to_cow: impl Fn(
            WithBasename<&'path HgPath>,
        ) -> WithBasename<Cow<'on_disk, HgPath>>,
        mut each_ancestor: impl FnMut(&mut Node),
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        let mut child_nodes = root;
        // Visit `a`, `a/b`, … down to `path` itself, creating every node
        // that does not exist yet.
        let mut inclusive_ancestor_paths =
            WithBasename::inclusive_ancestors_of(path);
        let mut ancestor_path = inclusive_ancestor_paths
            .next()
            .expect("expected at least one inclusive ancestor");
        loop {
            // The raw-entry API looks the child up by borrowed base name and
            // only builds the owned key (via `to_cow`) when the child is
            // actually missing.
            let (_, child_node) = child_nodes
                .make_mut(on_disk, unreachable_bytes)?
                .raw_entry_mut()
                .from_key(ancestor_path.base_name())
                .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
            if let Some(next) = inclusive_ancestor_paths.next() {
                // More components to go: report this ancestor and descend.
                each_ancestor(child_node);
                ancestor_path = next;
                child_nodes = &mut child_node.children;
            } else {
                return Ok(child_node);
            }
        }
    }
700
700
701 #[allow(clippy::too_many_arguments)]
701 #[allow(clippy::too_many_arguments)]
702 fn reset_state(
702 fn reset_state(
703 &mut self,
703 &mut self,
704 filename: &HgPath,
704 filename: &HgPath,
705 old_entry_opt: Option<DirstateEntry>,
705 old_entry_opt: Option<DirstateEntry>,
706 wc_tracked: bool,
706 wc_tracked: bool,
707 p1_tracked: bool,
707 p1_tracked: bool,
708 p2_info: bool,
708 p2_info: bool,
709 has_meaningful_mtime: bool,
709 has_meaningful_mtime: bool,
710 parent_file_data_opt: Option<ParentFileData>,
710 parent_file_data_opt: Option<ParentFileData>,
711 ) -> Result<(), DirstateError> {
711 ) -> Result<(), DirstateError> {
712 let (had_entry, was_tracked) = match old_entry_opt {
712 let (had_entry, was_tracked) = match old_entry_opt {
713 Some(old_entry) => (true, old_entry.tracked()),
713 Some(old_entry) => (true, old_entry.tracked()),
714 None => (false, false),
714 None => (false, false),
715 };
715 };
716 let node = self.get_or_insert_node(filename, |ancestor| {
716 let node = self.get_or_insert_node(filename, |ancestor| {
717 if !had_entry {
717 if !had_entry {
718 ancestor.descendants_with_entry_count += 1;
718 ancestor.descendants_with_entry_count += 1;
719 }
719 }
720 if was_tracked {
720 if was_tracked {
721 if !wc_tracked {
721 if !wc_tracked {
722 ancestor.tracked_descendants_count = ancestor
722 ancestor.tracked_descendants_count = ancestor
723 .tracked_descendants_count
723 .tracked_descendants_count
724 .checked_sub(1)
724 .checked_sub(1)
725 .expect("tracked count to be >= 0");
725 .expect("tracked count to be >= 0");
726 }
726 }
727 } else if wc_tracked {
727 } else if wc_tracked {
728 ancestor.tracked_descendants_count += 1;
728 ancestor.tracked_descendants_count += 1;
729 }
729 }
730 })?;
730 })?;
731
731
732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
733 DirstateV2Data {
733 DirstateV2Data {
734 wc_tracked,
734 wc_tracked,
735 p1_tracked,
735 p1_tracked,
736 p2_info,
736 p2_info,
737 mode_size: parent_file_data.mode_size,
737 mode_size: parent_file_data.mode_size,
738 mtime: if has_meaningful_mtime {
738 mtime: if has_meaningful_mtime {
739 parent_file_data.mtime
739 parent_file_data.mtime
740 } else {
740 } else {
741 None
741 None
742 },
742 },
743 ..Default::default()
743 ..Default::default()
744 }
744 }
745 } else {
745 } else {
746 DirstateV2Data {
746 DirstateV2Data {
747 wc_tracked,
747 wc_tracked,
748 p1_tracked,
748 p1_tracked,
749 p2_info,
749 p2_info,
750 ..Default::default()
750 ..Default::default()
751 }
751 }
752 };
752 };
753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
754 if !had_entry {
754 if !had_entry {
755 self.nodes_with_entry_count += 1;
755 self.nodes_with_entry_count += 1;
756 }
756 }
757 Ok(())
757 Ok(())
758 }
758 }
759
759
    /// Mark `filename` as tracked, creating its node (and any missing
    /// ancestor) if needed.
    ///
    /// Returns whether the file was not tracked before this call.
    fn set_tracked(
        &mut self,
        filename: &HgPath,
        old_entry_opt: Option<DirstateEntry>,
    ) -> Result<bool, DirstateV2ParseError> {
        let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
        let had_entry = old_entry_opt.is_some();
        // Ancestors gain a tracked descendant only if the file was not
        // already tracked.
        let tracked_count_increment = u32::from(!was_tracked);
        let mut new = false;

        let node = self.get_or_insert_node(filename, |ancestor| {
            if !had_entry {
                ancestor.descendants_with_entry_count += 1;
            }

            ancestor.tracked_descendants_count += tracked_count_increment;
        })?;
        if let Some(old_entry) = old_entry_opt {
            let mut e = old_entry;
            if e.tracked() {
                // XXX
                // This is probably overkill for more case, but we need this to
                // fully replace the `normallookup` call with `set_tracked`
                // one. Consider smoothing this in the future.
                e.set_possibly_dirty();
            } else {
                new = true;
                e.set_tracked();
            }
            node.data = NodeData::Entry(e)
        } else {
            node.data = NodeData::Entry(DirstateEntry::new_tracked());
            self.nodes_with_entry_count += 1;
            new = true;
        };
        Ok(new)
    }
797
797
798 /// Set a node as untracked in the dirstate.
798 /// Set a node as untracked in the dirstate.
799 ///
799 ///
800 /// It is the responsibility of the caller to remove the copy source and/or
800 /// It is the responsibility of the caller to remove the copy source and/or
801 /// the entry itself if appropriate.
801 /// the entry itself if appropriate.
802 ///
802 ///
803 /// # Panics
803 /// # Panics
804 ///
804 ///
805 /// Panics if the node does not exist.
805 /// Panics if the node does not exist.
806 fn set_untracked(
806 fn set_untracked(
807 &mut self,
807 &mut self,
808 filename: &HgPath,
808 filename: &HgPath,
809 old_entry: DirstateEntry,
809 old_entry: DirstateEntry,
810 ) -> Result<(), DirstateV2ParseError> {
810 ) -> Result<(), DirstateV2ParseError> {
811 let node = self
811 let node = self
812 .get_node_mut(filename, |ancestor| {
812 .get_node_mut(filename, |ancestor| {
813 ancestor.tracked_descendants_count = ancestor
813 ancestor.tracked_descendants_count = ancestor
814 .tracked_descendants_count
814 .tracked_descendants_count
815 .checked_sub(1)
815 .checked_sub(1)
816 .expect("tracked_descendants_count should be >= 0");
816 .expect("tracked_descendants_count should be >= 0");
817 })?
817 })?
818 .expect("node should exist");
818 .expect("node should exist");
819 let mut new_entry = old_entry;
819 let mut new_entry = old_entry;
820 new_entry.set_untracked();
820 new_entry.set_untracked();
821 node.data = NodeData::Entry(new_entry);
821 node.data = NodeData::Entry(new_entry);
822 Ok(())
822 Ok(())
823 }
823 }
824
824
825 /// Set a node as clean in the dirstate.
825 /// Set a node as clean in the dirstate.
826 ///
826 ///
827 /// It is the responsibility of the caller to remove the copy source.
827 /// It is the responsibility of the caller to remove the copy source.
828 ///
828 ///
829 /// # Panics
829 /// # Panics
830 ///
830 ///
831 /// Panics if the node does not exist.
831 /// Panics if the node does not exist.
832 fn set_clean(
832 fn set_clean(
833 &mut self,
833 &mut self,
834 filename: &HgPath,
834 filename: &HgPath,
835 old_entry: DirstateEntry,
835 old_entry: DirstateEntry,
836 mode: u32,
836 mode: u32,
837 size: u32,
837 size: u32,
838 mtime: TruncatedTimestamp,
838 mtime: TruncatedTimestamp,
839 ) -> Result<(), DirstateError> {
839 ) -> Result<(), DirstateError> {
840 let node = self
840 let node = self
841 .get_node_mut(filename, |ancestor| {
841 .get_node_mut(filename, |ancestor| {
842 if !old_entry.tracked() {
842 if !old_entry.tracked() {
843 ancestor.tracked_descendants_count += 1;
843 ancestor.tracked_descendants_count += 1;
844 }
844 }
845 })?
845 })?
846 .expect("node should exist");
846 .expect("node should exist");
847 let mut new_entry = old_entry;
847 let mut new_entry = old_entry;
848 new_entry.set_clean(mode, size, mtime);
848 new_entry.set_clean(mode, size, mtime);
849 node.data = NodeData::Entry(new_entry);
849 node.data = NodeData::Entry(new_entry);
850 Ok(())
850 Ok(())
851 }
851 }
852
852
853 /// Set a node as possibly dirty in the dirstate.
853 /// Set a node as possibly dirty in the dirstate.
854 ///
854 ///
855 /// # Panics
855 /// # Panics
856 ///
856 ///
857 /// Panics if the node does not exist.
857 /// Panics if the node does not exist.
858 fn set_possibly_dirty(
858 fn set_possibly_dirty(
859 &mut self,
859 &mut self,
860 filename: &HgPath,
860 filename: &HgPath,
861 ) -> Result<(), DirstateError> {
861 ) -> Result<(), DirstateError> {
862 let node = self
862 let node = self
863 .get_node_mut(filename, |_ancestor| {})?
863 .get_node_mut(filename, |_ancestor| {})?
864 .expect("node should exist");
864 .expect("node should exist");
865 let entry = node.data.as_entry_mut().expect("entry should exist");
865 let entry = node.data.as_entry_mut().expect("entry should exist");
866 entry.set_possibly_dirty();
866 entry.set_possibly_dirty();
867 node.data = NodeData::Entry(*entry);
867 node.data = NodeData::Entry(*entry);
868 Ok(())
868 Ok(())
869 }
869 }
870
870
871 /// Clears the cached mtime for the (potential) folder at `path`.
871 /// Clears the cached mtime for the (potential) folder at `path`.
872 pub(super) fn clear_cached_mtime(
872 pub(super) fn clear_cached_mtime(
873 &mut self,
873 &mut self,
874 path: &HgPath,
874 path: &HgPath,
875 ) -> Result<(), DirstateV2ParseError> {
875 ) -> Result<(), DirstateV2ParseError> {
876 let node = match self.get_node_mut(path, |_ancestor| {})? {
876 let node = match self.get_node_mut(path, |_ancestor| {})? {
877 Some(node) => node,
877 Some(node) => node,
878 None => return Ok(()),
878 None => return Ok(()),
879 };
879 };
880 if let NodeData::CachedDirectory { .. } = &node.data {
880 if let NodeData::CachedDirectory { .. } = &node.data {
881 node.data = NodeData::None
881 node.data = NodeData::None
882 }
882 }
883 Ok(())
883 Ok(())
884 }
884 }
885
885
886 /// Sets the cached mtime for the (potential) folder at `path`.
886 /// Sets the cached mtime for the (potential) folder at `path`.
887 pub(super) fn set_cached_mtime(
887 pub(super) fn set_cached_mtime(
888 &mut self,
888 &mut self,
889 path: &HgPath,
889 path: &HgPath,
890 mtime: TruncatedTimestamp,
890 mtime: TruncatedTimestamp,
891 ) -> Result<(), DirstateV2ParseError> {
891 ) -> Result<(), DirstateV2ParseError> {
892 let node = match self.get_node_mut(path, |_ancestor| {})? {
892 let node = match self.get_node_mut(path, |_ancestor| {})? {
893 Some(node) => node,
893 Some(node) => node,
894 None => return Ok(()),
894 None => return Ok(()),
895 };
895 };
896 match &node.data {
896 match &node.data {
897 NodeData::Entry(_) => {} // Don’t overwrite an entry
897 NodeData::Entry(_) => {} // Don’t overwrite an entry
898 NodeData::CachedDirectory { .. } | NodeData::None => {
898 NodeData::CachedDirectory { .. } | NodeData::None => {
899 node.data = NodeData::CachedDirectory { mtime }
899 node.data = NodeData::CachedDirectory { mtime }
900 }
900 }
901 }
901 }
902 Ok(())
902 Ok(())
903 }
903 }
904
904
    /// External iterator over every node of the tree; each node is yielded
    /// only after all of its descendants (children are pushed and drained
    /// before their parent is popped).
    fn iter_nodes<'tree>(
        &'tree self,
    ) -> impl Iterator<
        Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
    > + 'tree {
        // Depth first tree traversal.
        //
        // If we could afford internal iteration and recursion,
        // this would look like:
        //
        // ```
        // fn traverse_children(
        //     children: &ChildNodes,
        //     each: &mut impl FnMut(&Node),
        // ) {
        //     for child in children.values() {
        //         traverse_children(&child.children, each);
        //         each(child);
        //     }
        // }
        // ```
        //
        // However we want an external iterator and therefore can’t use the
        // call stack. Use an explicit stack instead:
        let mut stack = Vec::new();
        let mut iter = self.root.as_ref().iter();
        std::iter::from_fn(move || {
            // NOTE: `while let` (not a `for` loop) is required because the
            // body replaces `iter` itself via `mem::replace` below.
            while let Some(child_node) = iter.next() {
                let children = match child_node.children(self.on_disk) {
                    Ok(children) => children,
                    // Parse errors are yielded as iterator items so the
                    // caller can surface them.
                    Err(error) => return Some(Err(error)),
                };
                // Pseudo-recursion: descend into this node's children and
                // remember where to resume at the current level.
                let new_iter = children.iter();
                let old_iter = std::mem::replace(&mut iter, new_iter);
                stack.push((child_node, old_iter));
            }
            // Found the end of a `children.iter()` iterator.
            if let Some((child_node, next_iter)) = stack.pop() {
                // "Return" from pseudo-recursion by restoring state from the
                // explicit stack
                iter = next_iter;

                Some(Ok(child_node))
            } else {
                // Reached the bottom of the stack, we’re done
                None
            }
        })
    }
955
955
956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
957 if let Cow::Borrowed(path) = path {
957 if let Cow::Borrowed(path) = path {
958 *unreachable_bytes += path.len() as u32
958 *unreachable_bytes += path.len() as u32
959 }
959 }
960 }
960 }
961
961
    /// Select how the next serialization of this map will be written out
    /// (see `DirstateMapWriteMode` for the available strategies).
    pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
        self.write_mode = write_mode;
    }
965 }
965 }
966
966
/// Flat tuple form of one dirstate entry used for debug output:
/// `(path, (state, mode, size, mtime))` — presumably mirroring the
/// dirstate-v1 on-disk tuple layout; confirm against the debug callers.
type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
968
968
969 impl OwningDirstateMap {
969 impl OwningDirstateMap {
970 pub fn clear(&mut self) {
970 pub fn clear(&mut self) {
971 self.with_dmap_mut(|map| {
971 self.with_dmap_mut(|map| {
972 map.root = Default::default();
972 map.root = Default::default();
973 map.nodes_with_entry_count = 0;
973 map.nodes_with_entry_count = 0;
974 map.nodes_with_copy_source_count = 0;
974 map.nodes_with_copy_source_count = 0;
975 map.unreachable_bytes = map.on_disk.len() as u32;
975 map.unreachable_bytes = map.on_disk.len() as u32;
976 });
976 });
977 }
977 }
978
978
979 pub fn set_tracked(
979 pub fn set_tracked(
980 &mut self,
980 &mut self,
981 filename: &HgPath,
981 filename: &HgPath,
982 ) -> Result<bool, DirstateV2ParseError> {
982 ) -> Result<bool, DirstateV2ParseError> {
983 let old_entry_opt = self.get(filename)?;
983 let old_entry_opt = self.get(filename)?;
984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
985 }
985 }
986
986
987 pub fn set_untracked(
987 pub fn set_untracked(
988 &mut self,
988 &mut self,
989 filename: &HgPath,
989 filename: &HgPath,
990 ) -> Result<bool, DirstateError> {
990 ) -> Result<bool, DirstateError> {
991 let old_entry_opt = self.get(filename)?;
991 let old_entry_opt = self.get(filename)?;
992 match old_entry_opt {
992 match old_entry_opt {
993 None => Ok(false),
993 None => Ok(false),
994 Some(old_entry) => {
994 Some(old_entry) => {
995 if !old_entry.tracked() {
995 if !old_entry.tracked() {
996 // `DirstateMap::set_untracked` is not a noop if
996 // `DirstateMap::set_untracked` is not a noop if
997 // already not tracked as it will decrement the
997 // already not tracked as it will decrement the
998 // tracked counters while going down.
998 // tracked counters while going down.
999 return Ok(true);
999 return Ok(true);
1000 }
1000 }
1001 if old_entry.added() {
1001 if old_entry.added() {
1002 // Untracking an "added" entry will just result in a
1002 // Untracking an "added" entry will just result in a
1003 // worthless entry (and other parts of the code will
1003 // worthless entry (and other parts of the code will
1004 // complain about it), just drop it entirely.
1004 // complain about it), just drop it entirely.
1005 self.drop_entry_and_copy_source(filename)?;
1005 self.drop_entry_and_copy_source(filename)?;
1006 return Ok(true);
1006 return Ok(true);
1007 }
1007 }
1008 if !old_entry.p2_info() {
1008 if !old_entry.p2_info() {
1009 self.copy_map_remove(filename)?;
1009 self.copy_map_remove(filename)?;
1010 }
1010 }
1011
1011
1012 self.with_dmap_mut(|map| {
1012 self.with_dmap_mut(|map| {
1013 map.set_untracked(filename, old_entry)?;
1013 map.set_untracked(filename, old_entry)?;
1014 Ok(true)
1014 Ok(true)
1015 })
1015 })
1016 }
1016 }
1017 }
1017 }
1018 }
1018 }
1019
1019
1020 pub fn set_clean(
1020 pub fn set_clean(
1021 &mut self,
1021 &mut self,
1022 filename: &HgPath,
1022 filename: &HgPath,
1023 mode: u32,
1023 mode: u32,
1024 size: u32,
1024 size: u32,
1025 mtime: TruncatedTimestamp,
1025 mtime: TruncatedTimestamp,
1026 ) -> Result<(), DirstateError> {
1026 ) -> Result<(), DirstateError> {
1027 let old_entry = match self.get(filename)? {
1027 let old_entry = match self.get(filename)? {
1028 None => {
1028 None => {
1029 return Err(
1029 return Err(
1030 DirstateMapError::PathNotFound(filename.into()).into()
1030 DirstateMapError::PathNotFound(filename.into()).into()
1031 )
1031 )
1032 }
1032 }
1033 Some(e) => e,
1033 Some(e) => e,
1034 };
1034 };
1035 self.copy_map_remove(filename)?;
1035 self.copy_map_remove(filename)?;
1036 self.with_dmap_mut(|map| {
1036 self.with_dmap_mut(|map| {
1037 map.set_clean(filename, old_entry, mode, size, mtime)
1037 map.set_clean(filename, old_entry, mode, size, mtime)
1038 })
1038 })
1039 }
1039 }
1040
1040
1041 pub fn set_possibly_dirty(
1041 pub fn set_possibly_dirty(
1042 &mut self,
1042 &mut self,
1043 filename: &HgPath,
1043 filename: &HgPath,
1044 ) -> Result<(), DirstateError> {
1044 ) -> Result<(), DirstateError> {
1045 if self.get(filename)?.is_none() {
1045 if self.get(filename)?.is_none() {
1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1047 }
1047 }
1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1049 }
1049 }
1050
1050
1051 pub fn reset_state(
1051 pub fn reset_state(
1052 &mut self,
1052 &mut self,
1053 filename: &HgPath,
1053 filename: &HgPath,
1054 wc_tracked: bool,
1054 wc_tracked: bool,
1055 p1_tracked: bool,
1055 p1_tracked: bool,
1056 p2_info: bool,
1056 p2_info: bool,
1057 has_meaningful_mtime: bool,
1057 has_meaningful_mtime: bool,
1058 parent_file_data_opt: Option<ParentFileData>,
1058 parent_file_data_opt: Option<ParentFileData>,
1059 ) -> Result<(), DirstateError> {
1059 ) -> Result<(), DirstateError> {
1060 if !(p1_tracked || p2_info || wc_tracked) {
1060 if !(p1_tracked || p2_info || wc_tracked) {
1061 self.drop_entry_and_copy_source(filename)?;
1061 self.drop_entry_and_copy_source(filename)?;
1062 return Ok(());
1062 return Ok(());
1063 }
1063 }
1064 self.copy_map_remove(filename)?;
1064 self.copy_map_remove(filename)?;
1065 let old_entry_opt = self.get(filename)?;
1065 let old_entry_opt = self.get(filename)?;
1066 self.with_dmap_mut(|map| {
1066 self.with_dmap_mut(|map| {
1067 map.reset_state(
1067 map.reset_state(
1068 filename,
1068 filename,
1069 old_entry_opt,
1069 old_entry_opt,
1070 wc_tracked,
1070 wc_tracked,
1071 p1_tracked,
1071 p1_tracked,
1072 p2_info,
1072 p2_info,
1073 has_meaningful_mtime,
1073 has_meaningful_mtime,
1074 parent_file_data_opt,
1074 parent_file_data_opt,
1075 )
1075 )
1076 })
1076 })
1077 }
1077 }
1078
1078
    /// Remove the entry and the copy source (if any) of `filename`, pruning
    /// tree nodes that become empty and keeping all ancestor counters
    /// (`descendants_with_entry_count`, `tracked_descendants_count`) and the
    /// map-level counters consistent on the way back up.
    pub fn drop_entry_and_copy_source(
        &mut self,
        filename: &HgPath,
    ) -> Result<(), DirstateError> {
        // Captured before mutation, only used for the debug_assert below.
        let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
        // Summary of what the recursion found at the leaf node.
        struct Dropped {
            was_tracked: bool,
            had_entry: bool,
            had_copy_source: bool,
        }

        /// If this returns `Ok(Some((dropped, removed)))`, then
        ///
        /// * `dropped` is about the leaf node that was at `filename`
        /// * `removed` is whether this particular level of recursion just
        ///   removed a node in `nodes`.
        fn recur<'on_disk>(
            on_disk: &'on_disk [u8],
            unreachable_bytes: &mut u32,
            nodes: &mut ChildNodes<'on_disk>,
            path: &HgPath,
        ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
            let (first_path_component, rest_of_path) =
                path.split_first_component();
            let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
            let node = if let Some(node) = nodes.get_mut(first_path_component)
            {
                node
            } else {
                // Path does not exist in the tree: nothing to drop.
                return Ok(None);
            };
            let dropped;
            if let Some(rest) = rest_of_path {
                // Not yet at the leaf: recurse into this node's children.
                if let Some((d, removed)) = recur(
                    on_disk,
                    unreachable_bytes,
                    &mut node.children,
                    rest,
                )? {
                    dropped = d;
                    // Propagate the leaf's removal into this ancestor's
                    // cached counters; underflow means they were broken.
                    if dropped.had_entry {
                        node.descendants_with_entry_count = node
                            .descendants_with_entry_count
                            .checked_sub(1)
                            .expect(
                                "descendants_with_entry_count should be >= 0",
                            );
                    }
                    if dropped.was_tracked {
                        node.tracked_descendants_count = node
                            .tracked_descendants_count
                            .checked_sub(1)
                            .expect(
                                "tracked_descendants_count should be >= 0",
                            );
                    }

                    // Directory caches must be invalidated when removing a
                    // child node
                    if removed {
                        if let NodeData::CachedDirectory { .. } = &node.data {
                            node.data = NodeData::None
                        }
                    }
                } else {
                    return Ok(None);
                }
            } else {
                // Leaf node: clear its entry and copy source, and record
                // what was there for the ancestors above.
                let entry = node.data.as_entry();
                let was_tracked = entry.map_or(false, |entry| entry.tracked());
                let had_entry = entry.is_some();
                if had_entry {
                    node.data = NodeData::None
                }
                let mut had_copy_source = false;
                if let Some(source) = &node.copy_source {
                    DirstateMap::count_dropped_path(
                        unreachable_bytes,
                        Cow::Borrowed(source),
                    );
                    had_copy_source = true;
                    node.copy_source = None
                }
                dropped = Dropped {
                    was_tracked,
                    had_entry,
                    had_copy_source,
                };
            }
            // After recursion, for both leaf (rest_of_path is None) nodes and
            // parent nodes, remove a node if it just became empty.
            let remove = !node.data.has_entry()
                && node.copy_source.is_none()
                && node.children.is_empty();
            if remove {
                let (key, _) =
                    nodes.remove_entry(first_path_component).unwrap();
                DirstateMap::count_dropped_path(
                    unreachable_bytes,
                    Cow::Borrowed(key.full_path()),
                )
            }
            Ok(Some((dropped, remove)))
        }

        self.with_dmap_mut(|map| {
            if let Some((dropped, _removed)) = recur(
                map.on_disk,
                &mut map.unreachable_bytes,
                &mut map.root,
                filename,
            )? {
                // Mirror the per-ancestor updates at the whole-map level.
                if dropped.had_entry {
                    map.nodes_with_entry_count = map
                        .nodes_with_entry_count
                        .checked_sub(1)
                        .expect("nodes_with_entry_count should be >= 0");
                }
                if dropped.had_copy_source {
                    map.nodes_with_copy_source_count = map
                        .nodes_with_copy_source_count
                        .checked_sub(1)
                        .expect("nodes_with_copy_source_count should be >= 0");
                }
            } else {
                // Nothing was found: the earlier lookup must have agreed.
                debug_assert!(!was_tracked);
            }
            Ok(())
        })
    }
1209
1209
1210 pub fn has_tracked_dir(
1210 pub fn has_tracked_dir(
1211 &mut self,
1211 &mut self,
1212 directory: &HgPath,
1212 directory: &HgPath,
1213 ) -> Result<bool, DirstateError> {
1213 ) -> Result<bool, DirstateError> {
1214 self.with_dmap_mut(|map| {
1214 self.with_dmap_mut(|map| {
1215 if let Some(node) = map.get_node(directory)? {
1215 if let Some(node) = map.get_node(directory)? {
1216 // A node without a `DirstateEntry` was created to hold child
1216 // A node without a `DirstateEntry` was created to hold child
1217 // nodes, and is therefore a directory.
1217 // nodes, and is therefore a directory.
1218 let is_dir = node.entry()?.is_none();
1218 let is_dir = node.entry()?.is_none();
1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 } else {
1220 } else {
1221 Ok(false)
1221 Ok(false)
1222 }
1222 }
1223 })
1223 })
1224 }
1224 }
1225
1225
1226 pub fn has_dir(
1226 pub fn has_dir(
1227 &mut self,
1227 &mut self,
1228 directory: &HgPath,
1228 directory: &HgPath,
1229 ) -> Result<bool, DirstateError> {
1229 ) -> Result<bool, DirstateError> {
1230 self.with_dmap_mut(|map| {
1230 self.with_dmap_mut(|map| {
1231 if let Some(node) = map.get_node(directory)? {
1231 if let Some(node) = map.get_node(directory)? {
1232 // A node without a `DirstateEntry` was created to hold child
1232 // A node without a `DirstateEntry` was created to hold child
1233 // nodes, and is therefore a directory.
1233 // nodes, and is therefore a directory.
1234 let is_dir = node.entry()?.is_none();
1234 let is_dir = node.entry()?.is_none();
1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 } else {
1236 } else {
1237 Ok(false)
1237 Ok(false)
1238 }
1238 }
1239 })
1239 })
1240 }
1240 }
1241
1241
1242 #[logging_timer::time("trace")]
1242 #[logging_timer::time("trace")]
1243 pub fn pack_v1(
1243 pub fn pack_v1(
1244 &self,
1244 &self,
1245 parents: DirstateParents,
1245 parents: DirstateParents,
1246 ) -> Result<Vec<u8>, DirstateError> {
1246 ) -> Result<Vec<u8>, DirstateError> {
1247 let map = self.get_map();
1247 let map = self.get_map();
1248 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1248 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1249 // reallocations
1249 // reallocations
1250 let mut size = parents.as_bytes().len();
1250 let mut size = parents.as_bytes().len();
1251 for node in map.iter_nodes() {
1251 for node in map.iter_nodes() {
1252 let node = node?;
1252 let node = node?;
1253 if node.entry()?.is_some() {
1253 if node.entry()?.is_some() {
1254 size += packed_entry_size(
1254 size += packed_entry_size(
1255 node.full_path(map.on_disk)?,
1255 node.full_path(map.on_disk)?,
1256 node.copy_source(map.on_disk)?,
1256 node.copy_source(map.on_disk)?,
1257 );
1257 );
1258 }
1258 }
1259 }
1259 }
1260
1260
1261 let mut packed = Vec::with_capacity(size);
1261 let mut packed = Vec::with_capacity(size);
1262 packed.extend(parents.as_bytes());
1262 packed.extend(parents.as_bytes());
1263
1263
1264 for node in map.iter_nodes() {
1264 for node in map.iter_nodes() {
1265 let node = node?;
1265 let node = node?;
1266 if let Some(entry) = node.entry()? {
1266 if let Some(entry) = node.entry()? {
1267 pack_entry(
1267 pack_entry(
1268 node.full_path(map.on_disk)?,
1268 node.full_path(map.on_disk)?,
1269 &entry,
1269 &entry,
1270 node.copy_source(map.on_disk)?,
1270 node.copy_source(map.on_disk)?,
1271 &mut packed,
1271 &mut packed,
1272 );
1272 );
1273 }
1273 }
1274 }
1274 }
1275 Ok(packed)
1275 Ok(packed)
1276 }
1276 }
1277
1277
1278 /// Returns new data and metadata together with whether that data should be
1278 /// Returns new data and metadata together with whether that data should be
1279 /// appended to the existing data file whose content is at
1279 /// appended to the existing data file whose content is at
1280 /// `map.on_disk` (true), instead of written to a new data file
1280 /// `map.on_disk` (true), instead of written to a new data file
1281 /// (false), and the previous size of data on disk.
1281 /// (false), and the previous size of data on disk.
1282 #[logging_timer::time("trace")]
1282 #[logging_timer::time("trace")]
1283 pub fn pack_v2(
1283 pub fn pack_v2(
1284 &self,
1284 &self,
1285 write_mode: DirstateMapWriteMode,
1285 write_mode: DirstateMapWriteMode,
1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1287 {
1287 {
1288 let map = self.get_map();
1288 let map = self.get_map();
1289 on_disk::write(map, write_mode)
1289 on_disk::write(map, write_mode)
1290 }
1290 }
1291
1291
    /// `callback` allows the caller to process and do something with the
    /// results of the status. This is needed to do so efficiently (i.e.
    /// without cloning the `DirstateStatus` object with its paths) because
    /// we need to borrow from `Self`.
    pub fn with_status<R>(
        &mut self,
        matcher: &(dyn Matcher + Sync),
        root_dir: PathBuf,
        ignore_files: Vec<PathBuf>,
        options: StatusOptions,
        callback: impl for<'r> FnOnce(
            Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
        ) -> R,
    ) -> R {
        // The higher-ranked bound on `callback` lets the status result
        // borrow from the mutable map borrow that only lives inside this
        // closure.
        self.with_dmap_mut(|map| {
            callback(super::status::status(
                map,
                matcher,
                root_dir,
                ignore_files,
                options,
            ))
        })
    }
1316
1316
1317 pub fn copy_map_len(&self) -> usize {
1317 pub fn copy_map_len(&self) -> usize {
1318 let map = self.get_map();
1318 let map = self.get_map();
1319 map.nodes_with_copy_source_count as usize
1319 map.nodes_with_copy_source_count as usize
1320 }
1320 }
1321
1321
1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 let map = self.get_map();
1323 let map = self.get_map();
1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 Some((node.full_path(map.on_disk)?, source))
1326 Some((node.full_path(map.on_disk)?, source))
1327 } else {
1327 } else {
1328 None
1328 None
1329 })
1329 })
1330 }))
1330 }))
1331 }
1331 }
1332
1332
1333 pub fn copy_map_contains_key(
1333 pub fn copy_map_contains_key(
1334 &self,
1334 &self,
1335 key: &HgPath,
1335 key: &HgPath,
1336 ) -> Result<bool, DirstateV2ParseError> {
1336 ) -> Result<bool, DirstateV2ParseError> {
1337 let map = self.get_map();
1337 let map = self.get_map();
1338 Ok(if let Some(node) = map.get_node(key)? {
1338 Ok(if let Some(node) = map.get_node(key)? {
1339 node.has_copy_source()
1339 node.has_copy_source()
1340 } else {
1340 } else {
1341 false
1341 false
1342 })
1342 })
1343 }
1343 }
1344
1344
1345 pub fn copy_map_get(
1345 pub fn copy_map_get(
1346 &self,
1346 &self,
1347 key: &HgPath,
1347 key: &HgPath,
1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 let map = self.get_map();
1349 let map = self.get_map();
1350 if let Some(node) = map.get_node(key)? {
1350 if let Some(node) = map.get_node(key)? {
1351 if let Some(source) = node.copy_source(map.on_disk)? {
1351 if let Some(source) = node.copy_source(map.on_disk)? {
1352 return Ok(Some(source));
1352 return Ok(Some(source));
1353 }
1353 }
1354 }
1354 }
1355 Ok(None)
1355 Ok(None)
1356 }
1356 }
1357
1357
1358 pub fn copy_map_remove(
1358 pub fn copy_map_remove(
1359 &mut self,
1359 &mut self,
1360 key: &HgPath,
1360 key: &HgPath,
1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1362 self.with_dmap_mut(|map| {
1362 self.with_dmap_mut(|map| {
1363 let count = &mut map.nodes_with_copy_source_count;
1363 let count = &mut map.nodes_with_copy_source_count;
1364 let unreachable_bytes = &mut map.unreachable_bytes;
1364 let unreachable_bytes = &mut map.unreachable_bytes;
1365 Ok(DirstateMap::get_node_mut_inner(
1365 Ok(DirstateMap::get_node_mut_inner(
1366 map.on_disk,
1366 map.on_disk,
1367 unreachable_bytes,
1367 unreachable_bytes,
1368 &mut map.root,
1368 &mut map.root,
1369 key,
1369 key,
1370 |_ancestor| {},
1370 |_ancestor| {},
1371 )?
1371 )?
1372 .and_then(|node| {
1372 .and_then(|node| {
1373 if let Some(source) = &node.copy_source {
1373 if let Some(source) = &node.copy_source {
1374 *count = count
1374 *count = count
1375 .checked_sub(1)
1375 .checked_sub(1)
1376 .expect("nodes_with_copy_source_count should be >= 0");
1376 .expect("nodes_with_copy_source_count should be >= 0");
1377 DirstateMap::count_dropped_path(
1377 DirstateMap::count_dropped_path(
1378 unreachable_bytes,
1378 unreachable_bytes,
1379 Cow::Borrowed(source),
1379 Cow::Borrowed(source),
1380 );
1380 );
1381 }
1381 }
1382 node.copy_source.take().map(Cow::into_owned)
1382 node.copy_source.take().map(Cow::into_owned)
1383 }))
1383 }))
1384 })
1384 })
1385 }
1385 }
1386
1386
1387 pub fn copy_map_insert(
1387 pub fn copy_map_insert(
1388 &mut self,
1388 &mut self,
1389 key: &HgPath,
1389 key: &HgPath,
1390 value: &HgPath,
1390 value: &HgPath,
1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1392 self.with_dmap_mut(|map| {
1392 self.with_dmap_mut(|map| {
1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1394 let had_copy_source = node.copy_source.is_none();
1394 let had_copy_source = node.copy_source.is_none();
1395 let old = node
1395 let old = node
1396 .copy_source
1396 .copy_source
1397 .replace(value.to_owned().into())
1397 .replace(value.to_owned().into())
1398 .map(Cow::into_owned);
1398 .map(Cow::into_owned);
1399 if had_copy_source {
1399 if had_copy_source {
1400 map.nodes_with_copy_source_count += 1
1400 map.nodes_with_copy_source_count += 1
1401 }
1401 }
1402 Ok(old)
1402 Ok(old)
1403 })
1403 })
1404 }
1404 }
1405
1405
1406 pub fn len(&self) -> usize {
1406 pub fn len(&self) -> usize {
1407 let map = self.get_map();
1407 let map = self.get_map();
1408 map.nodes_with_entry_count as usize
1408 map.nodes_with_entry_count as usize
1409 }
1409 }
1410
1410
1411 pub fn is_empty(&self) -> bool {
1411 pub fn is_empty(&self) -> bool {
1412 self.len() == 0
1412 self.len() == 0
1413 }
1413 }
1414
1414
1415 pub fn contains_key(
1415 pub fn contains_key(
1416 &self,
1416 &self,
1417 key: &HgPath,
1417 key: &HgPath,
1418 ) -> Result<bool, DirstateV2ParseError> {
1418 ) -> Result<bool, DirstateV2ParseError> {
1419 Ok(self.get(key)?.is_some())
1419 Ok(self.get(key)?.is_some())
1420 }
1420 }
1421
1421
1422 pub fn get(
1422 pub fn get(
1423 &self,
1423 &self,
1424 key: &HgPath,
1424 key: &HgPath,
1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1426 let map = self.get_map();
1426 let map = self.get_map();
1427 Ok(if let Some(node) = map.get_node(key)? {
1427 Ok(if let Some(node) = map.get_node(key)? {
1428 node.entry()?
1428 node.entry()?
1429 } else {
1429 } else {
1430 None
1430 None
1431 })
1431 })
1432 }
1432 }
1433
1433
1434 pub fn iter(&self) -> StateMapIter<'_> {
1434 pub fn iter(&self) -> StateMapIter<'_> {
1435 let map = self.get_map();
1435 let map = self.get_map();
1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1437 Ok(if let Some(entry) = node.entry()? {
1437 Ok(if let Some(entry) = node.entry()? {
1438 Some((node.full_path(map.on_disk)?, entry))
1438 Some((node.full_path(map.on_disk)?, entry))
1439 } else {
1439 } else {
1440 None
1440 None
1441 })
1441 })
1442 }))
1442 }))
1443 }
1443 }
1444
1444
1445 pub fn iter_tracked_dirs(
1445 pub fn iter_tracked_dirs(
1446 &mut self,
1446 &mut self,
1447 ) -> Result<
1447 ) -> Result<
1448 Box<
1448 Box<
1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1450 + Send
1450 + Send
1451 + '_,
1451 + '_,
1452 >,
1452 >,
1453 DirstateError,
1453 DirstateError,
1454 > {
1454 > {
1455 let map = self.get_map();
1455 let map = self.get_map();
1456 let on_disk = map.on_disk;
1456 let on_disk = map.on_disk;
1457 Ok(Box::new(filter_map_results(
1457 Ok(Box::new(filter_map_results(
1458 map.iter_nodes(),
1458 map.iter_nodes(),
1459 move |node| {
1459 move |node| {
1460 Ok(if node.tracked_descendants_count() > 0 {
1460 Ok(if node.tracked_descendants_count() > 0 {
1461 Some(node.full_path(on_disk)?)
1461 Some(node.full_path(on_disk)?)
1462 } else {
1462 } else {
1463 None
1463 None
1464 })
1464 })
1465 },
1465 },
1466 )))
1466 )))
1467 }
1467 }
1468
1468
1469 /// Only public because it needs to be exposed to the Python layer.
1469 /// Only public because it needs to be exposed to the Python layer.
1470 /// It is not the full `setparents` logic, only the parts that mutate the
1470 /// It is not the full `setparents` logic, only the parts that mutate the
1471 /// entries.
1471 /// entries.
1472 pub fn setparents_fixup(
1472 pub fn setparents_fixup(
1473 &mut self,
1473 &mut self,
1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1475 // XXX
1475 // XXX
1476 // All the copying and re-querying is quite inefficient, but this is
1476 // All the copying and re-querying is quite inefficient, but this is
1477 // still a lot better than doing it from Python.
1477 // still a lot better than doing it from Python.
1478 //
1478 //
1479 // The better solution is to develop a mechanism for `iter_mut`,
1479 // The better solution is to develop a mechanism for `iter_mut`,
1480 // which will be a lot more involved: we're dealing with a lazy,
1480 // which will be a lot more involved: we're dealing with a lazy,
1481 // append-mostly, tree-like data structure. This will do for now.
1481 // append-mostly, tree-like data structure. This will do for now.
1482 let mut copies = vec![];
1482 let mut copies = vec![];
1483 let mut files_with_p2_info = vec![];
1483 let mut files_with_p2_info = vec![];
1484 for res in self.iter() {
1484 for res in self.iter() {
1485 let (path, entry) = res?;
1485 let (path, entry) = res?;
1486 if entry.p2_info() {
1486 if entry.p2_info() {
1487 files_with_p2_info.push(path.to_owned())
1487 files_with_p2_info.push(path.to_owned())
1488 }
1488 }
1489 }
1489 }
1490 self.with_dmap_mut(|map| {
1490 self.with_dmap_mut(|map| {
1491 for path in files_with_p2_info.iter() {
1491 for path in files_with_p2_info.iter() {
1492 let node = map.get_or_insert_node(path, |_| {})?;
1492 let node = map.get_or_insert_node(path, |_| {})?;
1493 let entry =
1493 let entry =
1494 node.data.as_entry_mut().expect("entry should exist");
1494 node.data.as_entry_mut().expect("entry should exist");
1495 entry.drop_merge_data();
1495 entry.drop_merge_data();
1496 if let Some(source) = node.copy_source.take().as_deref() {
1496 if let Some(source) = node.copy_source.take().as_deref() {
1497 copies.push((path.to_owned(), source.to_owned()));
1497 copies.push((path.to_owned(), source.to_owned()));
1498 }
1498 }
1499 }
1499 }
1500 Ok(copies)
1500 Ok(copies)
1501 })
1501 })
1502 }
1502 }
1503
1503
1504 pub fn debug_iter(
1504 pub fn debug_iter(
1505 &self,
1505 &self,
1506 all: bool,
1506 all: bool,
1507 ) -> Box<
1507 ) -> Box<
1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1509 + Send
1509 + Send
1510 + '_,
1510 + '_,
1511 > {
1511 > {
1512 let map = self.get_map();
1512 let map = self.get_map();
1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1514 let debug_tuple = if let Some(entry) = node.entry()? {
1514 let debug_tuple = if let Some(entry) = node.entry()? {
1515 entry.debug_tuple()
1515 entry.debug_tuple()
1516 } else if !all {
1516 } else if !all {
1517 return Ok(None);
1517 return Ok(None);
1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1520 } else {
1520 } else {
1521 (b' ', 0, -1, -1)
1521 (b' ', 0, -1, -1)
1522 };
1522 };
1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1524 }))
1524 }))
1525 }
1525 }
1526 }
1526 }
1527 #[cfg(test)]
1527 #[cfg(test)]
1528 mod tests {
1528 mod tests {
1529 use super::*;
1529 use super::*;
1530
1530
1531 /// Shortcut to return tracked descendants of a path.
1531 /// Shortcut to return tracked descendants of a path.
1532 /// Panics if the path does not exist.
1532 /// Panics if the path does not exist.
1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1534 let path = dbg!(HgPath::new(path));
1534 let path = dbg!(HgPath::new(path));
1535 let node = map.get_map().get_node(path);
1535 let node = map.get_map().get_node(path);
1536 node.unwrap().unwrap().tracked_descendants_count()
1536 node.unwrap().unwrap().tracked_descendants_count()
1537 }
1537 }
1538
1538
1539 /// Shortcut to return descendants with an entry.
1539 /// Shortcut to return descendants with an entry.
1540 /// Panics if the path does not exist.
1540 /// Panics if the path does not exist.
1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1542 let path = dbg!(HgPath::new(path));
1542 let path = dbg!(HgPath::new(path));
1543 let node = map.get_map().get_node(path);
1543 let node = map.get_map().get_node(path);
1544 node.unwrap().unwrap().descendants_with_entry_count()
1544 node.unwrap().unwrap().descendants_with_entry_count()
1545 }
1545 }
1546
1546
1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1548 let path = dbg!(HgPath::new(path));
1548 let path = dbg!(HgPath::new(path));
1549 let node = map.get_map().get_node(path);
1549 let node = map.get_map().get_node(path);
1550 assert!(node.unwrap().is_none());
1550 assert!(node.unwrap().is_none());
1551 }
1551 }
1552
1552
1553 /// Shortcut for path creation in tests
1553 /// Shortcut for path creation in tests
1554 fn p(b: &[u8]) -> &HgPath {
1554 fn p(b: &[u8]) -> &HgPath {
1555 HgPath::new(b)
1555 HgPath::new(b)
1556 }
1556 }
1557
1557
1558 /// Test the very simple case a single tracked file
1558 /// Test the very simple case a single tracked file
1559 #[test]
1559 #[test]
1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1562 assert_eq!(map.len(), 0);
1562 assert_eq!(map.len(), 0);
1563
1563
1564 map.set_tracked(p(b"some/nested/path"))?;
1564 map.set_tracked(p(b"some/nested/path"))?;
1565
1565
1566 assert_eq!(map.len(), 1);
1566 assert_eq!(map.len(), 1);
1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1570
1570
1571 map.set_untracked(p(b"some/nested/path"))?;
1571 map.set_untracked(p(b"some/nested/path"))?;
1572 assert_eq!(map.len(), 0);
1572 assert_eq!(map.len(), 0);
1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1574
1574
1575 Ok(())
1575 Ok(())
1576 }
1576 }
1577
1577
1578 /// Test the simple case of all tracked, but multiple files
1578 /// Test the simple case of all tracked, but multiple files
1579 #[test]
1579 #[test]
1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1582
1582
1583 map.set_tracked(p(b"some/nested/path"))?;
1583 map.set_tracked(p(b"some/nested/path"))?;
1584 map.set_tracked(p(b"some/nested/file"))?;
1584 map.set_tracked(p(b"some/nested/file"))?;
1585 // one layer without any files to test deletion cascade
1585 // one layer without any files to test deletion cascade
1586 map.set_tracked(p(b"some/other/nested/path"))?;
1586 map.set_tracked(p(b"some/other/nested/path"))?;
1587 map.set_tracked(p(b"root_file"))?;
1587 map.set_tracked(p(b"root_file"))?;
1588 map.set_tracked(p(b"some/file"))?;
1588 map.set_tracked(p(b"some/file"))?;
1589 map.set_tracked(p(b"some/file2"))?;
1589 map.set_tracked(p(b"some/file2"))?;
1590 map.set_tracked(p(b"some/file3"))?;
1590 map.set_tracked(p(b"some/file3"))?;
1591
1591
1592 assert_eq!(map.len(), 7);
1592 assert_eq!(map.len(), 7);
1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1598
1598
1599 map.set_untracked(p(b"some/nested/path"))?;
1599 map.set_untracked(p(b"some/nested/path"))?;
1600 assert_eq!(map.len(), 6);
1600 assert_eq!(map.len(), 6);
1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605
1605
1606 map.set_untracked(p(b"some/nested/file"))?;
1606 map.set_untracked(p(b"some/nested/file"))?;
1607 assert_eq!(map.len(), 5);
1607 assert_eq!(map.len(), 5);
1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1611 assert_does_not_exist(&map, b"some_nested");
1611 assert_does_not_exist(&map, b"some_nested");
1612
1612
1613 map.set_untracked(p(b"some/other/nested/path"))?;
1613 map.set_untracked(p(b"some/other/nested/path"))?;
1614 assert_eq!(map.len(), 4);
1614 assert_eq!(map.len(), 4);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1616 assert_does_not_exist(&map, b"some/other");
1616 assert_does_not_exist(&map, b"some/other");
1617
1617
1618 map.set_untracked(p(b"root_file"))?;
1618 map.set_untracked(p(b"root_file"))?;
1619 assert_eq!(map.len(), 3);
1619 assert_eq!(map.len(), 3);
1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1621 assert_does_not_exist(&map, b"root_file");
1621 assert_does_not_exist(&map, b"root_file");
1622
1622
1623 map.set_untracked(p(b"some/file"))?;
1623 map.set_untracked(p(b"some/file"))?;
1624 assert_eq!(map.len(), 2);
1624 assert_eq!(map.len(), 2);
1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1626 assert_does_not_exist(&map, b"some/file");
1626 assert_does_not_exist(&map, b"some/file");
1627
1627
1628 map.set_untracked(p(b"some/file2"))?;
1628 map.set_untracked(p(b"some/file2"))?;
1629 assert_eq!(map.len(), 1);
1629 assert_eq!(map.len(), 1);
1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1631 assert_does_not_exist(&map, b"some/file2");
1631 assert_does_not_exist(&map, b"some/file2");
1632
1632
1633 map.set_untracked(p(b"some/file3"))?;
1633 map.set_untracked(p(b"some/file3"))?;
1634 assert_eq!(map.len(), 0);
1634 assert_eq!(map.len(), 0);
1635 assert_does_not_exist(&map, b"some/file3");
1635 assert_does_not_exist(&map, b"some/file3");
1636
1636
1637 Ok(())
1637 Ok(())
1638 }
1638 }
1639
1639
1640 /// Check with a mix of tracked and non-tracked items
1640 /// Check with a mix of tracked and non-tracked items
1641 #[test]
1641 #[test]
1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1644
1644
1645 // A file that was just added
1645 // A file that was just added
1646 map.set_tracked(p(b"some/nested/path"))?;
1646 map.set_tracked(p(b"some/nested/path"))?;
1647 // This has no information, the dirstate should ignore it
1647 // This has no information, the dirstate should ignore it
1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1649 assert_does_not_exist(&map, b"some/file");
1649 assert_does_not_exist(&map, b"some/file");
1650
1650
1651 // A file that was removed
1651 // A file that was removed
1652 map.reset_state(
1652 map.reset_state(
1653 p(b"some/nested/file"),
1653 p(b"some/nested/file"),
1654 false,
1654 false,
1655 true,
1655 true,
1656 false,
1656 false,
1657 false,
1657 false,
1658 None,
1658 None,
1659 )?;
1659 )?;
1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1661 // Only present in p2
1661 // Only present in p2
1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1664 // A file that was merged
1664 // A file that was merged
1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1667 // A file that is added, with info from p2
1667 // A file that is added, with info from p2
1668 // XXX is that actually possible?
1668 // XXX is that actually possible?
1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1671 // A clean file
1671 // A clean file
1672 // One layer without any files to test deletion cascade
1672 // One layer without any files to test deletion cascade
1673 map.reset_state(
1673 map.reset_state(
1674 p(b"some/other/nested/path"),
1674 p(b"some/other/nested/path"),
1675 true,
1675 true,
1676 true,
1676 true,
1677 false,
1677 false,
1678 false,
1678 false,
1679 None,
1679 None,
1680 )?;
1680 )?;
1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1682
1682
1683 assert_eq!(map.len(), 6);
1683 assert_eq!(map.len(), 6);
1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1689 assert_eq!(
1689 assert_eq!(
1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1691 0
1691 0
1692 );
1692 );
1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1695
1695
1696 // might as well check this
1696 // might as well check this
1697 map.set_untracked(p(b"path/does/not/exist"))?;
1697 map.set_untracked(p(b"path/does/not/exist"))?;
1698 assert_eq!(map.len(), 6);
1698 assert_eq!(map.len(), 6);
1699
1699
1700 map.set_untracked(p(b"some/other/nested/path"))?;
1700 map.set_untracked(p(b"some/other/nested/path"))?;
1701 // It is set untracked but not deleted since it held other information
1701 // It is set untracked but not deleted since it held other information
1702 assert_eq!(map.len(), 6);
1702 assert_eq!(map.len(), 6);
1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1709
1709
1710 map.set_untracked(p(b"some/nested/path"))?;
1710 map.set_untracked(p(b"some/nested/path"))?;
1711 // It is set untracked *and* deleted since it was only added
1711 // It is set untracked *and* deleted since it was only added
1712 assert_eq!(map.len(), 5);
1712 assert_eq!(map.len(), 5);
1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1717 assert_does_not_exist(&map, b"some/nested/path");
1717 assert_does_not_exist(&map, b"some/nested/path");
1718
1718
1719 map.set_untracked(p(b"root_file"))?;
1719 map.set_untracked(p(b"root_file"))?;
1720 // Untracked but not deleted
1720 // Untracked but not deleted
1721 assert_eq!(map.len(), 5);
1721 assert_eq!(map.len(), 5);
1722 assert!(map.get(p(b"root_file"))?.is_some());
1722 assert!(map.get(p(b"root_file"))?.is_some());
1723
1723
1724 map.set_untracked(p(b"some/file2"))?;
1724 map.set_untracked(p(b"some/file2"))?;
1725 assert_eq!(map.len(), 5);
1725 assert_eq!(map.len(), 5);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1727 assert!(map.get(p(b"some/file2"))?.is_some());
1727 assert!(map.get(p(b"some/file2"))?.is_some());
1728
1728
1729 map.set_untracked(p(b"some/file3"))?;
1729 map.set_untracked(p(b"some/file3"))?;
1730 assert_eq!(map.len(), 5);
1730 assert_eq!(map.len(), 5);
1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1732 assert!(map.get(p(b"some/file3"))?.is_some());
1732 assert!(map.get(p(b"some/file3"))?.is_some());
1733
1733
1734 Ok(())
1734 Ok(())
1735 }
1735 }
1736
1736
1737 /// Check that copies counter is correctly updated
1737 /// Check that copies counter is correctly updated
1738 #[test]
1738 #[test]
1739 fn test_copy_source() -> Result<(), DirstateError> {
1739 fn test_copy_source() -> Result<(), DirstateError> {
1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1741
1741
1742 // Clean file
1742 // Clean file
1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1744 // Merged file
1744 // Merged file
1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1746 // Removed file
1746 // Removed file
1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1748 // Added file
1748 // Added file
1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1750 // Add copy
1750 // Add copy
1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1752 assert_eq!(map.copy_map_len(), 1);
1752 assert_eq!(map.copy_map_len(), 1);
1753
1753
1754 // Copy override
1754 // Copy override
1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1756 assert_eq!(map.copy_map_len(), 1);
1756 assert_eq!(map.copy_map_len(), 1);
1757
1757
1758 // Multiple copies
1758 // Multiple copies
1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1760 assert_eq!(map.copy_map_len(), 2);
1760 assert_eq!(map.copy_map_len(), 2);
1761
1761
1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1763 assert_eq!(map.copy_map_len(), 3);
1763 assert_eq!(map.copy_map_len(), 3);
1764
1764
1765 // Added, so the entry is completely removed
1765 // Added, so the entry is completely removed
1766 map.set_untracked(p(b"files/added"))?;
1766 map.set_untracked(p(b"files/added"))?;
1767 assert_does_not_exist(&map, b"files/added");
1767 assert_does_not_exist(&map, b"files/added");
1768 assert_eq!(map.copy_map_len(), 2);
1768 assert_eq!(map.copy_map_len(), 2);
1769
1769
1770 // Removed, so the entry is kept around, so is its copy
1770 // Removed, so the entry is kept around, so is its copy
1771 map.set_untracked(p(b"removed"))?;
1771 map.set_untracked(p(b"removed"))?;
1772 assert!(map.get(p(b"removed"))?.is_some());
1772 assert!(map.get(p(b"removed"))?.is_some());
1773 assert_eq!(map.copy_map_len(), 2);
1773 assert_eq!(map.copy_map_len(), 2);
1774
1774
1775 // Clean, so the entry is kept around, but not its copy
1775 // Clean, so the entry is kept around, but not its copy
1776 map.set_untracked(p(b"files/clean"))?;
1776 map.set_untracked(p(b"files/clean"))?;
1777 assert!(map.get(p(b"files/clean"))?.is_some());
1777 assert!(map.get(p(b"files/clean"))?.is_some());
1778 assert_eq!(map.copy_map_len(), 1);
1778 assert_eq!(map.copy_map_len(), 1);
1779
1779
1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1781 assert_eq!(map.copy_map_len(), 2);
1781 assert_eq!(map.copy_map_len(), 2);
1782
1782
1783 // Info from p2, so its copy source info is kept around
1783 // Info from p2, so its copy source info is kept around
1784 map.set_untracked(p(b"files/from_p2"))?;
1784 map.set_untracked(p(b"files/from_p2"))?;
1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1786 assert_eq!(map.copy_map_len(), 2);
1786 assert_eq!(map.copy_map_len(), 2);
1787
1787
1788 Ok(())
1788 Ok(())
1789 }
1789 }
1790
1790
1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1792 /// does not actually come from the disk, but it's opaque to the code being
1792 /// does not actually come from the disk, but it's opaque to the code being
1793 /// tested.
1793 /// tested.
1794 #[test]
1794 #[test]
1795 fn test_on_disk() -> Result<(), DirstateError> {
1795 fn test_on_disk() -> Result<(), DirstateError> {
1796 // First let's create some data to put "on disk"
1796 // First let's create some data to put "on disk"
1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1798
1798
1799 // A file that was just added
1799 // A file that was just added
1800 map.set_tracked(p(b"some/nested/added"))?;
1800 map.set_tracked(p(b"some/nested/added"))?;
1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1802
1802
1803 // A file that was removed
1803 // A file that was removed
1804 map.reset_state(
1804 map.reset_state(
1805 p(b"some/nested/removed"),
1805 p(b"some/nested/removed"),
1806 false,
1806 false,
1807 true,
1807 true,
1808 false,
1808 false,
1809 false,
1809 false,
1810 None,
1810 None,
1811 )?;
1811 )?;
1812 // Only present in p2
1812 // Only present in p2
1813 map.reset_state(
1813 map.reset_state(
1814 p(b"other/p2_info_only"),
1814 p(b"other/p2_info_only"),
1815 false,
1815 false,
1816 false,
1816 false,
1817 true,
1817 true,
1818 false,
1818 false,
1819 None,
1819 None,
1820 )?;
1820 )?;
1821 map.copy_map_insert(
1821 map.copy_map_insert(
1822 p(b"other/p2_info_only"),
1822 p(b"other/p2_info_only"),
1823 p(b"other/p2_info_copy_source"),
1823 p(b"other/p2_info_copy_source"),
1824 )?;
1824 )?;
1825 // A file that was merged
1825 // A file that was merged
1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1827 // A file that is added, with info from p2
1827 // A file that is added, with info from p2
1828 // XXX is that actually possible?
1828 // XXX is that actually possible?
1829 map.reset_state(
1829 map.reset_state(
1830 p(b"other/added_with_p2"),
1830 p(b"other/added_with_p2"),
1831 true,
1831 true,
1832 false,
1832 false,
1833 true,
1833 true,
1834 false,
1834 false,
1835 None,
1835 None,
1836 )?;
1836 )?;
1837 // One layer without any files to test deletion cascade
1837 // One layer without any files to test deletion cascade
1838 // A clean file
1838 // A clean file
1839 map.reset_state(
1839 map.reset_state(
1840 p(b"some/other/nested/clean"),
1840 p(b"some/other/nested/clean"),
1841 true,
1841 true,
1842 true,
1842 true,
1843 false,
1843 false,
1844 false,
1844 false,
1845 None,
1845 None,
1846 )?;
1846 )?;
1847
1847
1848 let (packed, metadata, _should_append, _old_data_size) =
1848 let (packed, metadata, _should_append, _old_data_size) =
1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1850 let packed_len = packed.len();
1850 let packed_len = packed.len();
1851 assert!(packed_len > 0);
1851 assert!(packed_len > 0);
1852
1852
1853 // Recreate "from disk"
1853 // Recreate "from disk"
1854 let mut map = OwningDirstateMap::new_v2(
1854 let mut map = OwningDirstateMap::new_v2(
1855 packed,
1855 packed,
1856 packed_len,
1856 packed_len,
1857 metadata.as_bytes(),
1857 metadata.as_bytes(),
1858 vec![],
1858 vec![],
1859 None,
1859 None,
1860 )?;
1860 )?;
1861
1861
1862 // Check that everything is accounted for
1862 // Check that everything is accounted for
1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1865 assert!(map.contains_key(p(b"merged"))?);
1865 assert!(map.contains_key(p(b"merged"))?);
1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1869 assert_eq!(
1869 assert_eq!(
1870 map.copy_map_get(p(b"some/nested/added"))?,
1870 map.copy_map_get(p(b"some/nested/added"))?,
1871 Some(p(b"added_copy_source"))
1871 Some(p(b"added_copy_source"))
1872 );
1872 );
1873 assert_eq!(
1873 assert_eq!(
1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1875 Some(p(b"other/p2_info_copy_source"))
1875 Some(p(b"other/p2_info_copy_source"))
1876 );
1876 );
1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1887 assert_eq!(map.len(), 6);
1887 assert_eq!(map.len(), 6);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1889 assert_eq!(map.copy_map_len(), 2);
1889 assert_eq!(map.copy_map_len(), 2);
1890
1890
1891 // Shouldn't change anything since it's already not tracked
1891 // Shouldn't change anything since it's already not tracked
1892 map.set_untracked(p(b"some/nested/removed"))?;
1892 map.set_untracked(p(b"some/nested/removed"))?;
1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1894
1894
1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1896 panic!("root should not have been mutated")
1896 panic!("root should not have been mutated")
1897 }
1897 }
1898 // We haven't mutated enough (nothing, actually), we should still be in
1898 // We haven't mutated enough (nothing, actually), we should still be in
1899 // the append strategy
1899 // the append strategy
1900 assert!(map.get_map().write_should_append());
1900 assert!(map.get_map().write_should_append());
1901
1901
1902 // But this mutates the structure, so there should be unreachable_bytes
1902 // But this mutates the structure, so there should be unreachable_bytes
1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1905 assert!(unreachable_bytes > 0);
1905 assert!(unreachable_bytes > 0);
1906
1906
1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1908 panic!("root should have been mutated")
1908 panic!("root should have been mutated")
1909 }
1909 }
1910
1910
1911 // This should not mutate the structure either, since `root` has
1911 // This should not mutate the structure either, since `root` has
1912 // already been mutated along with its direct children.
1912 // already been mutated along with its direct children.
1913 map.set_untracked(p(b"merged"))?;
1913 map.set_untracked(p(b"merged"))?;
1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1915
1915
1916 if let NodeRef::InMemory(_, _) =
1916 if let NodeRef::InMemory(_, _) =
1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1918 {
1918 {
1919 panic!("'other/added_with_p2' should not have been mutated")
1919 panic!("'other/added_with_p2' should not have been mutated")
1920 }
1920 }
1921 // But this should, since it's in a different path
1921 // But this should, since it's in a different path
1922 // than `<root>some/nested/add`
1922 // than `<root>some/nested/add`
1923 map.set_untracked(p(b"other/added_with_p2"))?;
1923 map.set_untracked(p(b"other/added_with_p2"))?;
1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1925
1925
1926 if let NodeRef::OnDisk(_) =
1926 if let NodeRef::OnDisk(_) =
1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1928 {
1928 {
1929 panic!("'other/added_with_p2' should have been mutated")
1929 panic!("'other/added_with_p2' should have been mutated")
1930 }
1930 }
1931
1931
1932 // We have rewritten most of the tree, we should create a new file
1932 // We have rewritten most of the tree, we should create a new file
1933 assert!(!map.get_map().write_should_append());
1933 assert!(!map.get_map().write_should_append());
1934
1934
1935 Ok(())
1935 Ok(())
1936 }
1936 }
1937 }
1937 }
@@ -1,1055 +1,1055 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::{Matcher, VisitChildrenSet};
11 use crate::matchers::{Matcher, VisitChildrenSet};
12 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_bytes_from_path;
13 use crate::utils::files::get_bytes_from_path;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
15 use crate::utils::hg_path::hg_path_to_path_buf;
15 use crate::utils::hg_path::hg_path_to_path_buf;
16 use crate::utils::hg_path::HgPath;
16 use crate::utils::hg_path::HgPath;
17 use crate::BadMatch;
17 use crate::BadMatch;
18 use crate::BadType;
18 use crate::BadType;
19 use crate::DirstateStatus;
19 use crate::DirstateStatus;
20 use crate::HgPathCow;
20 use crate::HgPathCow;
21 use crate::PatternFileWarning;
21 use crate::PatternFileWarning;
22 use crate::StatusError;
22 use crate::StatusError;
23 use crate::StatusOptions;
23 use crate::StatusOptions;
24 use once_cell::sync::OnceCell;
24 use once_cell::sync::OnceCell;
25 use rayon::prelude::*;
25 use rayon::prelude::*;
26 use sha1::{Digest, Sha1};
26 use sha1::{Digest, Sha1};
27 use std::borrow::Cow;
27 use std::borrow::Cow;
28 use std::io;
28 use std::io;
29 use std::os::unix::prelude::FileTypeExt;
29 use std::os::unix::prelude::FileTypeExt;
30 use std::path::Path;
30 use std::path::Path;
31 use std::path::PathBuf;
31 use std::path::PathBuf;
32 use std::sync::Mutex;
32 use std::sync::Mutex;
33 use std::time::SystemTime;
33 use std::time::SystemTime;
34
34
35 /// Returns the status of the working directory compared to its parent
35 /// Returns the status of the working directory compared to its parent
36 /// changeset.
36 /// changeset.
37 ///
37 ///
38 /// This algorithm is based on traversing the filesystem tree (`fs` in function
38 /// This algorithm is based on traversing the filesystem tree (`fs` in function
39 /// and variable names) and dirstate tree at the same time. The core of this
39 /// and variable names) and dirstate tree at the same time. The core of this
40 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
40 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
41 /// and its use of `itertools::merge_join_by`. When reaching a path that only
41 /// and its use of `itertools::merge_join_by`. When reaching a path that only
42 /// exists in one of the two trees, depending on information requested by
42 /// exists in one of the two trees, depending on information requested by
43 /// `options` we may need to traverse the remaining subtree.
43 /// `options` we may need to traverse the remaining subtree.
44 #[logging_timer::time("trace")]
44 #[logging_timer::time("trace")]
45 pub fn status<'dirstate>(
45 pub fn status<'dirstate>(
46 dmap: &'dirstate mut DirstateMap,
46 dmap: &'dirstate mut DirstateMap,
47 matcher: &(dyn Matcher + Sync),
47 matcher: &(dyn Matcher + Sync),
48 root_dir: PathBuf,
48 root_dir: PathBuf,
49 ignore_files: Vec<PathBuf>,
49 ignore_files: Vec<PathBuf>,
50 options: StatusOptions,
50 options: StatusOptions,
51 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
51 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
52 {
52 {
53 // Also cap for a Python caller of this function, but don't complain if
53 // Also cap for a Python caller of this function, but don't complain if
54 // the global threadpool has already been set since this code path is also
54 // the global threadpool has already been set since this code path is also
55 // being used by `rhg`, which calls this early.
55 // being used by `rhg`, which calls this early.
56 let _ = crate::utils::cap_default_rayon_threads();
56 let _ = crate::utils::cap_default_rayon_threads();
57
57
58 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
58 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
59 if options.list_ignored || options.list_unknown {
59 if options.list_ignored || options.list_unknown {
60 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
60 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
61 DirstateVersion::V1 => {
61 DirstateVersion::V1 => {
62 let (ignore_fn, warnings) = get_ignore_function(
62 let (ignore_fn, warnings) = get_ignore_function(
63 ignore_files,
63 ignore_files,
64 &root_dir,
64 &root_dir,
65 &mut |_source, _pattern_bytes| {},
65 &mut |_source, _pattern_bytes| {},
66 )?;
66 )?;
67 (ignore_fn, warnings, None)
67 (ignore_fn, warnings, None)
68 }
68 }
69 DirstateVersion::V2 => {
69 DirstateVersion::V2 => {
70 let mut hasher = Sha1::new();
70 let mut hasher = Sha1::new();
71 let (ignore_fn, warnings) = get_ignore_function(
71 let (ignore_fn, warnings) = get_ignore_function(
72 ignore_files,
72 ignore_files,
73 &root_dir,
73 &root_dir,
74 &mut |source, pattern_bytes| {
74 &mut |source, pattern_bytes| {
75 // If inside the repo, use the relative version to
75 // If inside the repo, use the relative version to
76 // make it deterministic inside tests.
76 // make it deterministic inside tests.
77 // The performance hit should be negligible.
77 // The performance hit should be negligible.
78 let source = source
78 let source = source
79 .strip_prefix(&root_dir)
79 .strip_prefix(&root_dir)
80 .unwrap_or(source);
80 .unwrap_or(source);
81 let source = get_bytes_from_path(source);
81 let source = get_bytes_from_path(source);
82
82
83 let mut subhasher = Sha1::new();
83 let mut subhasher = Sha1::new();
84 subhasher.update(pattern_bytes);
84 subhasher.update(pattern_bytes);
85 let patterns_hash = subhasher.finalize();
85 let patterns_hash = subhasher.finalize();
86
86
87 hasher.update(source);
87 hasher.update(source);
88 hasher.update(b" ");
88 hasher.update(b" ");
89 hasher.update(patterns_hash);
89 hasher.update(patterns_hash);
90 hasher.update(b"\n");
90 hasher.update(b"\n");
91 },
91 },
92 )?;
92 )?;
93 let new_hash = *hasher.finalize().as_ref();
93 let new_hash = *hasher.finalize().as_ref();
94 let changed = new_hash != dmap.ignore_patterns_hash;
94 let changed = new_hash != dmap.ignore_patterns_hash;
95 dmap.ignore_patterns_hash = new_hash;
95 dmap.ignore_patterns_hash = new_hash;
96 (ignore_fn, warnings, Some(changed))
96 (ignore_fn, warnings, Some(changed))
97 }
97 }
98 };
98 };
99 (ignore_fn, warnings, changed)
99 (ignore_fn, warnings, changed)
100 } else {
100 } else {
101 (Box::new(|&_| true), vec![], None)
101 (Box::new(|&_| true), vec![], None)
102 };
102 };
103
103
104 let filesystem_time_at_status_start =
104 let filesystem_time_at_status_start =
105 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
105 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
106
106
107 // If the repository is under the current directory, prefer using a
107 // If the repository is under the current directory, prefer using a
108 // relative path, so the kernel needs to traverse fewer directory in every
108 // relative path, so the kernel needs to traverse fewer directory in every
109 // call to `read_dir` or `symlink_metadata`.
109 // call to `read_dir` or `symlink_metadata`.
110 // This is effective in the common case where the current directory is the
110 // This is effective in the common case where the current directory is the
111 // repository root.
111 // repository root.
112
112
113 // TODO: Better yet would be to use libc functions like `openat` and
113 // TODO: Better yet would be to use libc functions like `openat` and
114 // `fstatat` to remove such repeated traversals entirely, but the standard
114 // `fstatat` to remove such repeated traversals entirely, but the standard
115 // library does not provide APIs based on those.
115 // library does not provide APIs based on those.
116 // Maybe with a crate like https://crates.io/crates/openat instead?
116 // Maybe with a crate like https://crates.io/crates/openat instead?
117 let root_dir = if let Some(relative) = std::env::current_dir()
117 let root_dir = if let Some(relative) = std::env::current_dir()
118 .ok()
118 .ok()
119 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
119 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
120 {
120 {
121 relative
121 relative
122 } else {
122 } else {
123 &root_dir
123 &root_dir
124 };
124 };
125
125
126 let outcome = DirstateStatus {
126 let outcome = DirstateStatus {
127 filesystem_time_at_status_start,
127 filesystem_time_at_status_start,
128 ..Default::default()
128 ..Default::default()
129 };
129 };
130 let common = StatusCommon {
130 let common = StatusCommon {
131 dmap,
131 dmap,
132 options,
132 options,
133 matcher,
133 matcher,
134 ignore_fn,
134 ignore_fn,
135 outcome: Mutex::new(outcome),
135 outcome: Mutex::new(outcome),
136 ignore_patterns_have_changed: patterns_changed,
136 ignore_patterns_have_changed: patterns_changed,
137 new_cacheable_directories: Default::default(),
137 new_cacheable_directories: Default::default(),
138 outdated_cached_directories: Default::default(),
138 outdated_cached_directories: Default::default(),
139 filesystem_time_at_status_start,
139 filesystem_time_at_status_start,
140 };
140 };
141 let is_at_repo_root = true;
141 let is_at_repo_root = true;
142 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
142 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
143 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
143 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
144 let root_cached_mtime = None;
144 let root_cached_mtime = None;
145 // If the path we have for the repository root is a symlink, do follow it.
145 // If the path we have for the repository root is a symlink, do follow it.
146 // (As opposed to symlinks within the working directory which are not
146 // (As opposed to symlinks within the working directory which are not
147 // followed, using `std::fs::symlink_metadata`.)
147 // followed, using `std::fs::symlink_metadata`.)
148 common.traverse_fs_directory_and_dirstate(
148 common.traverse_fs_directory_and_dirstate(
149 &has_ignored_ancestor,
149 &has_ignored_ancestor,
150 dmap.root.as_ref(),
150 dmap.root.as_ref(),
151 hg_path,
151 hg_path,
152 &DirEntry {
152 &DirEntry {
153 hg_path: Cow::Borrowed(HgPath::new(b"")),
153 hg_path: Cow::Borrowed(HgPath::new(b"")),
154 fs_path: Cow::Borrowed(root_dir),
154 fs_path: Cow::Borrowed(root_dir),
155 symlink_metadata: None,
155 symlink_metadata: None,
156 file_type: FakeFileType::Directory,
156 file_type: FakeFileType::Directory,
157 },
157 },
158 root_cached_mtime,
158 root_cached_mtime,
159 is_at_repo_root,
159 is_at_repo_root,
160 )?;
160 )?;
161 if let Some(file_set) = common.matcher.file_set() {
161 if let Some(file_set) = common.matcher.file_set() {
162 for file in file_set {
162 for file in file_set {
163 if !file.is_empty() && !dmap.has_node(file)? {
163 if !file.is_empty() && !dmap.has_node(file)? {
164 let path = hg_path_to_path_buf(file)?;
164 let path = hg_path_to_path_buf(file)?;
165 if let io::Result::Err(error) =
165 if let io::Result::Err(error) =
166 root_dir.join(path).symlink_metadata()
166 root_dir.join(path).symlink_metadata()
167 {
167 {
168 common.io_error(error, file)
168 common.io_error(error, file)
169 }
169 }
170 }
170 }
171 }
171 }
172 }
172 }
173 let mut outcome = common.outcome.into_inner().unwrap();
173 let mut outcome = common.outcome.into_inner().unwrap();
174 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
174 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
175 let outdated = common.outdated_cached_directories.into_inner().unwrap();
175 let outdated = common.outdated_cached_directories.into_inner().unwrap();
176
176
177 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
177 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
178 || !outdated.is_empty()
178 || !outdated.is_empty()
179 || (!new_cacheable.is_empty()
179 || (!new_cacheable.is_empty()
180 && dmap.dirstate_version == DirstateVersion::V2);
180 && dmap.dirstate_version == DirstateVersion::V2);
181
181
182 // Remove outdated mtimes before adding new mtimes, in case a given
182 // Remove outdated mtimes before adding new mtimes, in case a given
183 // directory is both
183 // directory is both
184 for path in &outdated {
184 for path in &outdated {
185 dmap.clear_cached_mtime(path)?;
185 dmap.clear_cached_mtime(path)?;
186 }
186 }
187 for (path, mtime) in &new_cacheable {
187 for (path, mtime) in &new_cacheable {
188 dmap.set_cached_mtime(path, *mtime)?;
188 dmap.set_cached_mtime(path, *mtime)?;
189 }
189 }
190
190
191 Ok((outcome, warnings))
191 Ok((outcome, warnings))
192 }
192 }
193
193
194 /// Bag of random things needed by various parts of the algorithm. Reduces the
194 /// Bag of random things needed by various parts of the algorithm. Reduces the
195 /// number of parameters passed to functions.
195 /// number of parameters passed to functions.
196 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
196 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
197 dmap: &'tree DirstateMap<'on_disk>,
197 dmap: &'tree DirstateMap<'on_disk>,
198 options: StatusOptions,
198 options: StatusOptions,
199 matcher: &'a (dyn Matcher + Sync),
199 matcher: &'a (dyn Matcher + Sync),
200 ignore_fn: IgnoreFnType<'a>,
200 ignore_fn: IgnoreFnType<'a>,
201 outcome: Mutex<DirstateStatus<'on_disk>>,
201 outcome: Mutex<DirstateStatus<'on_disk>>,
202 /// New timestamps of directories to be used for caching their readdirs
202 /// New timestamps of directories to be used for caching their readdirs
203 new_cacheable_directories:
203 new_cacheable_directories:
204 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
204 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
205 /// Used to invalidate the readdir cache of directories
205 /// Used to invalidate the readdir cache of directories
206 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
206 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
207
207
208 /// Whether ignore files like `.hgignore` have changed since the previous
208 /// Whether ignore files like `.hgignore` have changed since the previous
209 /// time a `status()` call wrote their hash to the dirstate. `None` means
209 /// time a `status()` call wrote their hash to the dirstate. `None` means
210 /// we don’t know as this run doesn’t list either ignored or uknown files
210 /// we don’t know as this run doesn’t list either ignored or uknown files
211 /// and therefore isn’t reading `.hgignore`.
211 /// and therefore isn’t reading `.hgignore`.
212 ignore_patterns_have_changed: Option<bool>,
212 ignore_patterns_have_changed: Option<bool>,
213
213
214 /// The current time at the start of the `status()` algorithm, as measured
214 /// The current time at the start of the `status()` algorithm, as measured
215 /// and possibly truncated by the filesystem.
215 /// and possibly truncated by the filesystem.
216 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
216 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
217 }
217 }
218
218
219 enum Outcome {
219 enum Outcome {
220 Modified,
220 Modified,
221 Added,
221 Added,
222 Removed,
222 Removed,
223 Deleted,
223 Deleted,
224 Clean,
224 Clean,
225 Ignored,
225 Ignored,
226 Unknown,
226 Unknown,
227 Unsure,
227 Unsure,
228 }
228 }
229
229
230 /// Lazy computation of whether a given path has a hgignored
230 /// Lazy computation of whether a given path has a hgignored
231 /// ancestor.
231 /// ancestor.
232 struct HasIgnoredAncestor<'a> {
232 struct HasIgnoredAncestor<'a> {
233 /// `path` and `parent` constitute the inputs to the computation,
233 /// `path` and `parent` constitute the inputs to the computation,
234 /// `cache` stores the outcome.
234 /// `cache` stores the outcome.
235 path: &'a HgPath,
235 path: &'a HgPath,
236 parent: Option<&'a HasIgnoredAncestor<'a>>,
236 parent: Option<&'a HasIgnoredAncestor<'a>>,
237 cache: OnceCell<bool>,
237 cache: OnceCell<bool>,
238 }
238 }
239
239
240 impl<'a> HasIgnoredAncestor<'a> {
240 impl<'a> HasIgnoredAncestor<'a> {
241 fn create(
241 fn create(
242 parent: Option<&'a HasIgnoredAncestor<'a>>,
242 parent: Option<&'a HasIgnoredAncestor<'a>>,
243 path: &'a HgPath,
243 path: &'a HgPath,
244 ) -> HasIgnoredAncestor<'a> {
244 ) -> HasIgnoredAncestor<'a> {
245 Self {
245 Self {
246 path,
246 path,
247 parent,
247 parent,
248 cache: OnceCell::new(),
248 cache: OnceCell::new(),
249 }
249 }
250 }
250 }
251
251
252 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
252 fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool {
253 match self.parent {
253 match self.parent {
254 None => false,
254 None => false,
255 Some(parent) => {
255 Some(parent) => {
256 *(self.cache.get_or_init(|| {
256 *(self.cache.get_or_init(|| {
257 parent.force(ignore_fn) || ignore_fn(self.path)
257 parent.force(ignore_fn) || ignore_fn(self.path)
258 }))
258 }))
259 }
259 }
260 }
260 }
261 }
261 }
262 }
262 }
263
263
264 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
264 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
265 fn push_outcome(
265 fn push_outcome(
266 &self,
266 &self,
267 which: Outcome,
267 which: Outcome,
268 dirstate_node: &NodeRef<'tree, 'on_disk>,
268 dirstate_node: &NodeRef<'tree, 'on_disk>,
269 ) -> Result<(), DirstateV2ParseError> {
269 ) -> Result<(), DirstateV2ParseError> {
270 let path = dirstate_node
270 let path = dirstate_node
271 .full_path_borrowed(self.dmap.on_disk)?
271 .full_path_borrowed(self.dmap.on_disk)?
272 .detach_from_tree();
272 .detach_from_tree();
273 let copy_source = if self.options.list_copies {
273 let copy_source = if self.options.list_copies {
274 dirstate_node
274 dirstate_node
275 .copy_source_borrowed(self.dmap.on_disk)?
275 .copy_source_borrowed(self.dmap.on_disk)?
276 .map(|source| source.detach_from_tree())
276 .map(|source| source.detach_from_tree())
277 } else {
277 } else {
278 None
278 None
279 };
279 };
280 self.push_outcome_common(which, path, copy_source);
280 self.push_outcome_common(which, path, copy_source);
281 Ok(())
281 Ok(())
282 }
282 }
283
283
284 fn push_outcome_without_copy_source(
284 fn push_outcome_without_copy_source(
285 &self,
285 &self,
286 which: Outcome,
286 which: Outcome,
287 path: &BorrowedPath<'_, 'on_disk>,
287 path: &BorrowedPath<'_, 'on_disk>,
288 ) {
288 ) {
289 self.push_outcome_common(which, path.detach_from_tree(), None)
289 self.push_outcome_common(which, path.detach_from_tree(), None)
290 }
290 }
291
291
292 fn push_outcome_common(
292 fn push_outcome_common(
293 &self,
293 &self,
294 which: Outcome,
294 which: Outcome,
295 path: HgPathCow<'on_disk>,
295 path: HgPathCow<'on_disk>,
296 copy_source: Option<HgPathCow<'on_disk>>,
296 copy_source: Option<HgPathCow<'on_disk>>,
297 ) {
297 ) {
298 let mut outcome = self.outcome.lock().unwrap();
298 let mut outcome = self.outcome.lock().unwrap();
299 let vec = match which {
299 let vec = match which {
300 Outcome::Modified => &mut outcome.modified,
300 Outcome::Modified => &mut outcome.modified,
301 Outcome::Added => &mut outcome.added,
301 Outcome::Added => &mut outcome.added,
302 Outcome::Removed => &mut outcome.removed,
302 Outcome::Removed => &mut outcome.removed,
303 Outcome::Deleted => &mut outcome.deleted,
303 Outcome::Deleted => &mut outcome.deleted,
304 Outcome::Clean => &mut outcome.clean,
304 Outcome::Clean => &mut outcome.clean,
305 Outcome::Ignored => &mut outcome.ignored,
305 Outcome::Ignored => &mut outcome.ignored,
306 Outcome::Unknown => &mut outcome.unknown,
306 Outcome::Unknown => &mut outcome.unknown,
307 Outcome::Unsure => &mut outcome.unsure,
307 Outcome::Unsure => &mut outcome.unsure,
308 };
308 };
309 vec.push(StatusPath { path, copy_source });
309 vec.push(StatusPath { path, copy_source });
310 }
310 }
311
311
312 fn read_dir(
312 fn read_dir(
313 &self,
313 &self,
314 hg_path: &HgPath,
314 hg_path: &HgPath,
315 fs_path: &Path,
315 fs_path: &Path,
316 is_at_repo_root: bool,
316 is_at_repo_root: bool,
317 ) -> Result<Vec<DirEntry>, ()> {
317 ) -> Result<Vec<DirEntry>, ()> {
318 DirEntry::read_dir(fs_path, is_at_repo_root)
318 DirEntry::read_dir(fs_path, is_at_repo_root)
319 .map_err(|error| self.io_error(error, hg_path))
319 .map_err(|error| self.io_error(error, hg_path))
320 }
320 }
321
321
322 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
322 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
323 let errno = error.raw_os_error().expect("expected real OS error");
323 let errno = error.raw_os_error().expect("expected real OS error");
324 self.outcome
324 self.outcome
325 .lock()
325 .lock()
326 .unwrap()
326 .unwrap()
327 .bad
327 .bad
328 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
328 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
329 }
329 }
330
330
331 fn check_for_outdated_directory_cache(
331 fn check_for_outdated_directory_cache(
332 &self,
332 &self,
333 dirstate_node: &NodeRef<'tree, 'on_disk>,
333 dirstate_node: &NodeRef<'tree, 'on_disk>,
334 ) -> Result<bool, DirstateV2ParseError> {
334 ) -> Result<bool, DirstateV2ParseError> {
335 if self.ignore_patterns_have_changed == Some(true)
335 if self.ignore_patterns_have_changed == Some(true)
336 && dirstate_node.cached_directory_mtime()?.is_some()
336 && dirstate_node.cached_directory_mtime()?.is_some()
337 {
337 {
338 self.outdated_cached_directories.lock().unwrap().push(
338 self.outdated_cached_directories.lock().unwrap().push(
339 dirstate_node
339 dirstate_node
340 .full_path_borrowed(self.dmap.on_disk)?
340 .full_path_borrowed(self.dmap.on_disk)?
341 .detach_from_tree(),
341 .detach_from_tree(),
342 );
342 );
343 return Ok(true);
343 return Ok(true);
344 }
344 }
345 Ok(false)
345 Ok(false)
346 }
346 }
347
347
348 /// If this returns true, we can get accurate results by only using
348 /// If this returns true, we can get accurate results by only using
349 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
349 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
350 /// need to call `read_dir`.
350 /// need to call `read_dir`.
351 fn can_skip_fs_readdir(
351 fn can_skip_fs_readdir(
352 &self,
352 &self,
353 directory_entry: &DirEntry,
353 directory_entry: &DirEntry,
354 cached_directory_mtime: Option<TruncatedTimestamp>,
354 cached_directory_mtime: Option<TruncatedTimestamp>,
355 ) -> bool {
355 ) -> bool {
356 if !self.options.list_unknown && !self.options.list_ignored {
356 if !self.options.list_unknown && !self.options.list_ignored {
357 // All states that we care about listing have corresponding
357 // All states that we care about listing have corresponding
358 // dirstate entries.
358 // dirstate entries.
359 // This happens for example with `hg status -mard`.
359 // This happens for example with `hg status -mard`.
360 return true;
360 return true;
361 }
361 }
362 if !self.options.list_ignored
362 if !self.options.list_ignored
363 && self.ignore_patterns_have_changed == Some(false)
363 && self.ignore_patterns_have_changed == Some(false)
364 {
364 {
365 if let Some(cached_mtime) = cached_directory_mtime {
365 if let Some(cached_mtime) = cached_directory_mtime {
366 // The dirstate contains a cached mtime for this directory, set
366 // The dirstate contains a cached mtime for this directory, set
367 // by a previous run of the `status` algorithm which found this
367 // by a previous run of the `status` algorithm which found this
368 // directory eligible for `read_dir` caching.
368 // directory eligible for `read_dir` caching.
369 if let Ok(meta) = directory_entry.symlink_metadata() {
369 if let Ok(meta) = directory_entry.symlink_metadata() {
370 if cached_mtime
370 if cached_mtime
371 .likely_equal_to_mtime_of(&meta)
371 .likely_equal_to_mtime_of(&meta)
372 .unwrap_or(false)
372 .unwrap_or(false)
373 {
373 {
374 // The mtime of that directory has not changed
374 // The mtime of that directory has not changed
375 // since then, which means that the results of
375 // since then, which means that the results of
376 // `read_dir` should also be unchanged.
376 // `read_dir` should also be unchanged.
377 return true;
377 return true;
378 }
378 }
379 }
379 }
380 }
380 }
381 }
381 }
382 false
382 false
383 }
383 }
384
384
385 fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
385 fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
386 match set {
386 match set {
387 VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
387 VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
388 VisitChildrenSet::Empty => false,
388 VisitChildrenSet::Empty => false,
389 VisitChildrenSet::Set(children_to_visit) => {
389 VisitChildrenSet::Set(children_to_visit) => {
390 children_to_visit.contains(basename)
390 children_to_visit.contains(basename)
391 }
391 }
392 }
392 }
393 }
393 }
394
394
    /// Returns whether all child entries of the filesystem directory have a
    /// corresponding dirstate node or are ignored.
    ///
    /// Walks one directory level: merges the sorted dirstate children with
    /// the sorted `read_dir` results and dispatches each pair (or unmatched
    /// side) to the appropriate traversal helper, in parallel via rayon.
    fn traverse_fs_directory_and_dirstate<'ancestor>(
        &self,
        has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
        dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
        directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
        directory_entry: &DirEntry,
        cached_directory_mtime: Option<TruncatedTimestamp>,
        is_at_repo_root: bool,
    ) -> Result<bool, DirstateV2ParseError> {
        let children_set = self.matcher.visit_children_set(directory_hg_path);
        if let VisitChildrenSet::Empty = children_set {
            // The matcher rules out everything below this directory.
            return Ok(false);
        }
        if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
            // Fast path: no `read_dir` needed. Only stat the paths that
            // already have a dirstate node.
            dirstate_nodes
                .par_iter()
                .map(|dirstate_node| {
                    let fs_path = &directory_entry.fs_path;
                    let basename =
                        dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
                    let fs_path = fs_path.join(get_path_from_bytes(basename));
                    if !Self::should_visit(
                        &children_set,
                        HgPath::new(basename),
                    ) {
                        return Ok(());
                    }
                    match std::fs::symlink_metadata(&fs_path) {
                        Ok(fs_metadata) => {
                            // The path exists on disk: reconstitute a
                            // `DirEntry` from the metadata we just read.
                            let file_type = fs_metadata.file_type().into();
                            let entry = DirEntry {
                                hg_path: Cow::Borrowed(
                                    dirstate_node
                                        .full_path(self.dmap.on_disk)?,
                                ),
                                fs_path: Cow::Borrowed(&fs_path),
                                symlink_metadata: Some(fs_metadata),
                                file_type,
                            };
                            self.traverse_fs_and_dirstate(
                                &entry,
                                dirstate_node,
                                has_ignored_ancestor,
                            )
                        }
                        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                            // Tracked in the dirstate but gone from disk.
                            self.traverse_dirstate_only(dirstate_node)
                        }
                        Err(error) => {
                            let hg_path =
                                dirstate_node.full_path(self.dmap.on_disk)?;
                            self.io_error(error, hg_path);
                            Ok(())
                        }
                    }
                })
                .collect::<Result<_, _>>()?;

            // We don’t know, so conservatively say this isn’t the case
            let children_all_have_dirstate_node_or_are_ignored = false;

            return Ok(children_all_have_dirstate_node_or_are_ignored);
        }

        let readdir_succeeded;
        let mut fs_entries = if let Ok(entries) = self.read_dir(
            directory_hg_path,
            &directory_entry.fs_path,
            is_at_repo_root,
        ) {
            readdir_succeeded = true;
            entries
        } else {
            // Treat an unreadable directory (typically because of insufficient
            // permissions) like an empty directory. `self.read_dir` has
            // already called `self.io_error` so a warning will be emitted.
            // We still need to remember that there was an error so that we
            // know not to cache this result.
            readdir_succeeded = false;
            Vec::new()
        };

        // `merge_join_by` requires both its input iterators to be sorted:

        let dirstate_nodes = dirstate_nodes.sorted();
        // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
        // https://github.com/rust-lang/rust/issues/34162
        fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));

        // Propagate here any error that would happen inside the comparison
        // callback below
        for dirstate_node in &dirstate_nodes {
            dirstate_node.base_name(self.dmap.on_disk)?;
        }
        itertools::merge_join_by(
            dirstate_nodes,
            &fs_entries,
            |dirstate_node, fs_entry| {
                // This `unwrap` never panics because we already propagated
                // those errors above
                dirstate_node
                    .base_name(self.dmap.on_disk)
                    .unwrap()
                    .cmp(&fs_entry.hg_path)
            },
        )
        .par_bridge()
        .map(|pair| {
            use itertools::EitherOrBoth::*;
            // `Left` = dirstate only, `Right` = filesystem only,
            // `Both` = present on both sides.
            let basename = match &pair {
                Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
                    dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
                ),
                Right(fs_entry) => &fs_entry.hg_path,
            };
            if !Self::should_visit(&children_set, basename) {
                return Ok(false);
            }
            let has_dirstate_node_or_is_ignored = match pair {
                Both(dirstate_node, fs_entry) => {
                    self.traverse_fs_and_dirstate(
                        fs_entry,
                        dirstate_node,
                        has_ignored_ancestor,
                    )?;
                    true
                }
                Left(dirstate_node) => {
                    self.traverse_dirstate_only(dirstate_node)?;
                    true
                }
                Right(fs_entry) => self.traverse_fs_only(
                    has_ignored_ancestor.force(&self.ignore_fn),
                    directory_hg_path,
                    fs_entry,
                ),
            };
            Ok(has_dirstate_node_or_is_ignored)
        })
        // "All children have a node or are ignored" only if it holds for
        // every pair, and only if `read_dir` itself succeeded.
        .try_reduce(|| true, |a, b| Ok(a && b))
        .map(|res| res && readdir_succeeded)
    }
539
539
    /// Handle a path that exists both on the filesystem and in the dirstate:
    /// recurse into directories, classify files against their dirstate entry,
    /// and record `bad` matches for entries of unexpected type.
    fn traverse_fs_and_dirstate<'ancestor>(
        &self,
        fs_entry: &DirEntry,
        dirstate_node: NodeRef<'tree, 'on_disk>,
        has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
    ) -> Result<(), DirstateV2ParseError> {
        // Remember whether we invalidated this directory's readdir cache so
        // `maybe_save_directory_mtime` can force a refresh below.
        let outdated_dircache =
            self.check_for_outdated_directory_cache(&dirstate_node)?;
        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
        let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
        if !file_or_symlink {
            // If we previously had a file here, it was removed (with
            // `hg rm` or similar) or deleted before it could be
            // replaced by a directory or something else.
            self.mark_removed_or_deleted_if_file(&dirstate_node)?;
        }
        if let Some(bad_type) = fs_entry.is_bad() {
            // Entry is neither dir, file, nor symlink; report it only when
            // the user asked for this exact path.
            if self.matcher.exact_match(hg_path) {
                let path = dirstate_node.full_path(self.dmap.on_disk)?;
                self.outcome.lock().unwrap().bad.push((
                    path.to_owned().into(),
                    BadMatch::BadType(bad_type),
                ))
            }
        }
        if fs_entry.is_dir() {
            if self.options.collect_traversed_dirs {
                self.outcome
                    .lock()
                    .unwrap()
                    .traversed
                    .push(hg_path.detach_from_tree())
            }
            // Lazily-computed "is this directory ignored?" chained to the
            // parent's value.
            let is_ignored = HasIgnoredAncestor::create(
                Some(has_ignored_ancestor),
                hg_path,
            );
            let is_at_repo_root = false;
            let children_all_have_dirstate_node_or_are_ignored = self
                .traverse_fs_directory_and_dirstate(
                    &is_ignored,
                    dirstate_node.children(self.dmap.on_disk)?,
                    hg_path,
                    fs_entry,
                    dirstate_node.cached_directory_mtime()?,
                    is_at_repo_root,
                )?;
            self.maybe_save_directory_mtime(
                children_all_have_dirstate_node_or_are_ignored,
                fs_entry,
                dirstate_node,
                outdated_dircache,
            )?
        } else {
            if file_or_symlink && self.matcher.matches(hg_path) {
                if let Some(entry) = dirstate_node.entry()? {
                    if !entry.any_tracked() {
                        // Forward-compat if we start tracking unknown/ignored
                        // files for caching reasons
                        self.mark_unknown_or_ignored(
                            has_ignored_ancestor.force(&self.ignore_fn),
                            hg_path,
                        );
                    }
                    if entry.added() {
                        self.push_outcome(Outcome::Added, &dirstate_node)?;
                    } else if entry.removed() {
                        self.push_outcome(Outcome::Removed, &dirstate_node)?;
                    } else if entry.modified() {
                        self.push_outcome(Outcome::Modified, &dirstate_node)?;
                    } else {
                        // Dirstate says clean: compare size/mode/mtime
                        // against the filesystem to confirm.
                        self.handle_normal_file(&dirstate_node, fs_entry)?;
                    }
                } else {
                    // `node.entry.is_none()` indicates a "directory"
                    // node, but the filesystem has a file
                    self.mark_unknown_or_ignored(
                        has_ignored_ancestor.force(&self.ignore_fn),
                        hg_path,
                    );
                }
            }

            // The filesystem has a file (or nothing) here, so any dirstate
            // children can only be removed or deleted.
            for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
            {
                self.traverse_dirstate_only(child_node)?
            }
        }
        Ok(())
    }
630
630
631 /// Save directory mtime if applicable.
631 /// Save directory mtime if applicable.
632 ///
632 ///
633 /// `outdated_directory_cache` is `true` if we've just invalidated the
633 /// `outdated_directory_cache` is `true` if we've just invalidated the
634 /// cache for this directory in `check_for_outdated_directory_cache`,
634 /// cache for this directory in `check_for_outdated_directory_cache`,
635 /// which forces the update.
635 /// which forces the update.
636 fn maybe_save_directory_mtime(
636 fn maybe_save_directory_mtime(
637 &self,
637 &self,
638 children_all_have_dirstate_node_or_are_ignored: bool,
638 children_all_have_dirstate_node_or_are_ignored: bool,
639 directory_entry: &DirEntry,
639 directory_entry: &DirEntry,
640 dirstate_node: NodeRef<'tree, 'on_disk>,
640 dirstate_node: NodeRef<'tree, 'on_disk>,
641 outdated_directory_cache: bool,
641 outdated_directory_cache: bool,
642 ) -> Result<(), DirstateV2ParseError> {
642 ) -> Result<(), DirstateV2ParseError> {
643 if !children_all_have_dirstate_node_or_are_ignored {
643 if !children_all_have_dirstate_node_or_are_ignored {
644 return Ok(());
644 return Ok(());
645 }
645 }
646 // All filesystem directory entries from `read_dir` have a
646 // All filesystem directory entries from `read_dir` have a
647 // corresponding node in the dirstate, so we can reconstitute the
647 // corresponding node in the dirstate, so we can reconstitute the
648 // names of those entries without calling `read_dir` again.
648 // names of those entries without calling `read_dir` again.
649
649
650 // TODO: use let-else here and below when available:
650 // TODO: use let-else here and below when available:
651 // https://github.com/rust-lang/rust/issues/87335
651 // https://github.com/rust-lang/rust/issues/87335
652 let status_start = if let Some(status_start) =
652 let status_start = if let Some(status_start) =
653 &self.filesystem_time_at_status_start
653 &self.filesystem_time_at_status_start
654 {
654 {
655 status_start
655 status_start
656 } else {
656 } else {
657 return Ok(());
657 return Ok(());
658 };
658 };
659
659
660 // Although the Rust standard library’s `SystemTime` type
660 // Although the Rust standard library’s `SystemTime` type
661 // has nanosecond precision, the times reported for a
661 // has nanosecond precision, the times reported for a
662 // directory’s (or file’s) modified time may have lower
662 // directory’s (or file’s) modified time may have lower
663 // resolution based on the filesystem (for example ext3
663 // resolution based on the filesystem (for example ext3
664 // only stores integer seconds), kernel (see
664 // only stores integer seconds), kernel (see
665 // https://stackoverflow.com/a/14393315/1162888), etc.
665 // https://stackoverflow.com/a/14393315/1162888), etc.
666 let metadata = match directory_entry.symlink_metadata() {
666 let metadata = match directory_entry.symlink_metadata() {
667 Ok(meta) => meta,
667 Ok(meta) => meta,
668 Err(_) => return Ok(()),
668 Err(_) => return Ok(()),
669 };
669 };
670
670
671 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
671 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
672 &metadata,
672 &metadata,
673 status_start,
673 status_start,
674 ) {
674 ) {
675 Ok(Some(directory_mtime)) => directory_mtime,
675 Ok(Some(directory_mtime)) => directory_mtime,
676 Ok(None) => {
676 Ok(None) => {
677 // The directory was modified too recently,
677 // The directory was modified too recently,
678 // don’t cache its `read_dir` results.
678 // don’t cache its `read_dir` results.
679 //
679 //
680 // 1. A change to this directory (direct child was
680 // 1. A change to this directory (direct child was
681 // added or removed) cause its mtime to be set
681 // added or removed) cause its mtime to be set
682 // (possibly truncated) to `directory_mtime`
682 // (possibly truncated) to `directory_mtime`
683 // 2. This `status` algorithm calls `read_dir`
683 // 2. This `status` algorithm calls `read_dir`
684 // 3. An other change is made to the same directory is
684 // 3. An other change is made to the same directory is
685 // made so that calling `read_dir` agin would give
685 // made so that calling `read_dir` agin would give
686 // different results, but soon enough after 1. that
686 // different results, but soon enough after 1. that
687 // the mtime stays the same
687 // the mtime stays the same
688 //
688 //
689 // On a system where the time resolution poor, this
689 // On a system where the time resolution poor, this
690 // scenario is not unlikely if all three steps are caused
690 // scenario is not unlikely if all three steps are caused
691 // by the same script.
691 // by the same script.
692 return Ok(());
692 return Ok(());
693 }
693 }
694 Err(_) => {
694 Err(_) => {
695 // OS/libc does not support mtime?
695 // OS/libc does not support mtime?
696 return Ok(());
696 return Ok(());
697 }
697 }
698 };
698 };
699 // We’ve observed (through `status_start`) that time has
699 // We’ve observed (through `status_start`) that time has
700 // “progressed” since `directory_mtime`, so any further
700 // “progressed” since `directory_mtime`, so any further
701 // change to this directory is extremely likely to cause a
701 // change to this directory is extremely likely to cause a
702 // different mtime.
702 // different mtime.
703 //
703 //
704 // Having the same mtime again is not entirely impossible
704 // Having the same mtime again is not entirely impossible
705 // since the system clock is not monotonous. It could jump
705 // since the system clock is not monotonous. It could jump
706 // backward to some point before `directory_mtime`, then a
706 // backward to some point before `directory_mtime`, then a
707 // directory change could potentially happen during exactly
707 // directory change could potentially happen during exactly
708 // the wrong tick.
708 // the wrong tick.
709 //
709 //
710 // We deem this scenario (unlike the previous one) to be
710 // We deem this scenario (unlike the previous one) to be
711 // unlikely enough in practice.
711 // unlikely enough in practice.
712
712
713 let is_up_to_date = if let Some(cached) =
713 let is_up_to_date = if let Some(cached) =
714 dirstate_node.cached_directory_mtime()?
714 dirstate_node.cached_directory_mtime()?
715 {
715 {
716 !outdated_directory_cache && cached.likely_equal(directory_mtime)
716 !outdated_directory_cache && cached.likely_equal(directory_mtime)
717 } else {
717 } else {
718 false
718 false
719 };
719 };
720 if !is_up_to_date {
720 if !is_up_to_date {
721 let hg_path = dirstate_node
721 let hg_path = dirstate_node
722 .full_path_borrowed(self.dmap.on_disk)?
722 .full_path_borrowed(self.dmap.on_disk)?
723 .detach_from_tree();
723 .detach_from_tree();
724 self.new_cacheable_directories
724 self.new_cacheable_directories
725 .lock()
725 .lock()
726 .unwrap()
726 .unwrap()
727 .push((hg_path, directory_mtime))
727 .push((hg_path, directory_mtime))
728 }
728 }
729 Ok(())
729 Ok(())
730 }
730 }
731
731
    /// A file that is clean in the dirstate was found in the filesystem
    ///
    /// Compares the dirstate entry's recorded size/mode/mtime against the
    /// filesystem metadata and pushes the resulting outcome (`Modified`,
    /// `Unsure`, or `Clean` when listing clean files).
    fn handle_normal_file(
        &self,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
        fs_entry: &DirEntry,
    ) -> Result<(), DirstateV2ParseError> {
        // Keep the low 31 bits
        // (matches how the dirstate stores sizes as a non-negative i32)
        fn truncate_u64(value: u64) -> i32 {
            (value & 0x7FFF_FFFF) as i32
        }

        // If the file cannot be stat'ed anymore, silently skip it.
        let fs_metadata = match fs_entry.symlink_metadata() {
            Ok(meta) => meta,
            Err(_) => return Ok(()),
        };

        let entry = dirstate_node
            .entry()?
            .expect("handle_normal_file called with entry-less node");
        // Closure so the (possibly expensive) mode comparison is only
        // evaluated when size alone is inconclusive.
        let mode_changed =
            || self.options.check_exec && entry.mode_changed(&fs_metadata);
        let size = entry.size();
        // A negative `size` is a marker value, not a real size.
        let size_changed = size != truncate_u64(fs_metadata.len());
        if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
            // issue6456: Size returned may be longer due to encryption
            // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
            self.push_outcome(Outcome::Unsure, dirstate_node)?
        } else if dirstate_node.has_copy_source()
            || entry.is_from_other_parent()
            || (size >= 0 && (size_changed || mode_changed()))
        {
            self.push_outcome(Outcome::Modified, dirstate_node)?
        } else {
            let mtime_looks_clean = if let Some(dirstate_mtime) =
                entry.truncated_mtime()
            {
                let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
                    .expect("OS/libc does not support mtime?");
                // There might be a change in the future if for example the
                // internal clock become off while process run, but this is a
                // case where the issues the user would face
                // would be a lot worse and there is nothing we
                // can really do.
                fs_mtime.likely_equal(dirstate_mtime)
            } else {
                // No mtime in the dirstate entry
                false
            };
            if !mtime_looks_clean {
                // Same size/mode but uncertain mtime: content must be
                // compared to decide, so report "unsure".
                self.push_outcome(Outcome::Unsure, dirstate_node)?
            } else if self.options.list_clean {
                self.push_outcome(Outcome::Clean, dirstate_node)?
            }
        }
        Ok(())
    }
788
788
789 /// A node in the dirstate tree has no corresponding filesystem entry
789 /// A node in the dirstate tree has no corresponding filesystem entry
790 fn traverse_dirstate_only(
790 fn traverse_dirstate_only(
791 &self,
791 &self,
792 dirstate_node: NodeRef<'tree, 'on_disk>,
792 dirstate_node: NodeRef<'tree, 'on_disk>,
793 ) -> Result<(), DirstateV2ParseError> {
793 ) -> Result<(), DirstateV2ParseError> {
794 self.check_for_outdated_directory_cache(&dirstate_node)?;
794 self.check_for_outdated_directory_cache(&dirstate_node)?;
795 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
795 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
796 dirstate_node
796 dirstate_node
797 .children(self.dmap.on_disk)?
797 .children(self.dmap.on_disk)?
798 .par_iter()
798 .par_iter()
799 .map(|child_node| self.traverse_dirstate_only(child_node))
799 .map(|child_node| self.traverse_dirstate_only(child_node))
800 .collect()
800 .collect()
801 }
801 }
802
802
803 /// A node in the dirstate tree has no corresponding *file* on the
803 /// A node in the dirstate tree has no corresponding *file* on the
804 /// filesystem
804 /// filesystem
805 ///
805 ///
806 /// Does nothing on a "directory" node
806 /// Does nothing on a "directory" node
807 fn mark_removed_or_deleted_if_file(
807 fn mark_removed_or_deleted_if_file(
808 &self,
808 &self,
809 dirstate_node: &NodeRef<'tree, 'on_disk>,
809 dirstate_node: &NodeRef<'tree, 'on_disk>,
810 ) -> Result<(), DirstateV2ParseError> {
810 ) -> Result<(), DirstateV2ParseError> {
811 if let Some(entry) = dirstate_node.entry()? {
811 if let Some(entry) = dirstate_node.entry()? {
812 if !entry.any_tracked() {
812 if !entry.any_tracked() {
813 // Future-compat for when we start storing ignored and unknown
813 // Future-compat for when we start storing ignored and unknown
814 // files for caching reasons
814 // files for caching reasons
815 return Ok(());
815 return Ok(());
816 }
816 }
817 let path = dirstate_node.full_path(self.dmap.on_disk)?;
817 let path = dirstate_node.full_path(self.dmap.on_disk)?;
818 if self.matcher.matches(path) {
818 if self.matcher.matches(path) {
819 if entry.removed() {
819 if entry.removed() {
820 self.push_outcome(Outcome::Removed, dirstate_node)?
820 self.push_outcome(Outcome::Removed, dirstate_node)?
821 } else {
821 } else {
822 self.push_outcome(Outcome::Deleted, dirstate_node)?
822 self.push_outcome(Outcome::Deleted, dirstate_node)?
823 }
823 }
824 }
824 }
825 }
825 }
826 Ok(())
826 Ok(())
827 }
827 }
828
828
829 /// Something in the filesystem has no corresponding dirstate node
829 /// Something in the filesystem has no corresponding dirstate node
830 ///
830 ///
831 /// Returns whether that path is ignored
831 /// Returns whether that path is ignored
832 fn traverse_fs_only(
832 fn traverse_fs_only(
833 &self,
833 &self,
834 has_ignored_ancestor: bool,
834 has_ignored_ancestor: bool,
835 directory_hg_path: &HgPath,
835 directory_hg_path: &HgPath,
836 fs_entry: &DirEntry,
836 fs_entry: &DirEntry,
837 ) -> bool {
837 ) -> bool {
838 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
838 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
839 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
839 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
840 if fs_entry.is_dir() {
840 if fs_entry.is_dir() {
841 let is_ignored =
841 let is_ignored =
842 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
842 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
843 let traverse_children = if is_ignored {
843 let traverse_children = if is_ignored {
844 // Descendants of an ignored directory are all ignored
844 // Descendants of an ignored directory are all ignored
845 self.options.list_ignored
845 self.options.list_ignored
846 } else {
846 } else {
847 // Descendants of an unknown directory may be either unknown or
847 // Descendants of an unknown directory may be either unknown or
848 // ignored
848 // ignored
849 self.options.list_unknown || self.options.list_ignored
849 self.options.list_unknown || self.options.list_ignored
850 };
850 };
851 if traverse_children {
851 if traverse_children {
852 let is_at_repo_root = false;
852 let is_at_repo_root = false;
853 if let Ok(children_fs_entries) =
853 if let Ok(children_fs_entries) =
854 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
854 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
855 {
855 {
856 children_fs_entries.par_iter().for_each(|child_fs_entry| {
856 children_fs_entries.par_iter().for_each(|child_fs_entry| {
857 self.traverse_fs_only(
857 self.traverse_fs_only(
858 is_ignored,
858 is_ignored,
859 &hg_path,
859 &hg_path,
860 child_fs_entry,
860 child_fs_entry,
861 );
861 );
862 })
862 })
863 }
863 }
864 if self.options.collect_traversed_dirs {
864 if self.options.collect_traversed_dirs {
865 self.outcome.lock().unwrap().traversed.push(hg_path.into())
865 self.outcome.lock().unwrap().traversed.push(hg_path.into())
866 }
866 }
867 }
867 }
868 is_ignored
868 is_ignored
869 } else if file_or_symlink {
869 } else if file_or_symlink {
870 if self.matcher.matches(&hg_path) {
870 if self.matcher.matches(&hg_path) {
871 self.mark_unknown_or_ignored(
871 self.mark_unknown_or_ignored(
872 has_ignored_ancestor,
872 has_ignored_ancestor,
873 &BorrowedPath::InMemory(&hg_path),
873 &BorrowedPath::InMemory(&hg_path),
874 )
874 )
875 } else {
875 } else {
876 // We haven’t computed whether this path is ignored. It
876 // We haven’t computed whether this path is ignored. It
877 // might not be, and a future run of status might have a
877 // might not be, and a future run of status might have a
878 // different matcher that matches it. So treat it as not
878 // different matcher that matches it. So treat it as not
879 // ignored. That is, inhibit readdir caching of the parent
879 // ignored. That is, inhibit readdir caching of the parent
880 // directory.
880 // directory.
881 false
881 false
882 }
882 }
883 } else {
883 } else {
884 // This is neither a directory, a plain file, or a symlink.
884 // This is neither a directory, a plain file, or a symlink.
885 // Treat it like an ignored file.
885 // Treat it like an ignored file.
886 true
886 true
887 }
887 }
888 }
888 }
889
889
890 /// Returns whether that path is ignored
890 /// Returns whether that path is ignored
891 fn mark_unknown_or_ignored(
891 fn mark_unknown_or_ignored(
892 &self,
892 &self,
893 has_ignored_ancestor: bool,
893 has_ignored_ancestor: bool,
894 hg_path: &BorrowedPath<'_, 'on_disk>,
894 hg_path: &BorrowedPath<'_, 'on_disk>,
895 ) -> bool {
895 ) -> bool {
896 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
896 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
897 if is_ignored {
897 if is_ignored {
898 if self.options.list_ignored {
898 if self.options.list_ignored {
899 self.push_outcome_without_copy_source(
899 self.push_outcome_without_copy_source(
900 Outcome::Ignored,
900 Outcome::Ignored,
901 hg_path,
901 hg_path,
902 )
902 )
903 }
903 }
904 } else if self.options.list_unknown {
904 } else if self.options.list_unknown {
905 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
905 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
906 }
906 }
907 is_ignored
907 is_ignored
908 }
908 }
909 }
909 }
910
910
911 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
911 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
912 /// care about.
912 /// care about.
913 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
913 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
914 enum FakeFileType {
914 enum FakeFileType {
915 File,
915 File,
916 Directory,
916 Directory,
917 Symlink,
917 Symlink,
918 BadType(BadType),
918 BadType(BadType),
919 }
919 }
920
920
921 impl From<std::fs::FileType> for FakeFileType {
921 impl From<std::fs::FileType> for FakeFileType {
922 fn from(f: std::fs::FileType) -> Self {
922 fn from(f: std::fs::FileType) -> Self {
923 if f.is_dir() {
923 if f.is_dir() {
924 Self::Directory
924 Self::Directory
925 } else if f.is_file() {
925 } else if f.is_file() {
926 Self::File
926 Self::File
927 } else if f.is_symlink() {
927 } else if f.is_symlink() {
928 Self::Symlink
928 Self::Symlink
929 } else if f.is_fifo() {
929 } else if f.is_fifo() {
930 Self::BadType(BadType::FIFO)
930 Self::BadType(BadType::FIFO)
931 } else if f.is_block_device() {
931 } else if f.is_block_device() {
932 Self::BadType(BadType::BlockDevice)
932 Self::BadType(BadType::BlockDevice)
933 } else if f.is_char_device() {
933 } else if f.is_char_device() {
934 Self::BadType(BadType::CharacterDevice)
934 Self::BadType(BadType::CharacterDevice)
935 } else if f.is_socket() {
935 } else if f.is_socket() {
936 Self::BadType(BadType::Socket)
936 Self::BadType(BadType::Socket)
937 } else {
937 } else {
938 Self::BadType(BadType::Unknown)
938 Self::BadType(BadType::Unknown)
939 }
939 }
940 }
940 }
941 }
941 }
942
942
943 struct DirEntry<'a> {
943 struct DirEntry<'a> {
944 /// Path as stored in the dirstate, or just the filename for optimization.
944 /// Path as stored in the dirstate, or just the filename for optimization.
945 hg_path: HgPathCow<'a>,
945 hg_path: HgPathCow<'a>,
946 /// Filesystem path
946 /// Filesystem path
947 fs_path: Cow<'a, Path>,
947 fs_path: Cow<'a, Path>,
948 /// Lazily computed
948 /// Lazily computed
949 symlink_metadata: Option<std::fs::Metadata>,
949 symlink_metadata: Option<std::fs::Metadata>,
950 /// Already computed for ergonomics.
950 /// Already computed for ergonomics.
951 file_type: FakeFileType,
951 file_type: FakeFileType,
952 }
952 }
953
953
954 impl<'a> DirEntry<'a> {
954 impl<'a> DirEntry<'a> {
955 /// Returns **unsorted** entries in the given directory, with name,
955 /// Returns **unsorted** entries in the given directory, with name,
956 /// metadata and file type.
956 /// metadata and file type.
957 ///
957 ///
958 /// If a `.hg` sub-directory is encountered:
958 /// If a `.hg` sub-directory is encountered:
959 ///
959 ///
960 /// * At the repository root, ignore that sub-directory
960 /// * At the repository root, ignore that sub-directory
961 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
961 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
962 /// list instead.
962 /// list instead.
963 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
963 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
964 // `read_dir` returns a "not found" error for the empty path
964 // `read_dir` returns a "not found" error for the empty path
965 let at_cwd = path == Path::new("");
965 let at_cwd = path == Path::new("");
966 let read_dir_path = if at_cwd { Path::new(".") } else { path };
966 let read_dir_path = if at_cwd { Path::new(".") } else { path };
967 let mut results = Vec::new();
967 let mut results = Vec::new();
968 for entry in read_dir_path.read_dir()? {
968 for entry in read_dir_path.read_dir()? {
969 let entry = entry?;
969 let entry = entry?;
970 let file_type = match entry.file_type() {
970 let file_type = match entry.file_type() {
971 Ok(v) => v,
971 Ok(v) => v,
972 Err(e) => {
972 Err(e) => {
973 // race with file deletion?
973 // race with file deletion?
974 if e.kind() == std::io::ErrorKind::NotFound {
974 if e.kind() == std::io::ErrorKind::NotFound {
975 continue;
975 continue;
976 } else {
976 } else {
977 return Err(e);
977 return Err(e);
978 }
978 }
979 }
979 }
980 };
980 };
981 let file_name = entry.file_name();
981 let file_name = entry.file_name();
982 // FIXME don't do this when cached
982 // FIXME don't do this when cached
983 if file_name == ".hg" {
983 if file_name == ".hg" {
984 if is_at_repo_root {
984 if is_at_repo_root {
985 // Skip the repo’s own .hg (might be a symlink)
985 // Skip the repo’s own .hg (might be a symlink)
986 continue;
986 continue;
987 } else if file_type.is_dir() {
987 } else if file_type.is_dir() {
988 // A .hg sub-directory at another location means a subrepo,
988 // A .hg sub-directory at another location means a subrepo,
989 // skip it entirely.
989 // skip it entirely.
990 return Ok(Vec::new());
990 return Ok(Vec::new());
991 }
991 }
992 }
992 }
993 let full_path = if at_cwd {
993 let full_path = if at_cwd {
994 file_name.clone().into()
994 file_name.clone().into()
995 } else {
995 } else {
996 entry.path()
996 entry.path()
997 };
997 };
998 let filename =
998 let filename =
999 Cow::Owned(get_bytes_from_os_string(file_name).into());
999 Cow::Owned(get_bytes_from_os_string(file_name).into());
1000 let file_type = FakeFileType::from(file_type);
1000 let file_type = FakeFileType::from(file_type);
1001 results.push(DirEntry {
1001 results.push(DirEntry {
1002 hg_path: filename,
1002 hg_path: filename,
1003 fs_path: Cow::Owned(full_path.to_path_buf()),
1003 fs_path: Cow::Owned(full_path.to_path_buf()),
1004 symlink_metadata: None,
1004 symlink_metadata: None,
1005 file_type,
1005 file_type,
1006 })
1006 })
1007 }
1007 }
1008 Ok(results)
1008 Ok(results)
1009 }
1009 }
1010
1010
1011 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
1011 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
1012 match &self.symlink_metadata {
1012 match &self.symlink_metadata {
1013 Some(meta) => Ok(meta.clone()),
1013 Some(meta) => Ok(meta.clone()),
1014 None => std::fs::symlink_metadata(&self.fs_path),
1014 None => std::fs::symlink_metadata(&self.fs_path),
1015 }
1015 }
1016 }
1016 }
1017
1017
1018 fn is_dir(&self) -> bool {
1018 fn is_dir(&self) -> bool {
1019 self.file_type == FakeFileType::Directory
1019 self.file_type == FakeFileType::Directory
1020 }
1020 }
1021
1021
1022 fn is_file(&self) -> bool {
1022 fn is_file(&self) -> bool {
1023 self.file_type == FakeFileType::File
1023 self.file_type == FakeFileType::File
1024 }
1024 }
1025
1025
1026 fn is_symlink(&self) -> bool {
1026 fn is_symlink(&self) -> bool {
1027 self.file_type == FakeFileType::Symlink
1027 self.file_type == FakeFileType::Symlink
1028 }
1028 }
1029
1029
1030 fn is_bad(&self) -> Option<BadType> {
1030 fn is_bad(&self) -> Option<BadType> {
1031 match self.file_type {
1031 match self.file_type {
1032 FakeFileType::BadType(ty) => Some(ty),
1032 FakeFileType::BadType(ty) => Some(ty),
1033 _ => None,
1033 _ => None,
1034 }
1034 }
1035 }
1035 }
1036 }
1036 }
1037
1037
1038 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
1038 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
1039 /// of the give repository.
1039 /// of the give repository.
1040 ///
1040 ///
1041 /// This is similar to `SystemTime::now()`, with the result truncated to the
1041 /// This is similar to `SystemTime::now()`, with the result truncated to the
1042 /// same time resolution as other files’ modification times. Using `.hg`
1042 /// same time resolution as other files’ modification times. Using `.hg`
1043 /// instead of the system’s default temporary directory (such as `/tmp`) makes
1043 /// instead of the system’s default temporary directory (such as `/tmp`) makes
1044 /// it more likely the temporary file is in the same disk partition as contents
1044 /// it more likely the temporary file is in the same disk partition as contents
1045 /// of the working directory, which can matter since different filesystems may
1045 /// of the working directory, which can matter since different filesystems may
1046 /// store timestamps with different resolutions.
1046 /// store timestamps with different resolutions.
1047 ///
1047 ///
1048 /// This may fail, typically if we lack write permissions. In that case we
1048 /// This may fail, typically if we lack write permissions. In that case we
1049 /// should continue the `status()` algoritm anyway and consider the current
1049 /// should continue the `status()` algoritm anyway and consider the current
1050 /// date/time to be unknown.
1050 /// date/time to be unknown.
1051 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
1051 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
1052 tempfile::tempfile_in(repo_root.join(".hg"))?
1052 tempfile::tempfile_in(repo_root.join(".hg"))?
1053 .metadata()?
1053 .metadata()?
1054 .modified()
1054 .modified()
1055 }
1055 }
@@ -1,711 +1,711 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 use rand::seq::SliceRandom;
15 use rand::seq::SliceRandom;
16 use rand::{thread_rng, RngCore, SeedableRng};
16 use rand::{thread_rng, RngCore, SeedableRng};
17 use std::cmp::{max, min};
17 use std::cmp::{max, min};
18 use std::collections::{HashSet, VecDeque};
18 use std::collections::{HashSet, VecDeque};
19
19
20 type Rng = rand_pcg::Pcg32;
20 type Rng = rand_pcg::Pcg32;
21 type Seed = [u8; 16];
21 type Seed = [u8; 16];
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
69 if !seen.insert(current) {
69 if !seen.insert(current) {
70 continue;
70 continue;
71 }
71 }
72
72
73 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
74 if d > factor {
74 if d > factor {
75 factor *= 2;
75 factor *= 2;
76 }
76 }
77 if d == factor {
77 if d == factor {
78 sample.insert(current);
78 sample.insert(current);
79 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
80 if sample.len() >= sz {
80 if sample.len() >= sz {
81 return Ok(());
81 return Ok(());
82 }
82 }
83 }
83 }
84 }
84 }
85 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
86 if let Some(revs) = revs {
86 if let Some(revs) = revs {
87 if !revs.contains(&p) {
87 if !revs.contains(&p) {
88 continue;
88 continue;
89 }
89 }
90 }
90 }
91 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
92 visit.push_back(p);
92 visit.push_back(p);
93 }
93 }
94 }
94 }
95 Ok(())
95 Ok(())
96 }
96 }
97
97
98 struct ParentsIterator {
98 struct ParentsIterator {
99 parents: [Revision; 2],
99 parents: [Revision; 2],
100 cur: usize,
100 cur: usize,
101 }
101 }
102
102
103 impl ParentsIterator {
103 impl ParentsIterator {
104 fn graph_parents(
104 fn graph_parents(
105 graph: &impl Graph,
105 graph: &impl Graph,
106 r: Revision,
106 r: Revision,
107 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
108 Ok(ParentsIterator {
108 Ok(ParentsIterator {
109 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
110 cur: 0,
110 cur: 0,
111 })
111 })
112 }
112 }
113 }
113 }
114
114
115 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
116 type Item = Revision;
116 type Item = Revision;
117
117
118 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
119 if self.cur > 1 {
119 if self.cur > 1 {
120 return None;
120 return None;
121 }
121 }
122 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
123 self.cur += 1;
123 self.cur += 1;
124 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
125 return self.next();
125 return self.next();
126 }
126 }
127 Some(rev)
127 Some(rev)
128 }
128 }
129 }
129 }
130
130
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
134 /// repo.
134 /// repo.
135 ///
135 ///
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
137 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
138 ///
138 ///
139 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
142 ///
142 ///
143 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
144 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
146 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
147 ///
147 ///
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
149 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
150 ///
150 ///
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
152 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
153 ///
153 ///
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
155 /// useful for integration tests.
155 /// useful for integration tests.
156 pub fn new(
156 pub fn new(
157 graph: G,
157 graph: G,
158 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
159 respect_size: bool,
159 respect_size: bool,
160 randomize: bool,
160 randomize: bool,
161 ) -> Self {
161 ) -> Self {
162 let mut seed = [0; 16];
162 let mut seed = [0; 16];
163 if randomize {
163 if randomize {
164 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
165 }
165 }
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 }
167 }
168
168
169 pub fn new_with_seed(
169 pub fn new_with_seed(
170 graph: G,
170 graph: G,
171 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
172 seed: Seed,
172 seed: Seed,
173 respect_size: bool,
173 respect_size: bool,
174 randomize: bool,
174 randomize: bool,
175 ) -> Self {
175 ) -> Self {
176 PartialDiscovery {
176 PartialDiscovery {
177 undecided: None,
177 undecided: None,
178 children_cache: None,
178 children_cache: None,
179 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
180 graph: graph.clone(),
180 graph: graph.clone(),
181 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
182 missing: HashSet::new(),
182 missing: HashSet::new(),
183 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
184 respect_size,
184 respect_size,
185 randomize,
185 randomize,
186 }
186 }
187 }
187 }
188
188
189 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
190 /// as a vector
190 /// as a vector
191 fn limit_sample(
191 fn limit_sample(
192 &mut self,
192 &mut self,
193 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
194 size: usize,
194 size: usize,
195 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
196 if !self.randomize {
196 if !self.randomize {
197 sample.sort_unstable();
197 sample.sort_unstable();
198 sample.truncate(size);
198 sample.truncate(size);
199 return sample;
199 return sample;
200 }
200 }
201 let sample_len = sample.len();
201 let sample_len = sample.len();
202 if sample_len <= size {
202 if sample_len <= size {
203 return sample;
203 return sample;
204 }
204 }
205 let rng = &mut self.rng;
205 let rng = &mut self.rng;
206 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
207 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
208 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
209 } else {
209 } else {
210 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
211 };
211 };
212 limited_slice.to_owned()
212 limited_slice.to_owned()
213 }
213 }
214
214
215 /// Register revisions known as being common
215 /// Register revisions known as being common
216 pub fn add_common_revisions(
216 pub fn add_common_revisions(
217 &mut self,
217 &mut self,
218 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
220 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
221 self.common.add_bases(common);
221 self.common.add_bases(common);
222 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
223 return Ok(());
223 return Ok(());
224 }
224 }
225 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
226 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
227 }
227 }
228 Ok(())
228 Ok(())
229 }
229 }
230
230
231 /// Register revisions known as being missing
231 /// Register revisions known as being missing
232 ///
232 ///
233 /// # Performance note
233 /// # Performance note
234 ///
234 ///
235 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
237 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
238 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
239 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
240 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
241 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
242 &mut self,
242 &mut self,
243 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
244 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 if tovisit.is_empty() {
246 if tovisit.is_empty() {
247 return Ok(());
247 return Ok(());
248 }
248 }
249 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
250 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
251 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
254 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
255 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
256 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
257 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
258 // children (we now they are all missing)
258 // children (we now they are all missing)
259 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
260 // and its children have already been queued
260 // and its children have already been queued
261 continue;
261 continue;
262 }
262 }
263 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
264 match children.get(&rev) {
264 match children.get(&rev) {
265 None => {
265 None => {
266 continue;
266 continue;
267 }
267 }
268 Some(this_children) => {
268 Some(this_children) => {
269 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
270 if seen.insert(child) {
270 if seen.insert(child) {
271 tovisit.push_back(child);
271 tovisit.push_back(child);
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 }
277 Ok(())
277 Ok(())
278 }
278 }
279
279
280 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
281 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
282 self.common.has_bases()
282 self.common.has_bases()
283 }
283 }
284
284
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 }
288 }
289
289
290 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
291 ///
291 ///
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
293 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
294 ///
294 ///
295 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
296 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
297 ///
297 ///
298 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// if it is complete.
300 /// if it is complete.
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 self.common.bases_heads()
302 self.common.bases_heads()
303 }
303 }
304
304
305 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
306 ///
306 ///
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
309 /// any panic.
309 /// any panic.
310 ///
310 ///
311 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 if self.undecided.is_some() {
316 if self.undecided.is_some() {
317 return Ok(());
317 return Ok(());
318 }
318 }
319 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
320 self.undecided =
320 self.undecided =
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
327 return Ok(());
327 return Ok(());
328 }
328 }
329 self.ensure_undecided()?;
329 self.ensure_undecided()?;
330
330
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 FastHashMap::default();
332 FastHashMap::default();
333 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(Vec::new).push(rev);
335 children.entry(p).or_default().push(rev);
336 }
336 }
337 }
337 }
338 self.children_cache = Some(children);
338 self.children_cache = Some(children);
339 Ok(())
339 Ok(())
340 }
340 }
341
341
342 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
343 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
344 DiscoveryStats {
344 DiscoveryStats {
345 undecided: self.undecided.as_ref().map(HashSet::len),
345 undecided: self.undecided.as_ref().map(HashSet::len),
346 }
346 }
347 }
347 }
348
348
349 pub fn take_quick_sample(
349 pub fn take_quick_sample(
350 &mut self,
350 &mut self,
351 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
352 size: usize,
352 size: usize,
353 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
354 self.ensure_undecided()?;
354 self.ensure_undecided()?;
355 let mut sample = {
355 let mut sample = {
356 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
357 if undecided.len() <= size {
357 if undecided.len() <= size {
358 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
359 }
359 }
360 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
361 };
361 };
362 if sample.len() >= size {
362 if sample.len() >= size {
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 }
364 }
365 update_sample(
365 update_sample(
366 None,
366 None,
367 headrevs,
367 headrevs,
368 &mut sample,
368 &mut sample,
369 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 Some(size),
370 Some(size),
371 )?;
371 )?;
372 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
373 }
373 }
374
374
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 ///
376 ///
377 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
378 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 ///
379 ///
380 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
381 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
382 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
383 ///
383 ///
384 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
385 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
386 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
387 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
388 fn bidirectional_sample(
388 fn bidirectional_sample(
389 &mut self,
389 &mut self,
390 size: usize,
390 size: usize,
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 self.ensure_undecided()?;
392 self.ensure_undecided()?;
393 {
393 {
394 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
395 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
396 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
397 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
398 if undecided.len() <= size {
398 if undecided.len() <= size {
399 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
400 }
400 }
401 }
401 }
402
402
403 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
404 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
405 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
406
406
407 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
408 // efficient here
408 // efficient here
409 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
410 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
411
411
412 // update from heads
412 // update from heads
413 update_sample(
413 update_sample(
414 Some(revs),
414 Some(revs),
415 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
416 &mut sample,
416 &mut sample,
417 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 None,
418 None,
419 )?;
419 )?;
420
420
421 // update from roots
421 // update from roots
422 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425
425
426 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
427 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
428 update_sample(
428 update_sample(
429 Some(revs),
429 Some(revs),
430 revroots,
430 revroots,
431 &mut sample,
431 &mut sample,
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 None,
433 None,
434 )?;
434 )?;
435 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
436 }
436 }
437
437
438 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
439 ///
439 ///
440 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
441 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
442 fn random_complete_sample(
442 fn random_complete_sample(
443 &mut self,
443 &mut self,
444 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
445 size: usize,
445 size: usize,
446 ) {
446 ) {
447 let sample_len = sample.len();
447 let sample_len = sample.len();
448 if size <= sample_len {
448 if size <= sample_len {
449 return;
449 return;
450 }
450 }
451 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
452 .undecided
452 .undecided
453 .as_ref()
453 .as_ref()
454 .unwrap()
454 .unwrap()
455 .iter()
455 .iter()
456 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
457 .cloned()
457 .cloned()
458 .collect();
458 .collect();
459 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 }
460 }
461
461
462 pub fn take_full_sample(
462 pub fn take_full_sample(
463 &mut self,
463 &mut self,
464 size: usize,
464 size: usize,
465 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 let size = if self.respect_size {
467 let size = if self.respect_size {
468 size
468 size
469 } else {
469 } else {
470 prescribed_size
470 prescribed_size
471 };
471 };
472 let mut sample =
472 let mut sample =
473 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
474 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
475 Ok(sample)
475 Ok(sample)
476 }
476 }
477 }
477 }
478
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::SampleGraph;

    /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
    macro_rules! R {
        ($revision:literal) => {
            Revision($revision)
        };
    }

    /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
    ///
    /// To avoid actual randomness in these tests, we give it a fixed
    /// random seed, but by default we'll test the random version.
    fn full_disco() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![R!(10), R!(11), R!(12), R!(13)],
            [0; 16],
            true,
            true,
        )
    }

    /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
    ///
    /// To avoid actual randomness in tests, we give it a fixed random seed.
    fn disco12() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![R!(12)],
            [0; 16],
            true,
            true,
        )
    }

    /// Sorted copy of the undecided set, for deterministic comparisons.
    fn sorted_undecided(
        disco: &PartialDiscovery<SampleGraph>,
    ) -> Vec<Revision> {
        let mut as_vec: Vec<Revision> =
            disco.undecided.as_ref().unwrap().iter().cloned().collect();
        as_vec.sort_unstable();
        as_vec
    }

    /// Sorted copy of the missing set, for deterministic comparisons.
    fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
        let mut as_vec: Vec<Revision> =
            disco.missing.iter().cloned().collect();
        as_vec.sort_unstable();
        as_vec
    }

    /// Sorted copy of `common_heads()`, for deterministic comparisons.
    fn sorted_common_heads(
        disco: &PartialDiscovery<SampleGraph>,
    ) -> Result<Vec<Revision>, GraphError> {
        let mut as_vec: Vec<Revision> =
            disco.common_heads()?.iter().cloned().collect();
        as_vec.sort_unstable();
        Ok(as_vec)
    }

    #[test]
    fn test_add_common_get_undecided() -> Result<(), GraphError> {
        let mut disco = full_disco();
        assert_eq!(disco.undecided, None);
        assert!(!disco.has_info());
        assert_eq!(disco.stats().undecided, None);

        disco.add_common_revisions(vec![R!(11), R!(12)])?;
        assert!(disco.has_info());
        assert!(!disco.is_complete());
        assert!(disco.missing.is_empty());

        // add_common_revisions did not trigger a premature computation
        // of `undecided`, let's check that and ask for them
        assert_eq!(disco.undecided, None);
        disco.ensure_undecided()?;
        assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
        assert_eq!(disco.stats().undecided, Some(4));
        Ok(())
    }

    /// in this test, we pretend that our peer misses exactly (8+10)::
    /// and we're comparing all our repo to it (as in a bare push)
    #[test]
    fn test_discovery() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.add_common_revisions(vec![R!(11), R!(12)])?;
        disco.add_missing_revisions(vec![R!(8), R!(10)])?;
        assert_eq!(sorted_undecided(&disco), vec![5]);
        assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
        assert!(!disco.is_complete());

        disco.add_common_revisions(vec![R!(5)])?;
        assert_eq!(sorted_undecided(&disco), Vec::<Revision>::new());
        assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
        assert!(disco.is_complete());
        assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
        Ok(())
    }

    #[test]
    fn test_add_missing_early_continue() -> Result<(), GraphError> {
        eprintln!("test_add_missing_early_stop");
        let mut disco = full_disco();
        disco.add_common_revisions(vec![R!(13), R!(3), R!(4)])?;
        disco.ensure_children_cache()?;
        // 12 is grand-child of 6 through 9
        // passing them in this order maximizes the chances of the
        // early continue to do the wrong thing
        disco.add_missing_revisions(vec![R!(6), R!(9), R!(12)])?;
        assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
        assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
        assert!(!disco.is_complete());
        Ok(())
    }

    #[test]
    fn test_limit_sample_no_need_to() {
        let sample = vec![R!(1), R!(2), R!(3), R!(4)];
        assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
    }

    #[test]
    fn test_limit_sample_less_than_half() {
        assert_eq!(
            full_disco().limit_sample((1..6).map(Revision).collect(), 2),
            vec![2, 5]
        );
    }

    #[test]
    fn test_limit_sample_more_than_half() {
        assert_eq!(
            full_disco().limit_sample((1..4).map(Revision).collect(), 2),
            vec![1, 2]
        );
    }

    #[test]
    fn test_limit_sample_no_random() {
        let mut disco = full_disco();
        disco.randomize = false;
        assert_eq!(
            disco.limit_sample(
                vec![R!(1), R!(8), R!(13), R!(5), R!(7), R!(3)],
                4
            ),
            vec![1, 3, 5, 7]
        );
    }

    #[test]
    fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.undecided = Some((1..=13).map(Revision).collect());

        let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
        sample_vec.sort_unstable();
        assert_eq!(sample_vec, vec![10, 11, 12, 13]);
        Ok(())
    }

    #[test]
    fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
        let mut disco = disco12();
        disco.ensure_undecided()?;

        let mut sample_vec = disco.take_quick_sample(vec![R!(12)], 4)?;
        sample_vec.sort_unstable();
        // r12's only parent is r9, whose unique grand-parent through the
        // diamond shape is r4. This ends there because the distance from r4
        // to the root is only 3.
        assert_eq!(sample_vec, vec![4, 9, 12]);
        Ok(())
    }

    #[test]
    fn test_children_cache() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.ensure_children_cache()?;

        let cache = disco.children_cache.unwrap();
        assert_eq!(cache.get(&R!(2)).cloned(), Some(vec![R!(4)]));
        assert_eq!(cache.get(&R!(10)).cloned(), None);

        let mut children_4 = cache.get(&R!(4)).cloned().unwrap();
        children_4.sort_unstable();
        assert_eq!(children_4, vec![R!(5), R!(6), R!(7)]);

        let mut children_7 = cache.get(&R!(7)).cloned().unwrap();
        children_7.sort_unstable();
        assert_eq!(children_7, vec![R!(9), R!(11)]);

        Ok(())
    }

    #[test]
    fn test_complete_sample() {
        let mut disco = full_disco();
        let undecided: HashSet<Revision> =
            [4, 7, 9, 2, 3].iter().cloned().map(Revision).collect();
        disco.undecided = Some(undecided);

        let mut sample = vec![R!(0)];
        disco.random_complete_sample(&mut sample, 3);
        assert_eq!(sample.len(), 3);

        let mut sample = vec![R!(2), R!(4), R!(7)];
        disco.random_complete_sample(&mut sample, 1);
        assert_eq!(sample.len(), 3);
    }

    #[test]
    fn test_bidirectional_sample() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.undecided = Some((0..=13).map(Revision).collect());

        let (sample_set, size) = disco.bidirectional_sample(7)?;
        assert_eq!(size, 7);
        let mut sample: Vec<Revision> = sample_set.into_iter().collect();
        sample.sort_unstable();
        // our DAG is a bit too small for the results to be really interesting
        // at least it shows that
        // - we went both ways
        // - we didn't take all Revisions (6 is not in the sample)
        assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
        Ok(())
    }
}
@@ -1,876 +1,874 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::ops::Deref;
20 use std::ops::Deref;
21 use std::path::{Path, PathBuf};
21 use std::path::{Path, PathBuf};
22 use std::vec::Vec;
22 use std::vec::Vec;
23
23
24 lazy_static! {
24 lazy_static! {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
28 for byte in to_escape {
28 for byte in to_escape {
29 v[*byte as usize].insert(0, b'\\');
29 v[*byte as usize].insert(0, b'\\');
30 }
30 }
31 v
31 v
32 };
32 };
33 }
33 }
34
34
/// These are matched in order
///
/// Applied in `glob_to_re` after one `*` has been consumed: `*/` (i.e.
/// `**/`) becomes an optional directory prefix, `*` (i.e. `**`) matches
/// anything including slashes, and the always-matching empty fallback
/// turns a lone `*` into "anything but a slash".
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38
38
/// The pattern syntaxes recognized by Mercurial, normally selected with
/// a prefix such as `glob:` or `re:` (see `parse_pattern_syntax`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// a path relative to repository root, which is matched recursively
    Path,
    /// a single exact path relative to repository root
    FilePath,
    /// A path relative to cwd
    RelPath,
    /// an unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
    /// SubInclude with the result of parsing the included file
    ///
    /// Note: there is no ExpandedInclude because that expansion can be done
    /// in place by replacing the Include pattern by the included patterns.
    /// SubInclude requires more handling.
    ///
    /// Note: `Box` is used to minimize size impact on other enum variants
    ExpandedSubInclude(Box<SubInclude>),
}
74
74
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
    let mut input = pat;
    let mut res: Vec<u8> = vec![];
    // Number of `{}` alternation groups currently open
    let mut group_depth = 0;

    while let Some((c, rest)) = input.split_first() {
        input = rest;

        match c {
            b'*' => {
                // Try the multi-byte constructs first; the last entry of
                // GLOB_REPLACEMENTS has an empty source and always
                // matches, so a lone `*` becomes `[^/]*`.
                for (source, repl) in GLOB_REPLACEMENTS {
                    if let Some(rest) = input.drop_prefix(source) {
                        input = rest;
                        res.extend(*repl);
                        break;
                    }
                }
            }
            b'?' => res.extend(b"."),
            b'[' => {
                // Look for the matching `]`, skipping one byte so that
                // a leading `]` (as in `[]a]`) is treated as a member.
                match input.iter().skip(1).position(|b| *b == b']') {
                    // No closing bracket: emit a literal `[`
                    None => res.extend(b"\\["),
                    Some(end) => {
                        // Account for the one we skipped
                        let end = end + 1;

                        res.extend(b"[");

                        for (i, b) in input[..end].iter().enumerate() {
                            if *b == b'!' && i == 0 {
                                // glob negation -> regex negation
                                res.extend(b"^")
                            } else if *b == b'^' && i == 0 {
                                // a literal `^` at class start must be
                                // escaped to not negate the class
                                res.extend(b"\\^")
                            } else if *b == b'\\' {
                                res.extend(b"\\\\")
                            } else {
                                res.push(*b)
                            }
                        }
                        res.extend(b"]");
                        input = &input[end + 1..];
                    }
                }
            }
            b'{' => {
                group_depth += 1;
                res.extend(b"(?:")
            }
            b'}' if group_depth > 0 => {
                group_depth -= 1;
                res.extend(b")");
            }
            b',' if group_depth > 0 => res.extend(b"|"),
            b'\\' => {
                // Escape the next byte; a trailing backslash escapes
                // itself
                let c = {
                    if let Some((c, rest)) = input.split_first() {
                        input = rest;
                        c
                    } else {
                        c
                    }
                };
                res.extend(&RE_ESCAPE[*c as usize])
            }
            _ => res.extend(&RE_ESCAPE[*c as usize]),
        }
    }
    res
}
145
145
146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 pattern
147 pattern
148 .iter()
148 .iter()
149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 .collect()
150 .collect()
151 }
151 }
152
152
153 pub fn parse_pattern_syntax(
153 pub fn parse_pattern_syntax(
154 kind: &[u8],
154 kind: &[u8],
155 ) -> Result<PatternSyntax, PatternError> {
155 ) -> Result<PatternSyntax, PatternError> {
156 match kind {
156 match kind {
157 b"re:" => Ok(PatternSyntax::Regexp),
157 b"re:" => Ok(PatternSyntax::Regexp),
158 b"path:" => Ok(PatternSyntax::Path),
158 b"path:" => Ok(PatternSyntax::Path),
159 b"filepath:" => Ok(PatternSyntax::FilePath),
159 b"filepath:" => Ok(PatternSyntax::FilePath),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 b"glob:" => Ok(PatternSyntax::Glob),
164 b"glob:" => Ok(PatternSyntax::Glob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 b"include:" => Ok(PatternSyntax::Include),
166 b"include:" => Ok(PatternSyntax::Include),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 _ => Err(PatternError::UnsupportedSyntax(
168 _ => Err(PatternError::UnsupportedSyntax(
169 String::from_utf8_lossy(kind).to_string(),
169 String::from_utf8_lossy(kind).to_string(),
170 )),
170 )),
171 }
171 }
172 }
172 }
173
173
lazy_static! {
    /// Matches a Python-style inline regex flag group (e.g. `(?i)`) at the
    /// very start of a pattern; used when relativizing `relre:` patterns.
    static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
}
177
177
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
///
/// `glob_suffix` is appended after the translated glob body (callers pass
/// e.g. `b"(?:/|$)"`). An empty pattern yields an empty regex.
fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
    let IgnorePattern {
        syntax, pattern, ..
    } = entry;
    if pattern.is_empty() {
        return vec![];
    }
    match syntax {
        // Full regexes are passed through unchanged.
        PatternSyntax::Regexp => pattern.to_owned(),
        PatternSyntax::RelRegexp => {
            // The `regex` crate accepts `**` while `re2` and Python's `re`
            // do not. Checking for `*` correctly triggers the same error all
            // engines.
            if pattern[0] == b'^'
                || pattern[0] == b'*'
                || pattern.starts_with(b".*")
            {
                // Already anchored (or already matches anywhere): no
                // relativizing prefix needed.
                return pattern.to_owned();
            }
            match FLAG_RE.find(pattern) {
                Some(mat) => {
                    // A leading `(?flags)` group: rebuild the pattern as a
                    // non-capturing `(?flags:...)` group with the `.*`
                    // relativizer inserted after the flags.
                    let s = mat.start();
                    let e = mat.end();
                    [
                        &b"(?"[..],
                        // flag letters, without `(?` and `)`
                        &pattern[s + 2..e - 1],
                        &b":"[..],
                        if pattern[e] == b'^'
                            || pattern[e] == b'*'
                            || pattern[e..].starts_with(b".*")
                        {
                            &b""[..]
                        } else {
                            &b".*"[..]
                        },
                        &pattern[e..],
                        &b")"[..],
                    ]
                    .concat()
                }
                // No flag group: simply allow any leading path prefix.
                None => [&b".*"[..], pattern].concat(),
            }
        }
        PatternSyntax::Path | PatternSyntax::RelPath => {
            if pattern == b"." {
                return vec![];
            }
            // Match the literal path, then either a subpath or end-of-path.
            [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
        }
        PatternSyntax::RootFiles => {
            let mut res = if pattern == b"." {
                vec![]
            } else {
                // Pattern is a directory name.
                [escape_pattern(pattern).as_slice(), b"/"].concat()
            };

            // Anything after the pattern must be a non-directory.
            res.extend(b"[^/]+$");
            res
        }
        PatternSyntax::RelGlob => {
            let glob_re = glob_to_re(pattern);
            // `glob_to_re` translates a leading `*` to `[^/]*`; for a
            // relative glob that should match across directories, so widen
            // it to `.*`; otherwise allow any directory prefix explicitly.
            if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
                [b".*", rest, glob_suffix].concat()
            } else {
                [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
            }
        }
        PatternSyntax::Glob | PatternSyntax::RootGlob => {
            [glob_to_re(pattern).as_slice(), glob_suffix].concat()
        }
        // These are expanded or short-circuited by the callers
        // (`get_patterns_from_file` / `build_single_regex`) before we
        // get here.
        PatternSyntax::Include
        | PatternSyntax::SubInclude
        | PatternSyntax::ExpandedSubInclude(_)
        | PatternSyntax::FilePath => unreachable!(),
    }
}
259
259
/// Bytes that carry a special meaning inside a glob pattern; a `rootglob:`
/// containing none of these is just a literal path (see
/// `build_single_regex`).
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
262
262
/// Normalize a POSIX byte path, collapsing duplicate separators and
/// resolving `.`/`..` components lexically (like Python's
/// `os.path.normpath`). An empty input normalizes to `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    if bytes.is_empty() {
        return b".".to_vec();
    }
    let sep = b'/';

    // POSIX: exactly two leading slashes are meaningful and preserved;
    // one stays as-is, three or more collapse down to a single one.
    let mut leading_slashes = bytes.iter().take_while(|b| **b == sep).count();
    if leading_slashes > 2 {
        leading_slashes = 1;
    }

    let mut stack: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == sep) {
        // Empty components (from `//`) and `.` disappear.
        if component.is_empty() || component == b"." {
            continue;
        }
        // A `..` cancels the previous real component, except when the path
        // is relative and empty so far, or when the previous component is
        // itself an uncancellable `..` (both must be kept literally).
        let cancels_previous = component == b".."
            && !(leading_slashes == 0 && stack.is_empty())
            && !matches!(stack.last(), Some(last) if *last == b"..");
        if cancels_previous {
            // No-op when the stack is empty (e.g. `/..` -> `/`).
            stack.pop();
        } else {
            stack.push(component);
        }
    }

    let mut normalized = vec![sep; leading_slashes];
    normalized.extend(stack.join(&sep));
    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
304
304
305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
306 /// that don't need to be transformed into a regex.
306 /// that don't need to be transformed into a regex.
307 pub fn build_single_regex(
307 pub fn build_single_regex(
308 entry: &IgnorePattern,
308 entry: &IgnorePattern,
309 glob_suffix: &[u8],
309 glob_suffix: &[u8],
310 ) -> Result<Option<Vec<u8>>, PatternError> {
310 ) -> Result<Option<Vec<u8>>, PatternError> {
311 let IgnorePattern {
311 let IgnorePattern {
312 pattern, syntax, ..
312 pattern, syntax, ..
313 } = entry;
313 } = entry;
314 let pattern = match syntax {
314 let pattern = match syntax {
315 PatternSyntax::RootGlob
315 PatternSyntax::RootGlob
316 | PatternSyntax::Path
316 | PatternSyntax::Path
317 | PatternSyntax::RelGlob
317 | PatternSyntax::RelGlob
318 | PatternSyntax::RelPath
318 | PatternSyntax::RelPath
319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
320 PatternSyntax::Include | PatternSyntax::SubInclude => {
320 PatternSyntax::Include | PatternSyntax::SubInclude => {
321 return Err(PatternError::NonRegexPattern(entry.clone()))
321 return Err(PatternError::NonRegexPattern(entry.clone()))
322 }
322 }
323 _ => pattern.to_owned(),
323 _ => pattern.to_owned(),
324 };
324 };
325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
328 Ok(None)
328 Ok(None)
329 } else {
329 } else {
330 let mut entry = entry.clone();
330 let mut entry = entry.clone();
331 entry.pattern = pattern;
331 entry.pattern = pattern;
332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
333 }
333 }
334 }
334 }
335
335
lazy_static! {
    /// Maps every supported syntax prefix (colon included) to its
    /// [`PatternSyntax`] variant. Note that `re:` and `regexp:` are
    /// aliases for the same syntax.
    static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
        let mut m = FastHashMap::default();

        m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
        m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
        m.insert(b"path:".as_ref(), PatternSyntax::Path);
        m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
        m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
        m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
        m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
        m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
        m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
        m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
        m.insert(b"include:".as_ref(), PatternSyntax::Include);
        m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);

        m
    };
}
356
356
/// A non-fatal problem encountered while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// An unknown `syntax:` directive was seen: (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file itself does not exist: file path
    NoSuchFile(PathBuf),
}
364
364
365 pub fn parse_one_pattern(
365 pub fn parse_one_pattern(
366 pattern: &[u8],
366 pattern: &[u8],
367 source: &Path,
367 source: &Path,
368 default: PatternSyntax,
368 default: PatternSyntax,
369 normalize: bool,
369 normalize: bool,
370 ) -> IgnorePattern {
370 ) -> IgnorePattern {
371 let mut pattern_bytes: &[u8] = pattern;
371 let mut pattern_bytes: &[u8] = pattern;
372 let mut syntax = default;
372 let mut syntax = default;
373
373
374 for (s, val) in SYNTAXES.iter() {
374 for (s, val) in SYNTAXES.iter() {
375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 syntax = val.clone();
376 syntax = val.clone();
377 pattern_bytes = rest;
377 pattern_bytes = rest;
378 break;
378 break;
379 }
379 }
380 }
380 }
381
381
382 let pattern = match syntax {
382 let pattern = match syntax {
383 PatternSyntax::RootGlob
383 PatternSyntax::RootGlob
384 | PatternSyntax::Path
384 | PatternSyntax::Path
385 | PatternSyntax::Glob
385 | PatternSyntax::Glob
386 | PatternSyntax::RelGlob
386 | PatternSyntax::RelGlob
387 | PatternSyntax::RelPath
387 | PatternSyntax::RelPath
388 | PatternSyntax::RootFiles
388 | PatternSyntax::RootFiles
389 if normalize =>
389 if normalize =>
390 {
390 {
391 normalize_path_bytes(pattern_bytes)
391 normalize_path_bytes(pattern_bytes)
392 }
392 }
393 _ => pattern_bytes.to_vec(),
393 _ => pattern_bytes.to_vec(),
394 };
394 };
395
395
396 IgnorePattern {
396 IgnorePattern {
397 syntax,
397 syntax,
398 pattern,
398 pattern,
399 source: source.to_owned(),
399 source: source.to_owned(),
400 }
400 }
401 }
401 }
402
402
/// Parse the contents of an "ignore"-style pattern file into patterns,
/// collecting recoverable problems as warnings instead of failing.
///
/// `default_syntax_override` replaces the initial `relre:` default syntax;
/// when `relativize` is true, each parsed pattern is passed through
/// [`IgnorePattern::to_relative`]. Comments start at an unescaped `#`;
/// `\#` denotes a literal `#`.
pub fn parse_pattern_file_contents(
    lines: &[u8],
    file_path: &Path,
    default_syntax_override: Option<PatternSyntax>,
    warn: bool,
    relativize: bool,
) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
    // Group 1 captures everything up to (and including) the last character
    // before an unescaped `#`; `\\\\` pairs keep escaped backslashes intact.
    let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();

    #[allow(clippy::trivial_regex)]
    let comment_escape_regex = Regex::new(r"\\#").unwrap();
    let mut inputs: Vec<IgnorePattern> = vec![];
    let mut warnings: Vec<PatternFileWarning> = vec![];

    // Syntax in effect until the next `syntax:` line.
    let mut current_syntax =
        default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);

    for mut line in lines.split(|c| *c == b'\n') {
        let line_buf;
        if line.contains(&b'#') {
            // Strip the comment, then turn `\#` back into a literal `#`.
            if let Some(cap) = comment_regex.captures(line) {
                line = &line[..cap.get(1).unwrap().end()]
            }
            line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
            line = &line_buf;
        }

        let line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        // A `syntax: <name>` line switches the default syntax for the rest
        // of the file; any other non-empty line is a pattern.
        if let Some(syntax) = line.drop_prefix(b"syntax:") {
            let syntax = syntax.trim();

            if let Some(parsed) =
                SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
            {
                current_syntax = parsed.clone();
            } else if warn {
                warnings.push(PatternFileWarning::InvalidSyntax(
                    file_path.to_owned(),
                    syntax.to_owned(),
                ));
            }
        } else {
            let pattern = parse_one_pattern(
                line,
                file_path,
                current_syntax.clone(),
                false,
            );
            inputs.push(if relativize {
                pattern.to_relative()
            } else {
                pattern
            })
        }
    }
    Ok((inputs, warnings))
}
465
465
466 pub fn parse_pattern_args(
466 pub fn parse_pattern_args(
467 patterns: Vec<Vec<u8>>,
467 patterns: Vec<Vec<u8>>,
468 cwd: &Path,
468 cwd: &Path,
469 root: &Path,
469 root: &Path,
470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
472 for pattern in patterns {
472 for pattern in patterns {
473 let pattern = parse_one_pattern(
473 let pattern = parse_one_pattern(
474 &pattern,
474 &pattern,
475 Path::new("<args>"),
475 Path::new("<args>"),
476 PatternSyntax::RelPath,
476 PatternSyntax::RelPath,
477 true,
477 true,
478 );
478 );
479 match pattern.syntax {
479 match pattern.syntax {
480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
481 let name = get_path_from_bytes(&pattern.pattern);
481 let name = get_path_from_bytes(&pattern.pattern);
482 let canon = canonical_path(root, cwd, name)?;
482 let canon = canonical_path(root, cwd, name)?;
483 ignore_patterns.push(IgnorePattern {
483 ignore_patterns.push(IgnorePattern {
484 syntax: pattern.syntax,
484 syntax: pattern.syntax,
485 pattern: get_bytes_from_path(canon),
485 pattern: get_bytes_from_path(canon),
486 source: pattern.source,
486 source: pattern.source,
487 })
487 })
488 }
488 }
489 _ => ignore_patterns.push(pattern.to_owned()),
489 _ => ignore_patterns.push(pattern.to_owned()),
490 };
490 };
491 }
491 }
492 Ok(ignore_patterns)
492 Ok(ignore_patterns)
493 }
493 }
494
494
495 pub fn read_pattern_file(
495 pub fn read_pattern_file(
496 file_path: &Path,
496 file_path: &Path,
497 warn: bool,
497 warn: bool,
498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
500 match std::fs::read(file_path) {
500 match std::fs::read(file_path) {
501 Ok(contents) => {
501 Ok(contents) => {
502 inspect_pattern_bytes(file_path, &contents);
502 inspect_pattern_bytes(file_path, &contents);
503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
504 }
504 }
505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
506 vec![],
506 vec![],
507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
508 )),
508 )),
509 Err(e) => Err(e.into()),
509 Err(e) => Err(e.into()),
510 }
510 }
511 }
511 }
512
512
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// The syntax the pattern is written in.
    pub syntax: PatternSyntax,
    /// The pattern itself, with any `<syntax>:` prefix already stripped.
    pub pattern: Vec<u8>,
    /// The file (or the `<args>` pseudo-path) this pattern came from.
    pub source: PathBuf,
}
520
520
521 impl IgnorePattern {
521 impl IgnorePattern {
522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
523 Self {
523 Self {
524 syntax,
524 syntax,
525 pattern: pattern.to_owned(),
525 pattern: pattern.to_owned(),
526 source: source.to_owned(),
526 source: source.to_owned(),
527 }
527 }
528 }
528 }
529
529
530 pub fn to_relative(self) -> Self {
530 pub fn to_relative(self) -> Self {
531 let Self {
531 let Self {
532 syntax,
532 syntax,
533 pattern,
533 pattern,
534 source,
534 source,
535 } = self;
535 } = self;
536 Self {
536 Self {
537 syntax: match syntax {
537 syntax: match syntax {
538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
539 PatternSyntax::Glob => PatternSyntax::RelGlob,
539 PatternSyntax::Glob => PatternSyntax::RelGlob,
540 x => x,
540 x => x,
541 },
541 },
542 pattern,
542 pattern,
543 source,
543 source,
544 }
544 }
545 }
545 }
546 }
546 }
547
547
/// Convenience alias for fallible pattern-handling operations.
pub type PatternResult<T> = Result<T, PatternError>;
549
549
550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
551 /// and `subinclude:` patterns.
551 /// and `subinclude:` patterns.
552 ///
552 ///
553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
554 /// is used for the latter to form a tree of patterns.
554 /// is used for the latter to form a tree of patterns.
555 pub fn get_patterns_from_file(
555 pub fn get_patterns_from_file(
556 pattern_file: &Path,
556 pattern_file: &Path,
557 root_dir: &Path,
557 root_dir: &Path,
558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
560 let (patterns, mut warnings) =
560 let (patterns, mut warnings) =
561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
562 let patterns = patterns
562 let patterns = patterns
563 .into_iter()
563 .into_iter()
564 .flat_map(|entry| -> PatternResult<_> {
564 .flat_map(|entry| -> PatternResult<_> {
565 Ok(match &entry.syntax {
565 Ok(match &entry.syntax {
566 PatternSyntax::Include => {
566 PatternSyntax::Include => {
567 let inner_include =
567 let inner_include =
568 root_dir.join(get_path_from_bytes(&entry.pattern));
568 root_dir.join(get_path_from_bytes(&entry.pattern));
569 let (inner_pats, inner_warnings) = get_patterns_from_file(
569 let (inner_pats, inner_warnings) = get_patterns_from_file(
570 &inner_include,
570 &inner_include,
571 root_dir,
571 root_dir,
572 inspect_pattern_bytes,
572 inspect_pattern_bytes,
573 )?;
573 )?;
574 warnings.extend(inner_warnings);
574 warnings.extend(inner_warnings);
575 inner_pats
575 inner_pats
576 }
576 }
577 PatternSyntax::SubInclude => {
577 PatternSyntax::SubInclude => {
578 let mut sub_include = SubInclude::new(
578 let mut sub_include = SubInclude::new(
579 root_dir,
579 root_dir,
580 &entry.pattern,
580 &entry.pattern,
581 &entry.source,
581 &entry.source,
582 )?;
582 )?;
583 let (inner_patterns, inner_warnings) =
583 let (inner_patterns, inner_warnings) =
584 get_patterns_from_file(
584 get_patterns_from_file(
585 &sub_include.path,
585 &sub_include.path,
586 &sub_include.root,
586 &sub_include.root,
587 inspect_pattern_bytes,
587 inspect_pattern_bytes,
588 )?;
588 )?;
589 sub_include.included_patterns = inner_patterns;
589 sub_include.included_patterns = inner_patterns;
590 warnings.extend(inner_warnings);
590 warnings.extend(inner_warnings);
591 vec![IgnorePattern {
591 vec![IgnorePattern {
592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
593 sub_include,
593 sub_include,
594 )),
594 )),
595 ..entry
595 ..entry
596 }]
596 }]
597 }
597 }
598 _ => vec![entry],
598 _ => vec![entry],
599 })
599 })
600 })
600 })
601 .flatten()
601 .flatten()
602 .collect();
602 .collect();
603
603
604 Ok((patterns, warnings))
604 Ok((patterns, warnings))
605 }
605 }
606
606
/// Holds all the information needed to handle a `subinclude:` pattern.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SubInclude {
    /// Will be used for repository (hg) paths that start with this prefix.
    /// It is relative to the current working directory, so comparing against
    /// repository paths is painless.
    pub prefix: HgPathBuf,
    /// The file itself, containing the patterns
    pub path: PathBuf,
    /// Folder in the filesystem where this applies
    pub root: PathBuf,

    /// Patterns parsed from `path`, filled in by the caller
    /// (see `get_patterns_from_file`).
    pub included_patterns: Vec<IgnorePattern>,
}
621
621
622 impl SubInclude {
622 impl SubInclude {
623 pub fn new(
623 pub fn new(
624 root_dir: &Path,
624 root_dir: &Path,
625 pattern: &[u8],
625 pattern: &[u8],
626 source: &Path,
626 source: &Path,
627 ) -> Result<SubInclude, HgPathError> {
627 ) -> Result<SubInclude, HgPathError> {
628 let normalized_source =
628 let normalized_source =
629 normalize_path_bytes(&get_bytes_from_path(source));
629 normalize_path_bytes(&get_bytes_from_path(source));
630
630
631 let source_root = get_path_from_bytes(&normalized_source);
631 let source_root = get_path_from_bytes(&normalized_source);
632 let source_root =
632 let source_root = source_root.parent().unwrap_or(source_root);
633 source_root.parent().unwrap_or_else(|| source_root.deref());
634
633
635 let path = source_root.join(get_path_from_bytes(pattern));
634 let path = source_root.join(get_path_from_bytes(pattern));
636 let new_root = path.parent().unwrap_or_else(|| path.deref());
635 let new_root = path.parent().unwrap_or_else(|| path.deref());
637
636
638 let prefix = canonical_path(root_dir, root_dir, new_root)?;
637 let prefix = canonical_path(root_dir, root_dir, new_root)?;
639
638
640 Ok(Self {
639 Ok(Self {
641 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
640 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
642 if !p.is_empty() {
641 if !p.is_empty() {
643 p.push_byte(b'/');
642 p.push_byte(b'/');
644 }
643 }
645 p
644 p
646 })?,
645 })?,
647 path: path.to_owned(),
646 path: path.to_owned(),
648 root: new_root.to_owned(),
647 root: new_root.to_owned(),
649 included_patterns: Vec::new(),
648 included_patterns: Vec::new(),
650 })
649 })
651 }
650 }
652 }
651 }
653
652
654 /// Separate and pre-process subincludes from other patterns for the "ignore"
653 /// Separate and pre-process subincludes from other patterns for the "ignore"
655 /// phase.
654 /// phase.
656 pub fn filter_subincludes(
655 pub fn filter_subincludes(
657 ignore_patterns: Vec<IgnorePattern>,
656 ignore_patterns: Vec<IgnorePattern>,
658 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
657 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
659 let mut subincludes = vec![];
658 let mut subincludes = vec![];
660 let mut others = vec![];
659 let mut others = vec![];
661
660
662 for pattern in ignore_patterns {
661 for pattern in ignore_patterns {
663 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
662 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
664 {
663 {
665 subincludes.push(*sub_include);
664 subincludes.push(*sub_include);
666 } else {
665 } else {
667 others.push(pattern)
666 others.push(pattern)
668 }
667 }
669 }
668 }
670 Ok((subincludes, others))
669 Ok((subincludes, others))
671 }
670 }
672
671
673 #[cfg(test)]
672 #[cfg(test)]
674 mod tests {
673 mod tests {
675 use super::*;
674 use super::*;
676 use pretty_assertions::assert_eq;
675 use pretty_assertions::assert_eq;
677
676
678 #[test]
677 #[test]
679 fn escape_pattern_test() {
678 fn escape_pattern_test() {
680 let untouched =
679 let untouched =
681 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
680 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
682 assert_eq!(escape_pattern(untouched), untouched.to_vec());
681 assert_eq!(escape_pattern(untouched), untouched.to_vec());
683 // All escape codes
682 // All escape codes
684 assert_eq!(
683 assert_eq!(
685 escape_pattern(br#"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"#),
684 escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
686 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f"#
685 br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
687 .to_vec()
688 );
686 );
689 }
687 }
690
688
691 #[test]
689 #[test]
692 fn glob_test() {
690 fn glob_test() {
693 assert_eq!(glob_to_re(br#"?"#), br#"."#);
691 assert_eq!(glob_to_re(br"?"), br".");
694 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
692 assert_eq!(glob_to_re(br"*"), br"[^/]*");
695 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
693 assert_eq!(glob_to_re(br"**"), br".*");
696 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
694 assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
697 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
695 assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
698 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
696 assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
699 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
697 assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
700 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
698 assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
701 }
699 }
702
700
703 #[test]
701 #[test]
704 fn test_parse_pattern_file_contents() {
702 fn test_parse_pattern_file_contents() {
705 let lines = b"syntax: glob\n*.elc";
703 let lines = b"syntax: glob\n*.elc";
706
704
707 assert_eq!(
705 assert_eq!(
708 parse_pattern_file_contents(
706 parse_pattern_file_contents(
709 lines,
707 lines,
710 Path::new("file_path"),
708 Path::new("file_path"),
711 None,
709 None,
712 false,
710 false,
713 true,
711 true,
714 )
712 )
715 .unwrap()
713 .unwrap()
716 .0,
714 .0,
717 vec![IgnorePattern::new(
715 vec![IgnorePattern::new(
718 PatternSyntax::RelGlob,
716 PatternSyntax::RelGlob,
719 b"*.elc",
717 b"*.elc",
720 Path::new("file_path")
718 Path::new("file_path")
721 )],
719 )],
722 );
720 );
723
721
724 let lines = b"syntax: include\nsyntax: glob";
722 let lines = b"syntax: include\nsyntax: glob";
725
723
726 assert_eq!(
724 assert_eq!(
727 parse_pattern_file_contents(
725 parse_pattern_file_contents(
728 lines,
726 lines,
729 Path::new("file_path"),
727 Path::new("file_path"),
730 None,
728 None,
731 false,
729 false,
732 true,
730 true,
733 )
731 )
734 .unwrap()
732 .unwrap()
735 .0,
733 .0,
736 vec![]
734 vec![]
737 );
735 );
738 let lines = b"glob:**.o";
736 let lines = b"glob:**.o";
739 assert_eq!(
737 assert_eq!(
740 parse_pattern_file_contents(
738 parse_pattern_file_contents(
741 lines,
739 lines,
742 Path::new("file_path"),
740 Path::new("file_path"),
743 None,
741 None,
744 false,
742 false,
745 true,
743 true,
746 )
744 )
747 .unwrap()
745 .unwrap()
748 .0,
746 .0,
749 vec![IgnorePattern::new(
747 vec![IgnorePattern::new(
750 PatternSyntax::RelGlob,
748 PatternSyntax::RelGlob,
751 b"**.o",
749 b"**.o",
752 Path::new("file_path")
750 Path::new("file_path")
753 )]
751 )]
754 );
752 );
755 }
753 }
756
754
757 #[test]
755 #[test]
758 fn test_build_single_regex() {
756 fn test_build_single_regex() {
759 assert_eq!(
757 assert_eq!(
760 build_single_regex(
758 build_single_regex(
761 &IgnorePattern::new(
759 &IgnorePattern::new(
762 PatternSyntax::RelGlob,
760 PatternSyntax::RelGlob,
763 b"rust/target/",
761 b"rust/target/",
764 Path::new("")
762 Path::new("")
765 ),
763 ),
766 b"(?:/|$)"
764 b"(?:/|$)"
767 )
765 )
768 .unwrap(),
766 .unwrap(),
769 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
767 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
770 );
768 );
771 assert_eq!(
769 assert_eq!(
772 build_single_regex(
770 build_single_regex(
773 &IgnorePattern::new(
771 &IgnorePattern::new(
774 PatternSyntax::Regexp,
772 PatternSyntax::Regexp,
775 br"rust/target/\d+",
773 br"rust/target/\d+",
776 Path::new("")
774 Path::new("")
777 ),
775 ),
778 b"(?:/|$)"
776 b"(?:/|$)"
779 )
777 )
780 .unwrap(),
778 .unwrap(),
781 Some(br"rust/target/\d+".to_vec()),
779 Some(br"rust/target/\d+".to_vec()),
782 );
780 );
783 }
781 }
784
782
785 #[test]
783 #[test]
786 fn test_build_single_regex_shortcut() {
784 fn test_build_single_regex_shortcut() {
787 assert_eq!(
785 assert_eq!(
788 build_single_regex(
786 build_single_regex(
789 &IgnorePattern::new(
787 &IgnorePattern::new(
790 PatternSyntax::RootGlob,
788 PatternSyntax::RootGlob,
791 b"",
789 b"",
792 Path::new("")
790 Path::new("")
793 ),
791 ),
794 b"(?:/|$)"
792 b"(?:/|$)"
795 )
793 )
796 .unwrap(),
794 .unwrap(),
797 None,
795 None,
798 );
796 );
799 assert_eq!(
797 assert_eq!(
800 build_single_regex(
798 build_single_regex(
801 &IgnorePattern::new(
799 &IgnorePattern::new(
802 PatternSyntax::RootGlob,
800 PatternSyntax::RootGlob,
803 b"whatever",
801 b"whatever",
804 Path::new("")
802 Path::new("")
805 ),
803 ),
806 b"(?:/|$)"
804 b"(?:/|$)"
807 )
805 )
808 .unwrap(),
806 .unwrap(),
809 None,
807 None,
810 );
808 );
811 assert_eq!(
809 assert_eq!(
812 build_single_regex(
810 build_single_regex(
813 &IgnorePattern::new(
811 &IgnorePattern::new(
814 PatternSyntax::RootGlob,
812 PatternSyntax::RootGlob,
815 b"*.o",
813 b"*.o",
816 Path::new("")
814 Path::new("")
817 ),
815 ),
818 b"(?:/|$)"
816 b"(?:/|$)"
819 )
817 )
820 .unwrap(),
818 .unwrap(),
821 Some(br"[^/]*\.o(?:/|$)".to_vec()),
819 Some(br"[^/]*\.o(?:/|$)".to_vec()),
822 );
820 );
823 }
821 }
824
822
825 #[test]
823 #[test]
826 fn test_build_single_relregex() {
824 fn test_build_single_relregex() {
827 assert_eq!(
825 assert_eq!(
828 build_single_regex(
826 build_single_regex(
829 &IgnorePattern::new(
827 &IgnorePattern::new(
830 PatternSyntax::RelRegexp,
828 PatternSyntax::RelRegexp,
831 b"^ba{2}r",
829 b"^ba{2}r",
832 Path::new("")
830 Path::new("")
833 ),
831 ),
834 b"(?:/|$)"
832 b"(?:/|$)"
835 )
833 )
836 .unwrap(),
834 .unwrap(),
837 Some(b"^ba{2}r".to_vec()),
835 Some(b"^ba{2}r".to_vec()),
838 );
836 );
839 assert_eq!(
837 assert_eq!(
840 build_single_regex(
838 build_single_regex(
841 &IgnorePattern::new(
839 &IgnorePattern::new(
842 PatternSyntax::RelRegexp,
840 PatternSyntax::RelRegexp,
843 b"ba{2}r",
841 b"ba{2}r",
844 Path::new("")
842 Path::new("")
845 ),
843 ),
846 b"(?:/|$)"
844 b"(?:/|$)"
847 )
845 )
848 .unwrap(),
846 .unwrap(),
849 Some(b".*ba{2}r".to_vec()),
847 Some(b".*ba{2}r".to_vec()),
850 );
848 );
851 assert_eq!(
849 assert_eq!(
852 build_single_regex(
850 build_single_regex(
853 &IgnorePattern::new(
851 &IgnorePattern::new(
854 PatternSyntax::RelRegexp,
852 PatternSyntax::RelRegexp,
855 b"(?ia)ba{2}r",
853 b"(?ia)ba{2}r",
856 Path::new("")
854 Path::new("")
857 ),
855 ),
858 b"(?:/|$)"
856 b"(?:/|$)"
859 )
857 )
860 .unwrap(),
858 .unwrap(),
861 Some(b"(?ia:.*ba{2}r)".to_vec()),
859 Some(b"(?ia:.*ba{2}r)".to_vec()),
862 );
860 );
863 assert_eq!(
861 assert_eq!(
864 build_single_regex(
862 build_single_regex(
865 &IgnorePattern::new(
863 &IgnorePattern::new(
866 PatternSyntax::RelRegexp,
864 PatternSyntax::RelRegexp,
867 b"(?ia)^ba{2}r",
865 b"(?ia)^ba{2}r",
868 Path::new("")
866 Path::new("")
869 ),
867 ),
870 b"(?:/|$)"
868 b"(?:/|$)"
871 )
869 )
872 .unwrap(),
870 .unwrap(),
873 Some(b"(?ia:^ba{2}r)".to_vec()),
871 Some(b"(?ia:^ba{2}r)".to_vec()),
874 );
872 );
875 }
873 }
876 }
874 }
@@ -1,2110 +1,2109 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use format_bytes::format_bytes;
10 use format_bytes::format_bytes;
11 use once_cell::sync::OnceCell;
11 use once_cell::sync::OnceCell;
12
12
13 use crate::{
13 use crate::{
14 dirstate::dirs_multiset::DirsChildrenMultiset,
14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 filepatterns::{
15 filepatterns::{
16 build_single_regex, filter_subincludes, get_patterns_from_file,
16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 PatternFileWarning, PatternResult,
17 PatternFileWarning, PatternResult,
18 },
18 },
19 utils::{
19 utils::{
20 files::find_dirs,
20 files::find_dirs,
21 hg_path::{HgPath, HgPathBuf, HgPathError},
21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 Escaped,
22 Escaped,
23 },
23 },
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 };
25 };
26
26
27 use crate::dirstate::status::IgnoreFnType;
27 use crate::dirstate::status::IgnoreFnType;
28 use crate::filepatterns::normalize_path_bytes;
28 use crate::filepatterns::normalize_path_bytes;
29 use std::collections::HashSet;
29 use std::collections::HashSet;
30 use std::fmt::{Display, Error, Formatter};
30 use std::fmt::{Display, Error, Formatter};
31 use std::ops::Deref;
32 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
33 use std::{borrow::ToOwned, collections::BTreeSet};
32 use std::{borrow::ToOwned, collections::BTreeSet};
34
33
35 #[derive(Debug, PartialEq)]
34 #[derive(Debug, PartialEq)]
36 pub enum VisitChildrenSet {
35 pub enum VisitChildrenSet {
37 /// Don't visit anything
36 /// Don't visit anything
38 Empty,
37 Empty,
39 /// Only visit this directory
38 /// Only visit this directory
40 This,
39 This,
41 /// Visit this directory and these subdirectories
40 /// Visit this directory and these subdirectories
42 /// TODO Should we implement a `NonEmptyHashSet`?
41 /// TODO Should we implement a `NonEmptyHashSet`?
43 Set(HashSet<HgPathBuf>),
42 Set(HashSet<HgPathBuf>),
44 /// Visit this directory and all subdirectories
43 /// Visit this directory and all subdirectories
45 Recursive,
44 Recursive,
46 }
45 }
47
46
48 pub trait Matcher: core::fmt::Debug {
47 pub trait Matcher: core::fmt::Debug {
49 /// Explicitly listed files
48 /// Explicitly listed files
50 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
49 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
51 /// Returns whether `filename` is in `file_set`
50 /// Returns whether `filename` is in `file_set`
52 fn exact_match(&self, filename: &HgPath) -> bool;
51 fn exact_match(&self, filename: &HgPath) -> bool;
53 /// Returns whether `filename` is matched by this matcher
52 /// Returns whether `filename` is matched by this matcher
54 fn matches(&self, filename: &HgPath) -> bool;
53 fn matches(&self, filename: &HgPath) -> bool;
55 /// Decides whether a directory should be visited based on whether it
54 /// Decides whether a directory should be visited based on whether it
56 /// has potential matches in it or one of its subdirectories, and
55 /// has potential matches in it or one of its subdirectories, and
57 /// potentially lists which subdirectories of that directory should be
56 /// potentially lists which subdirectories of that directory should be
58 /// visited. This is based on the match's primary, included, and excluded
57 /// visited. This is based on the match's primary, included, and excluded
59 /// patterns.
58 /// patterns.
60 ///
59 ///
61 /// # Example
60 /// # Example
62 ///
61 ///
63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
62 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 /// return the following values (assuming the implementation of
63 /// return the following values (assuming the implementation of
65 /// visit_children_set is capable of recognizing this; some implementations
64 /// visit_children_set is capable of recognizing this; some implementations
66 /// are not).
65 /// are not).
67 ///
66 ///
68 /// ```text
67 /// ```text
69 /// ```ignore
68 /// ```ignore
70 /// '' -> {'foo', 'qux'}
69 /// '' -> {'foo', 'qux'}
71 /// 'baz' -> set()
70 /// 'baz' -> set()
72 /// 'foo' -> {'bar'}
71 /// 'foo' -> {'bar'}
73 /// // Ideally this would be `Recursive`, but since the prefix nature of
72 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 /// // matchers is applied to the entire matcher, we have to downgrade this
73 /// // matchers is applied to the entire matcher, we have to downgrade this
75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 /// // `RootFilesIn'-kind matcher being mixed in.
75 /// // `RootFilesIn'-kind matcher being mixed in.
77 /// 'foo/bar' -> 'this'
76 /// 'foo/bar' -> 'this'
78 /// 'qux' -> 'this'
77 /// 'qux' -> 'this'
79 /// ```
78 /// ```
80 /// # Important
79 /// # Important
81 ///
80 ///
82 /// Most matchers do not know if they're representing files or
81 /// Most matchers do not know if they're representing files or
83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 /// file or a directory, so `visit_children_set('dir')` for most matchers
83 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 /// it may return `VisitChildrenSet::This`.
86 /// it may return `VisitChildrenSet::This`.
88 /// Do not rely on the return being a `HashSet` indicating that there are
87 /// Do not rely on the return being a `HashSet` indicating that there are
89 /// no files in this dir to investigate (or equivalently that if there are
88 /// no files in this dir to investigate (or equivalently that if there are
90 /// files to investigate in 'dir' that it will always return
89 /// files to investigate in 'dir' that it will always return
91 /// `VisitChildrenSet::This`).
90 /// `VisitChildrenSet::This`).
92 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
91 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
93 /// Matcher will match everything and `files_set()` will be empty:
92 /// Matcher will match everything and `files_set()` will be empty:
94 /// optimization might be possible.
93 /// optimization might be possible.
95 fn matches_everything(&self) -> bool;
94 fn matches_everything(&self) -> bool;
96 /// Matcher will match exactly the files in `files_set()`: optimization
95 /// Matcher will match exactly the files in `files_set()`: optimization
97 /// might be possible.
96 /// might be possible.
98 fn is_exact(&self) -> bool;
97 fn is_exact(&self) -> bool;
99 }
98 }
100
99
101 /// Matches everything.
100 /// Matches everything.
102 ///```
101 ///```
103 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
102 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
104 ///
103 ///
105 /// let matcher = AlwaysMatcher;
104 /// let matcher = AlwaysMatcher;
106 ///
105 ///
107 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
106 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
108 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
107 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
109 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
108 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
110 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
109 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
111 /// ```
110 /// ```
112 #[derive(Debug)]
111 #[derive(Debug)]
113 pub struct AlwaysMatcher;
112 pub struct AlwaysMatcher;
114
113
115 impl Matcher for AlwaysMatcher {
114 impl Matcher for AlwaysMatcher {
116 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
115 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
117 None
116 None
118 }
117 }
119 fn exact_match(&self, _filename: &HgPath) -> bool {
118 fn exact_match(&self, _filename: &HgPath) -> bool {
120 false
119 false
121 }
120 }
122 fn matches(&self, _filename: &HgPath) -> bool {
121 fn matches(&self, _filename: &HgPath) -> bool {
123 true
122 true
124 }
123 }
125 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
124 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
126 VisitChildrenSet::Recursive
125 VisitChildrenSet::Recursive
127 }
126 }
128 fn matches_everything(&self) -> bool {
127 fn matches_everything(&self) -> bool {
129 true
128 true
130 }
129 }
131 fn is_exact(&self) -> bool {
130 fn is_exact(&self) -> bool {
132 false
131 false
133 }
132 }
134 }
133 }
135
134
136 /// Matches nothing.
135 /// Matches nothing.
137 #[derive(Debug)]
136 #[derive(Debug)]
138 pub struct NeverMatcher;
137 pub struct NeverMatcher;
139
138
140 impl Matcher for NeverMatcher {
139 impl Matcher for NeverMatcher {
141 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
140 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
142 None
141 None
143 }
142 }
144 fn exact_match(&self, _filename: &HgPath) -> bool {
143 fn exact_match(&self, _filename: &HgPath) -> bool {
145 false
144 false
146 }
145 }
147 fn matches(&self, _filename: &HgPath) -> bool {
146 fn matches(&self, _filename: &HgPath) -> bool {
148 false
147 false
149 }
148 }
150 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
149 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
151 VisitChildrenSet::Empty
150 VisitChildrenSet::Empty
152 }
151 }
153 fn matches_everything(&self) -> bool {
152 fn matches_everything(&self) -> bool {
154 false
153 false
155 }
154 }
156 fn is_exact(&self) -> bool {
155 fn is_exact(&self) -> bool {
157 true
156 true
158 }
157 }
159 }
158 }
160
159
161 /// Matches the input files exactly. They are interpreted as paths, not
160 /// Matches the input files exactly. They are interpreted as paths, not
162 /// patterns.
161 /// patterns.
163 ///
162 ///
164 ///```
163 ///```
165 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
164 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
166 ///
165 ///
167 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
166 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
168 /// let matcher = FileMatcher::new(files).unwrap();
167 /// let matcher = FileMatcher::new(files).unwrap();
169 ///
168 ///
170 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
169 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
171 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
170 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
172 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
171 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
173 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
172 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
174 /// ```
173 /// ```
175 #[derive(Debug)]
174 #[derive(Debug)]
176 pub struct FileMatcher {
175 pub struct FileMatcher {
177 files: HashSet<HgPathBuf>,
176 files: HashSet<HgPathBuf>,
178 dirs: DirsMultiset,
177 dirs: DirsMultiset,
179 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
178 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
180 }
179 }
181
180
182 impl FileMatcher {
181 impl FileMatcher {
183 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
182 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
184 let dirs = DirsMultiset::from_manifest(&files)?;
183 let dirs = DirsMultiset::from_manifest(&files)?;
185 Ok(Self {
184 Ok(Self {
186 files: HashSet::from_iter(files.into_iter()),
185 files: HashSet::from_iter(files),
187 dirs,
186 dirs,
188 sorted_visitchildrenset_candidates: OnceCell::new(),
187 sorted_visitchildrenset_candidates: OnceCell::new(),
189 })
188 })
190 }
189 }
191 fn inner_matches(&self, filename: &HgPath) -> bool {
190 fn inner_matches(&self, filename: &HgPath) -> bool {
192 self.files.contains(filename.as_ref())
191 self.files.contains(filename.as_ref())
193 }
192 }
194 }
193 }
195
194
196 impl Matcher for FileMatcher {
195 impl Matcher for FileMatcher {
197 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
196 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
198 Some(&self.files)
197 Some(&self.files)
199 }
198 }
200 fn exact_match(&self, filename: &HgPath) -> bool {
199 fn exact_match(&self, filename: &HgPath) -> bool {
201 self.inner_matches(filename)
200 self.inner_matches(filename)
202 }
201 }
203 fn matches(&self, filename: &HgPath) -> bool {
202 fn matches(&self, filename: &HgPath) -> bool {
204 self.inner_matches(filename)
203 self.inner_matches(filename)
205 }
204 }
206 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
205 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
207 if self.files.is_empty() || !self.dirs.contains(directory) {
206 if self.files.is_empty() || !self.dirs.contains(directory) {
208 return VisitChildrenSet::Empty;
207 return VisitChildrenSet::Empty;
209 }
208 }
210
209
211 let compute_candidates = || -> BTreeSet<HgPathBuf> {
210 let compute_candidates = || -> BTreeSet<HgPathBuf> {
212 let mut candidates: BTreeSet<HgPathBuf> =
211 let mut candidates: BTreeSet<HgPathBuf> =
213 self.dirs.iter().cloned().collect();
212 self.dirs.iter().cloned().collect();
214 candidates.extend(self.files.iter().cloned());
213 candidates.extend(self.files.iter().cloned());
215 candidates.remove(HgPath::new(b""));
214 candidates.remove(HgPath::new(b""));
216 candidates
215 candidates
217 };
216 };
218 let candidates =
217 let candidates =
219 if directory.as_ref().is_empty() {
218 if directory.as_ref().is_empty() {
220 compute_candidates()
219 compute_candidates()
221 } else {
220 } else {
222 let sorted_candidates = self
221 let sorted_candidates = self
223 .sorted_visitchildrenset_candidates
222 .sorted_visitchildrenset_candidates
224 .get_or_init(compute_candidates);
223 .get_or_init(compute_candidates);
225 let directory_bytes = directory.as_ref().as_bytes();
224 let directory_bytes = directory.as_ref().as_bytes();
226 let start: HgPathBuf =
225 let start: HgPathBuf =
227 format_bytes!(b"{}/", directory_bytes).into();
226 format_bytes!(b"{}/", directory_bytes).into();
228 let start_len = start.len();
227 let start_len = start.len();
229 // `0` sorts after `/`
228 // `0` sorts after `/`
230 let end = format_bytes!(b"{}0", directory_bytes).into();
229 let end = format_bytes!(b"{}0", directory_bytes).into();
231 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
230 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
232 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
231 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
233 ))
232 ))
234 };
233 };
235
234
236 // `self.dirs` includes all of the directories, recursively, so if
235 // `self.dirs` includes all of the directories, recursively, so if
237 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
236 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
238 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
237 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
239 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
238 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
240 // subdir will be in there without a slash.
239 // subdir will be in there without a slash.
241 VisitChildrenSet::Set(
240 VisitChildrenSet::Set(
242 candidates
241 candidates
243 .into_iter()
242 .into_iter()
244 .filter_map(|c| {
243 .filter_map(|c| {
245 if c.bytes().all(|b| *b != b'/') {
244 if c.bytes().all(|b| *b != b'/') {
246 Some(c)
245 Some(c)
247 } else {
246 } else {
248 None
247 None
249 }
248 }
250 })
249 })
251 .collect(),
250 .collect(),
252 )
251 )
253 }
252 }
254 fn matches_everything(&self) -> bool {
253 fn matches_everything(&self) -> bool {
255 false
254 false
256 }
255 }
257 fn is_exact(&self) -> bool {
256 fn is_exact(&self) -> bool {
258 true
257 true
259 }
258 }
260 }
259 }
261
260
262 /// Matches a set of (kind, pat, source) against a 'root' directory.
261 /// Matches a set of (kind, pat, source) against a 'root' directory.
263 /// (Currently the 'root' directory is effectively always empty)
262 /// (Currently the 'root' directory is effectively always empty)
264 /// ```
263 /// ```
265 /// use hg::{
264 /// use hg::{
266 /// matchers::{PatternMatcher, Matcher},
265 /// matchers::{PatternMatcher, Matcher},
267 /// IgnorePattern,
266 /// IgnorePattern,
268 /// PatternSyntax,
267 /// PatternSyntax,
269 /// utils::hg_path::{HgPath, HgPathBuf}
268 /// utils::hg_path::{HgPath, HgPathBuf}
270 /// };
269 /// };
271 /// use std::collections::HashSet;
270 /// use std::collections::HashSet;
272 /// use std::path::Path;
271 /// use std::path::Path;
273 /// ///
272 /// ///
274 /// let ignore_patterns : Vec<IgnorePattern> =
273 /// let ignore_patterns : Vec<IgnorePattern> =
275 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
274 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
276 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
275 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
276 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
278 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
279 /// ];
278 /// ];
280 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
279 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
281 /// ///
280 /// ///
282 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
281 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
283 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
282 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
284 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
283 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
285 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
284 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
286 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
285 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
287 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
286 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
288 /// assert_eq!(matcher.file_set().unwrap(),
287 /// assert_eq!(matcher.file_set().unwrap(),
289 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
288 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
290 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
289 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
291 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
290 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
291 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
293 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
294 /// ```
293 /// ```
295 pub struct PatternMatcher<'a> {
294 pub struct PatternMatcher<'a> {
296 patterns: Vec<u8>,
295 patterns: Vec<u8>,
297 match_fn: IgnoreFnType<'a>,
296 match_fn: IgnoreFnType<'a>,
298 /// Whether all the patterns match a prefix (i.e. recursively)
297 /// Whether all the patterns match a prefix (i.e. recursively)
299 prefix: bool,
298 prefix: bool,
300 files: HashSet<HgPathBuf>,
299 files: HashSet<HgPathBuf>,
301 dirs: DirsMultiset,
300 dirs: DirsMultiset,
302 }
301 }
303
302
304 impl core::fmt::Debug for PatternMatcher<'_> {
303 impl core::fmt::Debug for PatternMatcher<'_> {
305 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
304 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
306 f.debug_struct("PatternMatcher")
305 f.debug_struct("PatternMatcher")
307 .field("patterns", &String::from_utf8_lossy(&self.patterns))
306 .field("patterns", &String::from_utf8_lossy(&self.patterns))
308 .field("prefix", &self.prefix)
307 .field("prefix", &self.prefix)
309 .field("files", &self.files)
308 .field("files", &self.files)
310 .field("dirs", &self.dirs)
309 .field("dirs", &self.dirs)
311 .finish()
310 .finish()
312 }
311 }
313 }
312 }
314
313
315 impl<'a> PatternMatcher<'a> {
314 impl<'a> PatternMatcher<'a> {
316 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
315 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
317 let (files, _) = roots_and_dirs(&ignore_patterns);
316 let (files, _) = roots_and_dirs(&ignore_patterns);
318 let dirs = DirsMultiset::from_manifest(&files)?;
317 let dirs = DirsMultiset::from_manifest(&files)?;
319 let files: HashSet<HgPathBuf> = HashSet::from_iter(files.into_iter());
318 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
320
319
321 let prefix = ignore_patterns.iter().all(|k| {
320 let prefix = ignore_patterns.iter().all(|k| {
322 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
321 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
323 });
322 });
324 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
323 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
325
324
326 Ok(Self {
325 Ok(Self {
327 patterns,
326 patterns,
328 match_fn,
327 match_fn,
329 prefix,
328 prefix,
330 files,
329 files,
331 dirs,
330 dirs,
332 })
331 })
333 }
332 }
334 }
333 }
335
334
336 impl<'a> Matcher for PatternMatcher<'a> {
335 impl<'a> Matcher for PatternMatcher<'a> {
337 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
336 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
338 Some(&self.files)
337 Some(&self.files)
339 }
338 }
340
339
341 fn exact_match(&self, filename: &HgPath) -> bool {
340 fn exact_match(&self, filename: &HgPath) -> bool {
342 self.files.contains(filename)
341 self.files.contains(filename)
343 }
342 }
344
343
345 fn matches(&self, filename: &HgPath) -> bool {
344 fn matches(&self, filename: &HgPath) -> bool {
346 if self.files.contains(filename) {
345 if self.files.contains(filename) {
347 return true;
346 return true;
348 }
347 }
349 (self.match_fn)(filename)
348 (self.match_fn)(filename)
350 }
349 }
351
350
352 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
351 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
353 if self.prefix && self.files.contains(directory) {
352 if self.prefix && self.files.contains(directory) {
354 return VisitChildrenSet::Recursive;
353 return VisitChildrenSet::Recursive;
355 }
354 }
356 let path_or_parents_in_set = find_dirs(directory)
355 let path_or_parents_in_set = find_dirs(directory)
357 .any(|parent_dir| self.files.contains(parent_dir));
356 .any(|parent_dir| self.files.contains(parent_dir));
358 if self.dirs.contains(directory) || path_or_parents_in_set {
357 if self.dirs.contains(directory) || path_or_parents_in_set {
359 VisitChildrenSet::This
358 VisitChildrenSet::This
360 } else {
359 } else {
361 VisitChildrenSet::Empty
360 VisitChildrenSet::Empty
362 }
361 }
363 }
362 }
364
363
365 fn matches_everything(&self) -> bool {
364 fn matches_everything(&self) -> bool {
366 false
365 false
367 }
366 }
368
367
369 fn is_exact(&self) -> bool {
368 fn is_exact(&self) -> bool {
370 false
369 false
371 }
370 }
372 }
371 }
373
372
374 /// Matches files that are included in the ignore rules.
373 /// Matches files that are included in the ignore rules.
375 /// ```
374 /// ```
376 /// use hg::{
375 /// use hg::{
377 /// matchers::{IncludeMatcher, Matcher},
376 /// matchers::{IncludeMatcher, Matcher},
378 /// IgnorePattern,
377 /// IgnorePattern,
379 /// PatternSyntax,
378 /// PatternSyntax,
380 /// utils::hg_path::HgPath
379 /// utils::hg_path::HgPath
381 /// };
380 /// };
382 /// use std::path::Path;
381 /// use std::path::Path;
383 /// ///
382 /// ///
384 /// let ignore_patterns =
383 /// let ignore_patterns =
385 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
384 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
386 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
385 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
387 /// ///
386 /// ///
388 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
387 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
389 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
388 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
390 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
389 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
391 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
390 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
392 /// ```
391 /// ```
393 pub struct IncludeMatcher<'a> {
392 pub struct IncludeMatcher<'a> {
394 patterns: Vec<u8>,
393 patterns: Vec<u8>,
395 match_fn: IgnoreFnType<'a>,
394 match_fn: IgnoreFnType<'a>,
396 /// Whether all the patterns match a prefix (i.e. recursively)
395 /// Whether all the patterns match a prefix (i.e. recursively)
397 prefix: bool,
396 prefix: bool,
398 roots: HashSet<HgPathBuf>,
397 roots: HashSet<HgPathBuf>,
399 dirs: HashSet<HgPathBuf>,
398 dirs: HashSet<HgPathBuf>,
400 parents: HashSet<HgPathBuf>,
399 parents: HashSet<HgPathBuf>,
401 }
400 }
402
401
403 impl core::fmt::Debug for IncludeMatcher<'_> {
402 impl core::fmt::Debug for IncludeMatcher<'_> {
404 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
403 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
405 f.debug_struct("IncludeMatcher")
404 f.debug_struct("IncludeMatcher")
406 .field("patterns", &String::from_utf8_lossy(&self.patterns))
405 .field("patterns", &String::from_utf8_lossy(&self.patterns))
407 .field("prefix", &self.prefix)
406 .field("prefix", &self.prefix)
408 .field("roots", &self.roots)
407 .field("roots", &self.roots)
409 .field("dirs", &self.dirs)
408 .field("dirs", &self.dirs)
410 .field("parents", &self.parents)
409 .field("parents", &self.parents)
411 .finish()
410 .finish()
412 }
411 }
413 }
412 }
414
413
415 impl<'a> Matcher for IncludeMatcher<'a> {
414 impl<'a> Matcher for IncludeMatcher<'a> {
416 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
415 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
417 None
416 None
418 }
417 }
419
418
420 fn exact_match(&self, _filename: &HgPath) -> bool {
419 fn exact_match(&self, _filename: &HgPath) -> bool {
421 false
420 false
422 }
421 }
423
422
424 fn matches(&self, filename: &HgPath) -> bool {
423 fn matches(&self, filename: &HgPath) -> bool {
425 (self.match_fn)(filename)
424 (self.match_fn)(filename)
426 }
425 }
427
426
428 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
427 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
429 let dir = directory;
428 let dir = directory;
430 if self.prefix && self.roots.contains(dir) {
429 if self.prefix && self.roots.contains(dir) {
431 return VisitChildrenSet::Recursive;
430 return VisitChildrenSet::Recursive;
432 }
431 }
433 if self.roots.contains(HgPath::new(b""))
432 if self.roots.contains(HgPath::new(b""))
434 || self.roots.contains(dir)
433 || self.roots.contains(dir)
435 || self.dirs.contains(dir)
434 || self.dirs.contains(dir)
436 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
435 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
437 {
436 {
438 return VisitChildrenSet::This;
437 return VisitChildrenSet::This;
439 }
438 }
440
439
441 if self.parents.contains(dir.as_ref()) {
440 if self.parents.contains(dir.as_ref()) {
442 let multiset = self.get_all_parents_children();
441 let multiset = self.get_all_parents_children();
443 if let Some(children) = multiset.get(dir) {
442 if let Some(children) = multiset.get(dir) {
444 return VisitChildrenSet::Set(
443 return VisitChildrenSet::Set(
445 children.iter().map(HgPathBuf::from).collect(),
444 children.iter().map(HgPathBuf::from).collect(),
446 );
445 );
447 }
446 }
448 }
447 }
449 VisitChildrenSet::Empty
448 VisitChildrenSet::Empty
450 }
449 }
451
450
452 fn matches_everything(&self) -> bool {
451 fn matches_everything(&self) -> bool {
453 false
452 false
454 }
453 }
455
454
456 fn is_exact(&self) -> bool {
455 fn is_exact(&self) -> bool {
457 false
456 false
458 }
457 }
459 }
458 }
460
459
461 /// The union of multiple matchers. Will match if any of the matchers match.
460 /// The union of multiple matchers. Will match if any of the matchers match.
462 #[derive(Debug)]
461 #[derive(Debug)]
463 pub struct UnionMatcher {
462 pub struct UnionMatcher {
464 matchers: Vec<Box<dyn Matcher + Sync>>,
463 matchers: Vec<Box<dyn Matcher + Sync>>,
465 }
464 }
466
465
467 impl Matcher for UnionMatcher {
466 impl Matcher for UnionMatcher {
468 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
467 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
469 None
468 None
470 }
469 }
471
470
472 fn exact_match(&self, _filename: &HgPath) -> bool {
471 fn exact_match(&self, _filename: &HgPath) -> bool {
473 false
472 false
474 }
473 }
475
474
476 fn matches(&self, filename: &HgPath) -> bool {
475 fn matches(&self, filename: &HgPath) -> bool {
477 self.matchers.iter().any(|m| m.matches(filename))
476 self.matchers.iter().any(|m| m.matches(filename))
478 }
477 }
479
478
480 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
479 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
481 let mut result = HashSet::new();
480 let mut result = HashSet::new();
482 let mut this = false;
481 let mut this = false;
483 for matcher in self.matchers.iter() {
482 for matcher in self.matchers.iter() {
484 let visit = matcher.visit_children_set(directory);
483 let visit = matcher.visit_children_set(directory);
485 match visit {
484 match visit {
486 VisitChildrenSet::Empty => continue,
485 VisitChildrenSet::Empty => continue,
487 VisitChildrenSet::This => {
486 VisitChildrenSet::This => {
488 this = true;
487 this = true;
489 // Don't break, we might have an 'all' in here.
488 // Don't break, we might have an 'all' in here.
490 continue;
489 continue;
491 }
490 }
492 VisitChildrenSet::Set(set) => {
491 VisitChildrenSet::Set(set) => {
493 result.extend(set);
492 result.extend(set);
494 }
493 }
495 VisitChildrenSet::Recursive => {
494 VisitChildrenSet::Recursive => {
496 return visit;
495 return visit;
497 }
496 }
498 }
497 }
499 }
498 }
500 if this {
499 if this {
501 return VisitChildrenSet::This;
500 return VisitChildrenSet::This;
502 }
501 }
503 if result.is_empty() {
502 if result.is_empty() {
504 VisitChildrenSet::Empty
503 VisitChildrenSet::Empty
505 } else {
504 } else {
506 VisitChildrenSet::Set(result)
505 VisitChildrenSet::Set(result)
507 }
506 }
508 }
507 }
509
508
510 fn matches_everything(&self) -> bool {
509 fn matches_everything(&self) -> bool {
511 // TODO Maybe if all are AlwaysMatcher?
510 // TODO Maybe if all are AlwaysMatcher?
512 false
511 false
513 }
512 }
514
513
515 fn is_exact(&self) -> bool {
514 fn is_exact(&self) -> bool {
516 false
515 false
517 }
516 }
518 }
517 }
519
518
520 impl UnionMatcher {
519 impl UnionMatcher {
521 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
520 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
522 Self { matchers }
521 Self { matchers }
523 }
522 }
524 }
523 }
525
524
526 #[derive(Debug)]
525 #[derive(Debug)]
527 pub struct IntersectionMatcher {
526 pub struct IntersectionMatcher {
528 m1: Box<dyn Matcher + Sync>,
527 m1: Box<dyn Matcher + Sync>,
529 m2: Box<dyn Matcher + Sync>,
528 m2: Box<dyn Matcher + Sync>,
530 files: Option<HashSet<HgPathBuf>>,
529 files: Option<HashSet<HgPathBuf>>,
531 }
530 }
532
531
533 impl Matcher for IntersectionMatcher {
532 impl Matcher for IntersectionMatcher {
534 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
533 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
535 self.files.as_ref()
534 self.files.as_ref()
536 }
535 }
537
536
538 fn exact_match(&self, filename: &HgPath) -> bool {
537 fn exact_match(&self, filename: &HgPath) -> bool {
539 self.files.as_ref().map_or(false, |f| f.contains(filename))
538 self.files.as_ref().map_or(false, |f| f.contains(filename))
540 }
539 }
541
540
542 fn matches(&self, filename: &HgPath) -> bool {
541 fn matches(&self, filename: &HgPath) -> bool {
543 self.m1.matches(filename) && self.m2.matches(filename)
542 self.m1.matches(filename) && self.m2.matches(filename)
544 }
543 }
545
544
546 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
545 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
547 let m1_set = self.m1.visit_children_set(directory);
546 let m1_set = self.m1.visit_children_set(directory);
548 if m1_set == VisitChildrenSet::Empty {
547 if m1_set == VisitChildrenSet::Empty {
549 return VisitChildrenSet::Empty;
548 return VisitChildrenSet::Empty;
550 }
549 }
551 let m2_set = self.m2.visit_children_set(directory);
550 let m2_set = self.m2.visit_children_set(directory);
552 if m2_set == VisitChildrenSet::Empty {
551 if m2_set == VisitChildrenSet::Empty {
553 return VisitChildrenSet::Empty;
552 return VisitChildrenSet::Empty;
554 }
553 }
555
554
556 if m1_set == VisitChildrenSet::Recursive {
555 if m1_set == VisitChildrenSet::Recursive {
557 return m2_set;
556 return m2_set;
558 } else if m2_set == VisitChildrenSet::Recursive {
557 } else if m2_set == VisitChildrenSet::Recursive {
559 return m1_set;
558 return m1_set;
560 }
559 }
561
560
562 match (&m1_set, &m2_set) {
561 match (&m1_set, &m2_set) {
563 (VisitChildrenSet::Recursive, _) => m2_set,
562 (VisitChildrenSet::Recursive, _) => m2_set,
564 (_, VisitChildrenSet::Recursive) => m1_set,
563 (_, VisitChildrenSet::Recursive) => m1_set,
565 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
564 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
566 VisitChildrenSet::This
565 VisitChildrenSet::This
567 }
566 }
568 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
567 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
569 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
568 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
570 if set.is_empty() {
569 if set.is_empty() {
571 VisitChildrenSet::Empty
570 VisitChildrenSet::Empty
572 } else {
571 } else {
573 VisitChildrenSet::Set(set)
572 VisitChildrenSet::Set(set)
574 }
573 }
575 }
574 }
576 _ => unreachable!(),
575 _ => unreachable!(),
577 }
576 }
578 }
577 }
579
578
580 fn matches_everything(&self) -> bool {
579 fn matches_everything(&self) -> bool {
581 self.m1.matches_everything() && self.m2.matches_everything()
580 self.m1.matches_everything() && self.m2.matches_everything()
582 }
581 }
583
582
584 fn is_exact(&self) -> bool {
583 fn is_exact(&self) -> bool {
585 self.m1.is_exact() || self.m2.is_exact()
584 self.m1.is_exact() || self.m2.is_exact()
586 }
585 }
587 }
586 }
588
587
589 impl IntersectionMatcher {
588 impl IntersectionMatcher {
590 pub fn new(
589 pub fn new(
591 mut m1: Box<dyn Matcher + Sync>,
590 mut m1: Box<dyn Matcher + Sync>,
592 mut m2: Box<dyn Matcher + Sync>,
591 mut m2: Box<dyn Matcher + Sync>,
593 ) -> Self {
592 ) -> Self {
594 let files = if m1.is_exact() || m2.is_exact() {
593 let files = if m1.is_exact() || m2.is_exact() {
595 if !m1.is_exact() {
594 if !m1.is_exact() {
596 std::mem::swap(&mut m1, &mut m2);
595 std::mem::swap(&mut m1, &mut m2);
597 }
596 }
598 m1.file_set().map(|m1_files| {
597 m1.file_set().map(|m1_files| {
599 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
598 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
600 })
599 })
601 } else {
600 } else {
602 // without exact input file sets, we can't do an exact
601 // without exact input file sets, we can't do an exact
603 // intersection, so we must over-approximate by
602 // intersection, so we must over-approximate by
604 // unioning instead
603 // unioning instead
605 m1.file_set().map(|m1_files| match m2.file_set() {
604 m1.file_set().map(|m1_files| match m2.file_set() {
606 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
605 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
607 None => m1_files.iter().cloned().collect(),
606 None => m1_files.iter().cloned().collect(),
608 })
607 })
609 };
608 };
610 Self { m1, m2, files }
609 Self { m1, m2, files }
611 }
610 }
612 }
611 }
613
612
614 #[derive(Debug)]
613 #[derive(Debug)]
615 pub struct DifferenceMatcher {
614 pub struct DifferenceMatcher {
616 base: Box<dyn Matcher + Sync>,
615 base: Box<dyn Matcher + Sync>,
617 excluded: Box<dyn Matcher + Sync>,
616 excluded: Box<dyn Matcher + Sync>,
618 files: Option<HashSet<HgPathBuf>>,
617 files: Option<HashSet<HgPathBuf>>,
619 }
618 }
620
619
621 impl Matcher for DifferenceMatcher {
620 impl Matcher for DifferenceMatcher {
622 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
621 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
623 self.files.as_ref()
622 self.files.as_ref()
624 }
623 }
625
624
626 fn exact_match(&self, filename: &HgPath) -> bool {
625 fn exact_match(&self, filename: &HgPath) -> bool {
627 self.files.as_ref().map_or(false, |f| f.contains(filename))
626 self.files.as_ref().map_or(false, |f| f.contains(filename))
628 }
627 }
629
628
630 fn matches(&self, filename: &HgPath) -> bool {
629 fn matches(&self, filename: &HgPath) -> bool {
631 self.base.matches(filename) && !self.excluded.matches(filename)
630 self.base.matches(filename) && !self.excluded.matches(filename)
632 }
631 }
633
632
634 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
633 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
635 let excluded_set = self.excluded.visit_children_set(directory);
634 let excluded_set = self.excluded.visit_children_set(directory);
636 if excluded_set == VisitChildrenSet::Recursive {
635 if excluded_set == VisitChildrenSet::Recursive {
637 return VisitChildrenSet::Empty;
636 return VisitChildrenSet::Empty;
638 }
637 }
639 let base_set = self.base.visit_children_set(directory);
638 let base_set = self.base.visit_children_set(directory);
640 // Possible values for base: 'recursive', 'this', set(...), set()
639 // Possible values for base: 'recursive', 'this', set(...), set()
641 // Possible values for excluded: 'this', set(...), set()
640 // Possible values for excluded: 'this', set(...), set()
642 // If excluded has nothing under here that we care about, return base,
641 // If excluded has nothing under here that we care about, return base,
643 // even if it's 'recursive'.
642 // even if it's 'recursive'.
644 if excluded_set == VisitChildrenSet::Empty {
643 if excluded_set == VisitChildrenSet::Empty {
645 return base_set;
644 return base_set;
646 }
645 }
647 match base_set {
646 match base_set {
648 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
647 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
649 // Never return 'recursive' here if excluded_set is any kind of
648 // Never return 'recursive' here if excluded_set is any kind of
650 // non-empty (either 'this' or set(foo)), since excluded might
649 // non-empty (either 'this' or set(foo)), since excluded might
651 // return set() for a subdirectory.
650 // return set() for a subdirectory.
652 VisitChildrenSet::This
651 VisitChildrenSet::This
653 }
652 }
654 set => {
653 set => {
655 // Possible values for base: set(...), set()
654 // Possible values for base: set(...), set()
656 // Possible values for excluded: 'this', set(...)
655 // Possible values for excluded: 'this', set(...)
657 // We ignore excluded set results. They're possibly incorrect:
656 // We ignore excluded set results. They're possibly incorrect:
658 // base = path:dir/subdir
657 // base = path:dir/subdir
659 // excluded=rootfilesin:dir,
658 // excluded=rootfilesin:dir,
660 // visit_children_set(''):
659 // visit_children_set(''):
661 // base returns {'dir'}, excluded returns {'dir'}, if we
660 // base returns {'dir'}, excluded returns {'dir'}, if we
662 // subtracted we'd return set(), which is *not* correct, we
661 // subtracted we'd return set(), which is *not* correct, we
663 // still need to visit 'dir'!
662 // still need to visit 'dir'!
664 set
663 set
665 }
664 }
666 }
665 }
667 }
666 }
668
667
669 fn matches_everything(&self) -> bool {
668 fn matches_everything(&self) -> bool {
670 false
669 false
671 }
670 }
672
671
673 fn is_exact(&self) -> bool {
672 fn is_exact(&self) -> bool {
674 self.base.is_exact()
673 self.base.is_exact()
675 }
674 }
676 }
675 }
677
676
678 impl DifferenceMatcher {
677 impl DifferenceMatcher {
679 pub fn new(
678 pub fn new(
680 base: Box<dyn Matcher + Sync>,
679 base: Box<dyn Matcher + Sync>,
681 excluded: Box<dyn Matcher + Sync>,
680 excluded: Box<dyn Matcher + Sync>,
682 ) -> Self {
681 ) -> Self {
683 let base_is_exact = base.is_exact();
682 let base_is_exact = base.is_exact();
684 let base_files = base.file_set().map(ToOwned::to_owned);
683 let base_files = base.file_set().map(ToOwned::to_owned);
685 let mut new = Self {
684 let mut new = Self {
686 base,
685 base,
687 excluded,
686 excluded,
688 files: None,
687 files: None,
689 };
688 };
690 if base_is_exact {
689 if base_is_exact {
691 new.files = base_files.map(|files| {
690 new.files = base_files.map(|files| {
692 files.iter().cloned().filter(|f| new.matches(f)).collect()
691 files.iter().cloned().filter(|f| new.matches(f)).collect()
693 });
692 });
694 }
693 }
695 new
694 new
696 }
695 }
697 }
696 }
698
697
699 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
698 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
700 /// contexts.
699 /// contexts.
701 ///
700 ///
702 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
701 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
703 /// from many threads at once is prone to contention, probably within the
702 /// from many threads at once is prone to contention, probably within the
704 /// scratch space needed as the regex DFA is built lazily.
703 /// scratch space needed as the regex DFA is built lazily.
705 ///
704 ///
706 /// We are in the process of raising the issue upstream, but for now
705 /// We are in the process of raising the issue upstream, but for now
707 /// the workaround used here is to store the `Regex` in a lazily populated
706 /// the workaround used here is to store the `Regex` in a lazily populated
708 /// thread-local variable, sharing the initial read-only compilation, but
707 /// thread-local variable, sharing the initial read-only compilation, but
709 /// not the lazy dfa scratch space mentioned above.
708 /// not the lazy dfa scratch space mentioned above.
710 ///
709 ///
711 /// This reduces the contention observed with 16+ threads, but does not
710 /// This reduces the contention observed with 16+ threads, but does not
712 /// completely remove it. Hopefully this can be addressed upstream.
711 /// completely remove it. Hopefully this can be addressed upstream.
713 struct RegexMatcher {
712 struct RegexMatcher {
714 /// Compiled at the start of the status algorithm, used as a base for
713 /// Compiled at the start of the status algorithm, used as a base for
715 /// cloning in each thread-local `self.local`, thus sharing the expensive
714 /// cloning in each thread-local `self.local`, thus sharing the expensive
716 /// first compilation.
715 /// first compilation.
717 base: regex::bytes::Regex,
716 base: regex::bytes::Regex,
718 /// Thread-local variable that holds the `Regex` that is actually queried
717 /// Thread-local variable that holds the `Regex` that is actually queried
719 /// from each thread.
718 /// from each thread.
720 local: thread_local::ThreadLocal<regex::bytes::Regex>,
719 local: thread_local::ThreadLocal<regex::bytes::Regex>,
721 }
720 }
722
721
723 impl RegexMatcher {
722 impl RegexMatcher {
724 /// Returns whether the path matches the stored `Regex`.
723 /// Returns whether the path matches the stored `Regex`.
725 pub fn is_match(&self, path: &HgPath) -> bool {
724 pub fn is_match(&self, path: &HgPath) -> bool {
726 self.local
725 self.local
727 .get_or(|| self.base.clone())
726 .get_or(|| self.base.clone())
728 .is_match(path.as_bytes())
727 .is_match(path.as_bytes())
729 }
728 }
730 }
729 }
731
730
732 /// Returns a function that matches an `HgPath` against the given regex
731 /// Returns a function that matches an `HgPath` against the given regex
733 /// pattern.
732 /// pattern.
734 ///
733 ///
735 /// This can fail when the pattern is invalid or not supported by the
734 /// This can fail when the pattern is invalid or not supported by the
736 /// underlying engine (the `regex` crate), for instance anything with
735 /// underlying engine (the `regex` crate), for instance anything with
737 /// back-references.
736 /// back-references.
738 #[logging_timer::time("trace")]
737 #[logging_timer::time("trace")]
739 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
738 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
740 use std::io::Write;
739 use std::io::Write;
741
740
742 // The `regex` crate adds `.*` to the start and end of expressions if there
741 // The `regex` crate adds `.*` to the start and end of expressions if there
743 // are no anchors, so add the start anchor.
742 // are no anchors, so add the start anchor.
744 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
743 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
745 for byte in pattern {
744 for byte in pattern {
746 if *byte > 127 {
745 if *byte > 127 {
747 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
746 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
748 } else {
747 } else {
749 escaped_bytes.push(*byte);
748 escaped_bytes.push(*byte);
750 }
749 }
751 }
750 }
752 escaped_bytes.push(b')');
751 escaped_bytes.push(b')');
753
752
754 // Avoid the cost of UTF8 checking
753 // Avoid the cost of UTF8 checking
755 //
754 //
756 // # Safety
755 // # Safety
757 // This is safe because we escaped all non-ASCII bytes.
756 // This is safe because we escaped all non-ASCII bytes.
758 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
757 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
759 let re = regex::bytes::RegexBuilder::new(&pattern_string)
758 let re = regex::bytes::RegexBuilder::new(&pattern_string)
760 .unicode(false)
759 .unicode(false)
761 // Big repos with big `.hgignore` will hit the default limit and
760 // Big repos with big `.hgignore` will hit the default limit and
762 // incur a significant performance hit. One repo's `hg status` hit
761 // incur a significant performance hit. One repo's `hg status` hit
763 // multiple *minutes*.
762 // multiple *minutes*.
764 .dfa_size_limit(50 * (1 << 20))
763 .dfa_size_limit(50 * (1 << 20))
765 .build()
764 .build()
766 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
765 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
767
766
768 Ok(RegexMatcher {
767 Ok(RegexMatcher {
769 base: re,
768 base: re,
770 local: Default::default(),
769 local: Default::default(),
771 })
770 })
772 }
771 }
773
772
774 /// Returns the regex pattern and a function that matches an `HgPath` against
773 /// Returns the regex pattern and a function that matches an `HgPath` against
775 /// said regex formed by the given ignore patterns.
774 /// said regex formed by the given ignore patterns.
776 fn build_regex_match<'a, 'b>(
775 fn build_regex_match<'a>(
777 ignore_patterns: &'a [IgnorePattern],
776 ignore_patterns: &[IgnorePattern],
778 glob_suffix: &[u8],
777 glob_suffix: &[u8],
779 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
778 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
780 let mut regexps = vec![];
779 let mut regexps = vec![];
781 let mut exact_set = HashSet::new();
780 let mut exact_set = HashSet::new();
782
781
783 for pattern in ignore_patterns {
782 for pattern in ignore_patterns {
784 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
783 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
785 regexps.push(re);
784 regexps.push(re);
786 } else {
785 } else {
787 let exact = normalize_path_bytes(&pattern.pattern);
786 let exact = normalize_path_bytes(&pattern.pattern);
788 exact_set.insert(HgPathBuf::from_bytes(&exact));
787 exact_set.insert(HgPathBuf::from_bytes(&exact));
789 }
788 }
790 }
789 }
791
790
792 let full_regex = regexps.join(&b'|');
791 let full_regex = regexps.join(&b'|');
793
792
794 // An empty pattern would cause the regex engine to incorrectly match the
793 // An empty pattern would cause the regex engine to incorrectly match the
795 // (empty) root directory
794 // (empty) root directory
796 let func = if !(regexps.is_empty()) {
795 let func = if !(regexps.is_empty()) {
797 let matcher = re_matcher(&full_regex)?;
796 let matcher = re_matcher(&full_regex)?;
798 let func = move |filename: &HgPath| {
797 let func = move |filename: &HgPath| {
799 exact_set.contains(filename) || matcher.is_match(filename)
798 exact_set.contains(filename) || matcher.is_match(filename)
800 };
799 };
801 Box::new(func) as IgnoreFnType
800 Box::new(func) as IgnoreFnType
802 } else {
801 } else {
803 let func = move |filename: &HgPath| exact_set.contains(filename);
802 let func = move |filename: &HgPath| exact_set.contains(filename);
804 Box::new(func) as IgnoreFnType
803 Box::new(func) as IgnoreFnType
805 };
804 };
806
805
807 Ok((full_regex, func))
806 Ok((full_regex, func))
808 }
807 }
809
808
810 /// Returns roots and directories corresponding to each pattern.
809 /// Returns roots and directories corresponding to each pattern.
811 ///
810 ///
812 /// This calculates the roots and directories exactly matching the patterns and
811 /// This calculates the roots and directories exactly matching the patterns and
813 /// returns a tuple of (roots, dirs). It does not return other directories
812 /// returns a tuple of (roots, dirs). It does not return other directories
814 /// which may also need to be considered, like the parent directories.
813 /// which may also need to be considered, like the parent directories.
815 fn roots_and_dirs(
814 fn roots_and_dirs(
816 ignore_patterns: &[IgnorePattern],
815 ignore_patterns: &[IgnorePattern],
817 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
816 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
818 let mut roots = Vec::new();
817 let mut roots = Vec::new();
819 let mut dirs = Vec::new();
818 let mut dirs = Vec::new();
820
819
821 for ignore_pattern in ignore_patterns {
820 for ignore_pattern in ignore_patterns {
822 let IgnorePattern {
821 let IgnorePattern {
823 syntax, pattern, ..
822 syntax, pattern, ..
824 } = ignore_pattern;
823 } = ignore_pattern;
825 match syntax {
824 match syntax {
826 PatternSyntax::RootGlob | PatternSyntax::Glob => {
825 PatternSyntax::RootGlob | PatternSyntax::Glob => {
827 let mut root = HgPathBuf::new();
826 let mut root = HgPathBuf::new();
828 for p in pattern.split(|c| *c == b'/') {
827 for p in pattern.split(|c| *c == b'/') {
829 if p.iter()
828 if p.iter()
830 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
829 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
831 {
830 {
832 break;
831 break;
833 }
832 }
834 root.push(HgPathBuf::from_bytes(p).as_ref());
833 root.push(HgPathBuf::from_bytes(p).as_ref());
835 }
834 }
836 roots.push(root);
835 roots.push(root);
837 }
836 }
838 PatternSyntax::Path
837 PatternSyntax::Path
839 | PatternSyntax::RelPath
838 | PatternSyntax::RelPath
840 | PatternSyntax::FilePath => {
839 | PatternSyntax::FilePath => {
841 let pat = HgPath::new(if pattern == b"." {
840 let pat = HgPath::new(if pattern == b"." {
842 &[] as &[u8]
841 &[] as &[u8]
843 } else {
842 } else {
844 pattern
843 pattern
845 });
844 });
846 roots.push(pat.to_owned());
845 roots.push(pat.to_owned());
847 }
846 }
848 PatternSyntax::RootFiles => {
847 PatternSyntax::RootFiles => {
849 let pat = if pattern == b"." {
848 let pat = if pattern == b"." {
850 &[] as &[u8]
849 &[] as &[u8]
851 } else {
850 } else {
852 pattern
851 pattern
853 };
852 };
854 dirs.push(HgPathBuf::from_bytes(pat));
853 dirs.push(HgPathBuf::from_bytes(pat));
855 }
854 }
856 _ => {
855 _ => {
857 roots.push(HgPathBuf::new());
856 roots.push(HgPathBuf::new());
858 }
857 }
859 }
858 }
860 }
859 }
861 (roots, dirs)
860 (roots, dirs)
862 }
861 }
863
862
864 /// Paths extracted from patterns
863 /// Paths extracted from patterns
865 #[derive(Debug, PartialEq)]
864 #[derive(Debug, PartialEq)]
866 struct RootsDirsAndParents {
865 struct RootsDirsAndParents {
867 /// Directories to match recursively
866 /// Directories to match recursively
868 pub roots: HashSet<HgPathBuf>,
867 pub roots: HashSet<HgPathBuf>,
869 /// Directories to match non-recursively
868 /// Directories to match non-recursively
870 pub dirs: HashSet<HgPathBuf>,
869 pub dirs: HashSet<HgPathBuf>,
871 /// Implicitly required directories to go to items in either roots or dirs
870 /// Implicitly required directories to go to items in either roots or dirs
872 pub parents: HashSet<HgPathBuf>,
871 pub parents: HashSet<HgPathBuf>,
873 }
872 }
874
873
875 /// Extract roots, dirs and parents from patterns.
874 /// Extract roots, dirs and parents from patterns.
876 fn roots_dirs_and_parents(
875 fn roots_dirs_and_parents(
877 ignore_patterns: &[IgnorePattern],
876 ignore_patterns: &[IgnorePattern],
878 ) -> PatternResult<RootsDirsAndParents> {
877 ) -> PatternResult<RootsDirsAndParents> {
879 let (roots, dirs) = roots_and_dirs(ignore_patterns);
878 let (roots, dirs) = roots_and_dirs(ignore_patterns);
880
879
881 let mut parents = HashSet::new();
880 let mut parents = HashSet::new();
882
881
883 parents.extend(
882 parents.extend(
884 DirsMultiset::from_manifest(&dirs)?
883 DirsMultiset::from_manifest(&dirs)?
885 .iter()
884 .iter()
886 .map(ToOwned::to_owned),
885 .map(ToOwned::to_owned),
887 );
886 );
888 parents.extend(
887 parents.extend(
889 DirsMultiset::from_manifest(&roots)?
888 DirsMultiset::from_manifest(&roots)?
890 .iter()
889 .iter()
891 .map(ToOwned::to_owned),
890 .map(ToOwned::to_owned),
892 );
891 );
893
892
894 Ok(RootsDirsAndParents {
893 Ok(RootsDirsAndParents {
895 roots: HashSet::from_iter(roots),
894 roots: HashSet::from_iter(roots),
896 dirs: HashSet::from_iter(dirs),
895 dirs: HashSet::from_iter(dirs),
897 parents,
896 parents,
898 })
897 })
899 }
898 }
900
899
901 /// Returns a function that checks whether a given file (in the general sense)
900 /// Returns a function that checks whether a given file (in the general sense)
902 /// should be matched.
901 /// should be matched.
903 fn build_match<'a>(
902 fn build_match<'a>(
904 ignore_patterns: Vec<IgnorePattern>,
903 ignore_patterns: Vec<IgnorePattern>,
905 glob_suffix: &[u8],
904 glob_suffix: &[u8],
906 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
905 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
907 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
906 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
908 // For debugging and printing
907 // For debugging and printing
909 let mut patterns = vec![];
908 let mut patterns = vec![];
910
909
911 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
910 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
912
911
913 if !subincludes.is_empty() {
912 if !subincludes.is_empty() {
914 // Build prefix-based matcher functions for subincludes
913 // Build prefix-based matcher functions for subincludes
915 let mut submatchers = FastHashMap::default();
914 let mut submatchers = FastHashMap::default();
916 let mut prefixes = vec![];
915 let mut prefixes = vec![];
917
916
918 for sub_include in subincludes {
917 for sub_include in subincludes {
919 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
918 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
920 let match_fn =
919 let match_fn =
921 Box::new(move |path: &HgPath| matcher.matches(path));
920 Box::new(move |path: &HgPath| matcher.matches(path));
922 prefixes.push(sub_include.prefix.clone());
921 prefixes.push(sub_include.prefix.clone());
923 submatchers.insert(sub_include.prefix.clone(), match_fn);
922 submatchers.insert(sub_include.prefix.clone(), match_fn);
924 }
923 }
925
924
926 let match_subinclude = move |filename: &HgPath| {
925 let match_subinclude = move |filename: &HgPath| {
927 for prefix in prefixes.iter() {
926 for prefix in prefixes.iter() {
928 if let Some(rel) = filename.relative_to(prefix) {
927 if let Some(rel) = filename.relative_to(prefix) {
929 if (submatchers[prefix])(rel) {
928 if (submatchers[prefix])(rel) {
930 return true;
929 return true;
931 }
930 }
932 }
931 }
933 }
932 }
934 false
933 false
935 };
934 };
936
935
937 match_funcs.push(Box::new(match_subinclude));
936 match_funcs.push(Box::new(match_subinclude));
938 }
937 }
939
938
940 if !ignore_patterns.is_empty() {
939 if !ignore_patterns.is_empty() {
941 // Either do dumb matching if all patterns are rootfiles, or match
940 // Either do dumb matching if all patterns are rootfiles, or match
942 // with a regex.
941 // with a regex.
943 if ignore_patterns
942 if ignore_patterns
944 .iter()
943 .iter()
945 .all(|k| k.syntax == PatternSyntax::RootFiles)
944 .all(|k| k.syntax == PatternSyntax::RootFiles)
946 {
945 {
947 let dirs: HashSet<_> = ignore_patterns
946 let dirs: HashSet<_> = ignore_patterns
948 .iter()
947 .iter()
949 .map(|k| k.pattern.to_owned())
948 .map(|k| k.pattern.to_owned())
950 .collect();
949 .collect();
951 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
950 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
952
951
953 let match_func = move |path: &HgPath| -> bool {
952 let match_func = move |path: &HgPath| -> bool {
954 let path = path.as_bytes();
953 let path = path.as_bytes();
955 let i = path.iter().rfind(|a| **a == b'/');
954 let i = path.iter().rfind(|a| **a == b'/');
956 let dir = if let Some(i) = i {
955 let dir = if let Some(i) = i {
957 &path[..*i as usize]
956 &path[..*i as usize]
958 } else {
957 } else {
959 b"."
958 b"."
960 };
959 };
961 dirs.contains(dir.deref())
960 dirs.contains(dir)
962 };
961 };
963 match_funcs.push(Box::new(match_func));
962 match_funcs.push(Box::new(match_func));
964
963
965 patterns.extend(b"rootfilesin: ");
964 patterns.extend(b"rootfilesin: ");
966 dirs_vec.sort();
965 dirs_vec.sort();
967 patterns.extend(dirs_vec.escaped_bytes());
966 patterns.extend(dirs_vec.escaped_bytes());
968 } else {
967 } else {
969 let (new_re, match_func) =
968 let (new_re, match_func) =
970 build_regex_match(&ignore_patterns, glob_suffix)?;
969 build_regex_match(&ignore_patterns, glob_suffix)?;
971 patterns = new_re;
970 patterns = new_re;
972 match_funcs.push(match_func)
971 match_funcs.push(match_func)
973 }
972 }
974 }
973 }
975
974
976 Ok(if match_funcs.len() == 1 {
975 Ok(if match_funcs.len() == 1 {
977 (patterns, match_funcs.remove(0))
976 (patterns, match_funcs.remove(0))
978 } else {
977 } else {
979 (
978 (
980 patterns,
979 patterns,
981 Box::new(move |f: &HgPath| -> bool {
980 Box::new(move |f: &HgPath| -> bool {
982 match_funcs.iter().any(|match_func| match_func(f))
981 match_funcs.iter().any(|match_func| match_func(f))
983 }),
982 }),
984 )
983 )
985 })
984 })
986 }
985 }
987
986
988 /// Parses all "ignore" files with their recursive includes and returns a
987 /// Parses all "ignore" files with their recursive includes and returns a
989 /// function that checks whether a given file (in the general sense) should be
988 /// function that checks whether a given file (in the general sense) should be
990 /// ignored.
989 /// ignored.
991 pub fn get_ignore_matcher<'a>(
990 pub fn get_ignore_matcher<'a>(
992 mut all_pattern_files: Vec<PathBuf>,
991 mut all_pattern_files: Vec<PathBuf>,
993 root_dir: &Path,
992 root_dir: &Path,
994 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
993 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
995 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
994 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
996 let mut all_patterns = vec![];
995 let mut all_patterns = vec![];
997 let mut all_warnings = vec![];
996 let mut all_warnings = vec![];
998
997
999 // Sort to make the ordering of calls to `inspect_pattern_bytes`
998 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1000 // deterministic even if the ordering of `all_pattern_files` is not (such
999 // deterministic even if the ordering of `all_pattern_files` is not (such
1001 // as when a iteration order of a Python dict or Rust HashMap is involved).
1000 // as when a iteration order of a Python dict or Rust HashMap is involved).
1002 // Sort by "string" representation instead of the default by component
1001 // Sort by "string" representation instead of the default by component
1003 // (with a Rust-specific definition of a component)
1002 // (with a Rust-specific definition of a component)
1004 all_pattern_files
1003 all_pattern_files
1005 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1004 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1006
1005
1007 for pattern_file in &all_pattern_files {
1006 for pattern_file in &all_pattern_files {
1008 let (patterns, warnings) = get_patterns_from_file(
1007 let (patterns, warnings) = get_patterns_from_file(
1009 pattern_file,
1008 pattern_file,
1010 root_dir,
1009 root_dir,
1011 inspect_pattern_bytes,
1010 inspect_pattern_bytes,
1012 )?;
1011 )?;
1013
1012
1014 all_patterns.extend(patterns.to_owned());
1013 all_patterns.extend(patterns.to_owned());
1015 all_warnings.extend(warnings);
1014 all_warnings.extend(warnings);
1016 }
1015 }
1017 let matcher = IncludeMatcher::new(all_patterns)?;
1016 let matcher = IncludeMatcher::new(all_patterns)?;
1018 Ok((matcher, all_warnings))
1017 Ok((matcher, all_warnings))
1019 }
1018 }
1020
1019
1021 /// Parses all "ignore" files with their recursive includes and returns a
1020 /// Parses all "ignore" files with their recursive includes and returns a
1022 /// function that checks whether a given file (in the general sense) should be
1021 /// function that checks whether a given file (in the general sense) should be
1023 /// ignored.
1022 /// ignored.
1024 pub fn get_ignore_function<'a>(
1023 pub fn get_ignore_function<'a>(
1025 all_pattern_files: Vec<PathBuf>,
1024 all_pattern_files: Vec<PathBuf>,
1026 root_dir: &Path,
1025 root_dir: &Path,
1027 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1026 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1028 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1027 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1029 let res =
1028 let res =
1030 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1029 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1031 res.map(|(matcher, all_warnings)| {
1030 res.map(|(matcher, all_warnings)| {
1032 let res: IgnoreFnType<'a> =
1031 let res: IgnoreFnType<'a> =
1033 Box::new(move |path: &HgPath| matcher.matches(path));
1032 Box::new(move |path: &HgPath| matcher.matches(path));
1034
1033
1035 (res, all_warnings)
1034 (res, all_warnings)
1036 })
1035 })
1037 }
1036 }
1038
1037
1039 impl<'a> IncludeMatcher<'a> {
1038 impl<'a> IncludeMatcher<'a> {
1040 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1039 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1041 let RootsDirsAndParents {
1040 let RootsDirsAndParents {
1042 roots,
1041 roots,
1043 dirs,
1042 dirs,
1044 parents,
1043 parents,
1045 } = roots_dirs_and_parents(&ignore_patterns)?;
1044 } = roots_dirs_and_parents(&ignore_patterns)?;
1046 let prefix = ignore_patterns.iter().all(|k| {
1045 let prefix = ignore_patterns.iter().all(|k| {
1047 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1046 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1048 });
1047 });
1049 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1048 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1050
1049
1051 Ok(Self {
1050 Ok(Self {
1052 patterns,
1051 patterns,
1053 match_fn,
1052 match_fn,
1054 prefix,
1053 prefix,
1055 roots,
1054 roots,
1056 dirs,
1055 dirs,
1057 parents,
1056 parents,
1058 })
1057 })
1059 }
1058 }
1060
1059
1061 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1060 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1062 // TODO cache
1061 // TODO cache
1063 let thing = self
1062 let thing = self
1064 .dirs
1063 .dirs
1065 .iter()
1064 .iter()
1066 .chain(self.roots.iter())
1065 .chain(self.roots.iter())
1067 .chain(self.parents.iter());
1066 .chain(self.parents.iter());
1068 DirsChildrenMultiset::new(thing, Some(&self.parents))
1067 DirsChildrenMultiset::new(thing, Some(&self.parents))
1069 }
1068 }
1070
1069
1071 pub fn debug_get_patterns(&self) -> &[u8] {
1070 pub fn debug_get_patterns(&self) -> &[u8] {
1072 self.patterns.as_ref()
1071 self.patterns.as_ref()
1073 }
1072 }
1074 }
1073 }
1075
1074
1076 impl<'a> Display for IncludeMatcher<'a> {
1075 impl<'a> Display for IncludeMatcher<'a> {
1077 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1076 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1078 // XXX What about exact matches?
1077 // XXX What about exact matches?
1079 // I'm not sure it's worth it to clone the HashSet and keep it
1078 // I'm not sure it's worth it to clone the HashSet and keep it
1080 // around just in case someone wants to display the matcher, plus
1079 // around just in case someone wants to display the matcher, plus
1081 // it's going to be unreadable after a few entries, but we need to
1080 // it's going to be unreadable after a few entries, but we need to
1082 // inform in this display that exact matches are being used and are
1081 // inform in this display that exact matches are being used and are
1083 // (on purpose) missing from the `includes`.
1082 // (on purpose) missing from the `includes`.
1084 write!(
1083 write!(
1085 f,
1084 f,
1086 "IncludeMatcher(includes='{}')",
1085 "IncludeMatcher(includes='{}')",
1087 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1086 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1088 )
1087 )
1089 }
1088 }
1090 }
1089 }
1091
1090
1092 #[cfg(test)]
1091 #[cfg(test)]
1093 mod tests {
1092 mod tests {
1094 use super::*;
1093 use super::*;
1095 use pretty_assertions::assert_eq;
1094 use pretty_assertions::assert_eq;
1096 use std::path::Path;
1095 use std::path::Path;
1097
1096
1098 #[test]
1097 #[test]
1099 fn test_roots_and_dirs() {
1098 fn test_roots_and_dirs() {
1100 let pats = vec![
1099 let pats = vec![
1101 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1100 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1102 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1101 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1103 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1102 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1104 ];
1103 ];
1105 let (roots, dirs) = roots_and_dirs(&pats);
1104 let (roots, dirs) = roots_and_dirs(&pats);
1106
1105
1107 assert_eq!(
1106 assert_eq!(
1108 roots,
1107 roots,
1109 vec!(
1108 vec!(
1110 HgPathBuf::from_bytes(b"g/h"),
1109 HgPathBuf::from_bytes(b"g/h"),
1111 HgPathBuf::from_bytes(b"g/h"),
1110 HgPathBuf::from_bytes(b"g/h"),
1112 HgPathBuf::new()
1111 HgPathBuf::new()
1113 ),
1112 ),
1114 );
1113 );
1115 assert_eq!(dirs, vec!());
1114 assert_eq!(dirs, vec!());
1116 }
1115 }
1117
1116
1118 #[test]
1117 #[test]
1119 fn test_roots_dirs_and_parents() {
1118 fn test_roots_dirs_and_parents() {
1120 let pats = vec![
1119 let pats = vec![
1121 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1120 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1122 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1121 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1123 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1122 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1124 ];
1123 ];
1125
1124
1126 let mut roots = HashSet::new();
1125 let mut roots = HashSet::new();
1127 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1126 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1128 roots.insert(HgPathBuf::new());
1127 roots.insert(HgPathBuf::new());
1129
1128
1130 let dirs = HashSet::new();
1129 let dirs = HashSet::new();
1131
1130
1132 let mut parents = HashSet::new();
1131 let mut parents = HashSet::new();
1133 parents.insert(HgPathBuf::new());
1132 parents.insert(HgPathBuf::new());
1134 parents.insert(HgPathBuf::from_bytes(b"g"));
1133 parents.insert(HgPathBuf::from_bytes(b"g"));
1135
1134
1136 assert_eq!(
1135 assert_eq!(
1137 roots_dirs_and_parents(&pats).unwrap(),
1136 roots_dirs_and_parents(&pats).unwrap(),
1138 RootsDirsAndParents {
1137 RootsDirsAndParents {
1139 roots,
1138 roots,
1140 dirs,
1139 dirs,
1141 parents
1140 parents
1142 }
1141 }
1143 );
1142 );
1144 }
1143 }
1145
1144
1146 #[test]
1145 #[test]
1147 fn test_filematcher_visit_children_set() {
1146 fn test_filematcher_visit_children_set() {
1148 // Visitchildrenset
1147 // Visitchildrenset
1149 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1148 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1150 let matcher = FileMatcher::new(files).unwrap();
1149 let matcher = FileMatcher::new(files).unwrap();
1151
1150
1152 let mut set = HashSet::new();
1151 let mut set = HashSet::new();
1153 set.insert(HgPathBuf::from_bytes(b"dir"));
1152 set.insert(HgPathBuf::from_bytes(b"dir"));
1154 assert_eq!(
1153 assert_eq!(
1155 matcher.visit_children_set(HgPath::new(b"")),
1154 matcher.visit_children_set(HgPath::new(b"")),
1156 VisitChildrenSet::Set(set)
1155 VisitChildrenSet::Set(set)
1157 );
1156 );
1158
1157
1159 let mut set = HashSet::new();
1158 let mut set = HashSet::new();
1160 set.insert(HgPathBuf::from_bytes(b"subdir"));
1159 set.insert(HgPathBuf::from_bytes(b"subdir"));
1161 assert_eq!(
1160 assert_eq!(
1162 matcher.visit_children_set(HgPath::new(b"dir")),
1161 matcher.visit_children_set(HgPath::new(b"dir")),
1163 VisitChildrenSet::Set(set)
1162 VisitChildrenSet::Set(set)
1164 );
1163 );
1165
1164
1166 let mut set = HashSet::new();
1165 let mut set = HashSet::new();
1167 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1166 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1168 assert_eq!(
1167 assert_eq!(
1169 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1168 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1170 VisitChildrenSet::Set(set)
1169 VisitChildrenSet::Set(set)
1171 );
1170 );
1172
1171
1173 assert_eq!(
1172 assert_eq!(
1174 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1173 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1175 VisitChildrenSet::Empty
1174 VisitChildrenSet::Empty
1176 );
1175 );
1177 assert_eq!(
1176 assert_eq!(
1178 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1177 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1179 VisitChildrenSet::Empty
1178 VisitChildrenSet::Empty
1180 );
1179 );
1181 assert_eq!(
1180 assert_eq!(
1182 matcher.visit_children_set(HgPath::new(b"folder")),
1181 matcher.visit_children_set(HgPath::new(b"folder")),
1183 VisitChildrenSet::Empty
1182 VisitChildrenSet::Empty
1184 );
1183 );
1185 }
1184 }
1186
1185
1187 #[test]
1186 #[test]
1188 fn test_filematcher_visit_children_set_files_and_dirs() {
1187 fn test_filematcher_visit_children_set_files_and_dirs() {
1189 let files = vec![
1188 let files = vec![
1190 HgPathBuf::from_bytes(b"rootfile.txt"),
1189 HgPathBuf::from_bytes(b"rootfile.txt"),
1191 HgPathBuf::from_bytes(b"a/file1.txt"),
1190 HgPathBuf::from_bytes(b"a/file1.txt"),
1192 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1191 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1193 // No file in a/b/c
1192 // No file in a/b/c
1194 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1193 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1195 ];
1194 ];
1196 let matcher = FileMatcher::new(files).unwrap();
1195 let matcher = FileMatcher::new(files).unwrap();
1197
1196
1198 let mut set = HashSet::new();
1197 let mut set = HashSet::new();
1199 set.insert(HgPathBuf::from_bytes(b"a"));
1198 set.insert(HgPathBuf::from_bytes(b"a"));
1200 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1199 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1201 assert_eq!(
1200 assert_eq!(
1202 matcher.visit_children_set(HgPath::new(b"")),
1201 matcher.visit_children_set(HgPath::new(b"")),
1203 VisitChildrenSet::Set(set)
1202 VisitChildrenSet::Set(set)
1204 );
1203 );
1205
1204
1206 let mut set = HashSet::new();
1205 let mut set = HashSet::new();
1207 set.insert(HgPathBuf::from_bytes(b"b"));
1206 set.insert(HgPathBuf::from_bytes(b"b"));
1208 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1207 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1209 assert_eq!(
1208 assert_eq!(
1210 matcher.visit_children_set(HgPath::new(b"a")),
1209 matcher.visit_children_set(HgPath::new(b"a")),
1211 VisitChildrenSet::Set(set)
1210 VisitChildrenSet::Set(set)
1212 );
1211 );
1213
1212
1214 let mut set = HashSet::new();
1213 let mut set = HashSet::new();
1215 set.insert(HgPathBuf::from_bytes(b"c"));
1214 set.insert(HgPathBuf::from_bytes(b"c"));
1216 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1215 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1217 assert_eq!(
1216 assert_eq!(
1218 matcher.visit_children_set(HgPath::new(b"a/b")),
1217 matcher.visit_children_set(HgPath::new(b"a/b")),
1219 VisitChildrenSet::Set(set)
1218 VisitChildrenSet::Set(set)
1220 );
1219 );
1221
1220
1222 let mut set = HashSet::new();
1221 let mut set = HashSet::new();
1223 set.insert(HgPathBuf::from_bytes(b"d"));
1222 set.insert(HgPathBuf::from_bytes(b"d"));
1224 assert_eq!(
1223 assert_eq!(
1225 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1224 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1226 VisitChildrenSet::Set(set)
1225 VisitChildrenSet::Set(set)
1227 );
1226 );
1228 let mut set = HashSet::new();
1227 let mut set = HashSet::new();
1229 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1228 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1230 assert_eq!(
1229 assert_eq!(
1231 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1230 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1232 VisitChildrenSet::Set(set)
1231 VisitChildrenSet::Set(set)
1233 );
1232 );
1234
1233
1235 assert_eq!(
1234 assert_eq!(
1236 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1235 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1237 VisitChildrenSet::Empty
1236 VisitChildrenSet::Empty
1238 );
1237 );
1239 assert_eq!(
1238 assert_eq!(
1240 matcher.visit_children_set(HgPath::new(b"folder")),
1239 matcher.visit_children_set(HgPath::new(b"folder")),
1241 VisitChildrenSet::Empty
1240 VisitChildrenSet::Empty
1242 );
1241 );
1243 }
1242 }
1244
1243
1245 #[test]
1244 #[test]
1246 fn test_patternmatcher() {
1245 fn test_patternmatcher() {
1247 // VisitdirPrefix
1246 // VisitdirPrefix
1248 let m = PatternMatcher::new(vec![IgnorePattern::new(
1247 let m = PatternMatcher::new(vec![IgnorePattern::new(
1249 PatternSyntax::Path,
1248 PatternSyntax::Path,
1250 b"dir/subdir",
1249 b"dir/subdir",
1251 Path::new(""),
1250 Path::new(""),
1252 )])
1251 )])
1253 .unwrap();
1252 .unwrap();
1254 assert_eq!(
1253 assert_eq!(
1255 m.visit_children_set(HgPath::new(b"")),
1254 m.visit_children_set(HgPath::new(b"")),
1256 VisitChildrenSet::This
1255 VisitChildrenSet::This
1257 );
1256 );
1258 assert_eq!(
1257 assert_eq!(
1259 m.visit_children_set(HgPath::new(b"dir")),
1258 m.visit_children_set(HgPath::new(b"dir")),
1260 VisitChildrenSet::This
1259 VisitChildrenSet::This
1261 );
1260 );
1262 assert_eq!(
1261 assert_eq!(
1263 m.visit_children_set(HgPath::new(b"dir/subdir")),
1262 m.visit_children_set(HgPath::new(b"dir/subdir")),
1264 VisitChildrenSet::Recursive
1263 VisitChildrenSet::Recursive
1265 );
1264 );
1266 // OPT: This should probably be Recursive if its parent is?
1265 // OPT: This should probably be Recursive if its parent is?
1267 assert_eq!(
1266 assert_eq!(
1268 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1267 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1269 VisitChildrenSet::This
1268 VisitChildrenSet::This
1270 );
1269 );
1271 assert_eq!(
1270 assert_eq!(
1272 m.visit_children_set(HgPath::new(b"folder")),
1271 m.visit_children_set(HgPath::new(b"folder")),
1273 VisitChildrenSet::Empty
1272 VisitChildrenSet::Empty
1274 );
1273 );
1275
1274
1276 // VisitchildrensetPrefix
1275 // VisitchildrensetPrefix
1277 let m = PatternMatcher::new(vec![IgnorePattern::new(
1276 let m = PatternMatcher::new(vec![IgnorePattern::new(
1278 PatternSyntax::Path,
1277 PatternSyntax::Path,
1279 b"dir/subdir",
1278 b"dir/subdir",
1280 Path::new(""),
1279 Path::new(""),
1281 )])
1280 )])
1282 .unwrap();
1281 .unwrap();
1283 assert_eq!(
1282 assert_eq!(
1284 m.visit_children_set(HgPath::new(b"")),
1283 m.visit_children_set(HgPath::new(b"")),
1285 VisitChildrenSet::This
1284 VisitChildrenSet::This
1286 );
1285 );
1287 assert_eq!(
1286 assert_eq!(
1288 m.visit_children_set(HgPath::new(b"dir")),
1287 m.visit_children_set(HgPath::new(b"dir")),
1289 VisitChildrenSet::This
1288 VisitChildrenSet::This
1290 );
1289 );
1291 assert_eq!(
1290 assert_eq!(
1292 m.visit_children_set(HgPath::new(b"dir/subdir")),
1291 m.visit_children_set(HgPath::new(b"dir/subdir")),
1293 VisitChildrenSet::Recursive
1292 VisitChildrenSet::Recursive
1294 );
1293 );
1295 // OPT: This should probably be Recursive if its parent is?
1294 // OPT: This should probably be Recursive if its parent is?
1296 assert_eq!(
1295 assert_eq!(
1297 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1296 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1298 VisitChildrenSet::This
1297 VisitChildrenSet::This
1299 );
1298 );
1300 assert_eq!(
1299 assert_eq!(
1301 m.visit_children_set(HgPath::new(b"folder")),
1300 m.visit_children_set(HgPath::new(b"folder")),
1302 VisitChildrenSet::Empty
1301 VisitChildrenSet::Empty
1303 );
1302 );
1304
1303
1305 // VisitdirRootfilesin
1304 // VisitdirRootfilesin
1306 let m = PatternMatcher::new(vec![IgnorePattern::new(
1305 let m = PatternMatcher::new(vec![IgnorePattern::new(
1307 PatternSyntax::RootFiles,
1306 PatternSyntax::RootFiles,
1308 b"dir/subdir",
1307 b"dir/subdir",
1309 Path::new(""),
1308 Path::new(""),
1310 )])
1309 )])
1311 .unwrap();
1310 .unwrap();
1312 assert_eq!(
1311 assert_eq!(
1313 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1312 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1314 VisitChildrenSet::Empty
1313 VisitChildrenSet::Empty
1315 );
1314 );
1316 assert_eq!(
1315 assert_eq!(
1317 m.visit_children_set(HgPath::new(b"folder")),
1316 m.visit_children_set(HgPath::new(b"folder")),
1318 VisitChildrenSet::Empty
1317 VisitChildrenSet::Empty
1319 );
1318 );
1320 // FIXME: These should probably be This.
1319 // FIXME: These should probably be This.
1321 assert_eq!(
1320 assert_eq!(
1322 m.visit_children_set(HgPath::new(b"")),
1321 m.visit_children_set(HgPath::new(b"")),
1323 VisitChildrenSet::Empty
1322 VisitChildrenSet::Empty
1324 );
1323 );
1325 assert_eq!(
1324 assert_eq!(
1326 m.visit_children_set(HgPath::new(b"dir")),
1325 m.visit_children_set(HgPath::new(b"dir")),
1327 VisitChildrenSet::Empty
1326 VisitChildrenSet::Empty
1328 );
1327 );
1329 assert_eq!(
1328 assert_eq!(
1330 m.visit_children_set(HgPath::new(b"dir/subdir")),
1329 m.visit_children_set(HgPath::new(b"dir/subdir")),
1331 VisitChildrenSet::Empty
1330 VisitChildrenSet::Empty
1332 );
1331 );
1333
1332
1334 // VisitchildrensetRootfilesin
1333 // VisitchildrensetRootfilesin
1335 let m = PatternMatcher::new(vec![IgnorePattern::new(
1334 let m = PatternMatcher::new(vec![IgnorePattern::new(
1336 PatternSyntax::RootFiles,
1335 PatternSyntax::RootFiles,
1337 b"dir/subdir",
1336 b"dir/subdir",
1338 Path::new(""),
1337 Path::new(""),
1339 )])
1338 )])
1340 .unwrap();
1339 .unwrap();
1341 assert_eq!(
1340 assert_eq!(
1342 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1341 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1343 VisitChildrenSet::Empty
1342 VisitChildrenSet::Empty
1344 );
1343 );
1345 assert_eq!(
1344 assert_eq!(
1346 m.visit_children_set(HgPath::new(b"folder")),
1345 m.visit_children_set(HgPath::new(b"folder")),
1347 VisitChildrenSet::Empty
1346 VisitChildrenSet::Empty
1348 );
1347 );
1349 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1348 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1350 // respectively, or at least This for all three.
1349 // respectively, or at least This for all three.
1351 assert_eq!(
1350 assert_eq!(
1352 m.visit_children_set(HgPath::new(b"")),
1351 m.visit_children_set(HgPath::new(b"")),
1353 VisitChildrenSet::Empty
1352 VisitChildrenSet::Empty
1354 );
1353 );
1355 assert_eq!(
1354 assert_eq!(
1356 m.visit_children_set(HgPath::new(b"dir")),
1355 m.visit_children_set(HgPath::new(b"dir")),
1357 VisitChildrenSet::Empty
1356 VisitChildrenSet::Empty
1358 );
1357 );
1359 assert_eq!(
1358 assert_eq!(
1360 m.visit_children_set(HgPath::new(b"dir/subdir")),
1359 m.visit_children_set(HgPath::new(b"dir/subdir")),
1361 VisitChildrenSet::Empty
1360 VisitChildrenSet::Empty
1362 );
1361 );
1363
1362
1364 // VisitdirGlob
1363 // VisitdirGlob
1365 let m = PatternMatcher::new(vec![IgnorePattern::new(
1364 let m = PatternMatcher::new(vec![IgnorePattern::new(
1366 PatternSyntax::Glob,
1365 PatternSyntax::Glob,
1367 b"dir/z*",
1366 b"dir/z*",
1368 Path::new(""),
1367 Path::new(""),
1369 )])
1368 )])
1370 .unwrap();
1369 .unwrap();
1371 assert_eq!(
1370 assert_eq!(
1372 m.visit_children_set(HgPath::new(b"")),
1371 m.visit_children_set(HgPath::new(b"")),
1373 VisitChildrenSet::This
1372 VisitChildrenSet::This
1374 );
1373 );
1375 // FIXME: This probably should be This
1374 // FIXME: This probably should be This
1376 assert_eq!(
1375 assert_eq!(
1377 m.visit_children_set(HgPath::new(b"dir")),
1376 m.visit_children_set(HgPath::new(b"dir")),
1378 VisitChildrenSet::Empty
1377 VisitChildrenSet::Empty
1379 );
1378 );
1380 assert_eq!(
1379 assert_eq!(
1381 m.visit_children_set(HgPath::new(b"folder")),
1380 m.visit_children_set(HgPath::new(b"folder")),
1382 VisitChildrenSet::Empty
1381 VisitChildrenSet::Empty
1383 );
1382 );
1384 // OPT: these should probably be False.
1383 // OPT: these should probably be False.
1385 assert_eq!(
1384 assert_eq!(
1386 m.visit_children_set(HgPath::new(b"dir/subdir")),
1385 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 VisitChildrenSet::This
1386 VisitChildrenSet::This
1388 );
1387 );
1389 assert_eq!(
1388 assert_eq!(
1390 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1389 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1391 VisitChildrenSet::This
1390 VisitChildrenSet::This
1392 );
1391 );
1393
1392
1394 // VisitchildrensetGlob
1393 // VisitchildrensetGlob
1395 let m = PatternMatcher::new(vec![IgnorePattern::new(
1394 let m = PatternMatcher::new(vec![IgnorePattern::new(
1396 PatternSyntax::Glob,
1395 PatternSyntax::Glob,
1397 b"dir/z*",
1396 b"dir/z*",
1398 Path::new(""),
1397 Path::new(""),
1399 )])
1398 )])
1400 .unwrap();
1399 .unwrap();
1401 assert_eq!(
1400 assert_eq!(
1402 m.visit_children_set(HgPath::new(b"")),
1401 m.visit_children_set(HgPath::new(b"")),
1403 VisitChildrenSet::This
1402 VisitChildrenSet::This
1404 );
1403 );
1405 assert_eq!(
1404 assert_eq!(
1406 m.visit_children_set(HgPath::new(b"folder")),
1405 m.visit_children_set(HgPath::new(b"folder")),
1407 VisitChildrenSet::Empty
1406 VisitChildrenSet::Empty
1408 );
1407 );
1409 // FIXME: This probably should be This
1408 // FIXME: This probably should be This
1410 assert_eq!(
1409 assert_eq!(
1411 m.visit_children_set(HgPath::new(b"dir")),
1410 m.visit_children_set(HgPath::new(b"dir")),
1412 VisitChildrenSet::Empty
1411 VisitChildrenSet::Empty
1413 );
1412 );
1414 // OPT: these should probably be Empty
1413 // OPT: these should probably be Empty
1415 assert_eq!(
1414 assert_eq!(
1416 m.visit_children_set(HgPath::new(b"dir/subdir")),
1415 m.visit_children_set(HgPath::new(b"dir/subdir")),
1417 VisitChildrenSet::This
1416 VisitChildrenSet::This
1418 );
1417 );
1419 assert_eq!(
1418 assert_eq!(
1420 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1419 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1421 VisitChildrenSet::This
1420 VisitChildrenSet::This
1422 );
1421 );
1423
1422
1424 // VisitdirFilepath
1423 // VisitdirFilepath
1425 let m = PatternMatcher::new(vec![IgnorePattern::new(
1424 let m = PatternMatcher::new(vec![IgnorePattern::new(
1426 PatternSyntax::FilePath,
1425 PatternSyntax::FilePath,
1427 b"dir/z",
1426 b"dir/z",
1428 Path::new(""),
1427 Path::new(""),
1429 )])
1428 )])
1430 .unwrap();
1429 .unwrap();
1431 assert_eq!(
1430 assert_eq!(
1432 m.visit_children_set(HgPath::new(b"")),
1431 m.visit_children_set(HgPath::new(b"")),
1433 VisitChildrenSet::This
1432 VisitChildrenSet::This
1434 );
1433 );
1435 assert_eq!(
1434 assert_eq!(
1436 m.visit_children_set(HgPath::new(b"dir")),
1435 m.visit_children_set(HgPath::new(b"dir")),
1437 VisitChildrenSet::This
1436 VisitChildrenSet::This
1438 );
1437 );
1439 assert_eq!(
1438 assert_eq!(
1440 m.visit_children_set(HgPath::new(b"folder")),
1439 m.visit_children_set(HgPath::new(b"folder")),
1441 VisitChildrenSet::Empty
1440 VisitChildrenSet::Empty
1442 );
1441 );
1443 assert_eq!(
1442 assert_eq!(
1444 m.visit_children_set(HgPath::new(b"dir/subdir")),
1443 m.visit_children_set(HgPath::new(b"dir/subdir")),
1445 VisitChildrenSet::Empty
1444 VisitChildrenSet::Empty
1446 );
1445 );
1447 assert_eq!(
1446 assert_eq!(
1448 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1447 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1449 VisitChildrenSet::Empty
1448 VisitChildrenSet::Empty
1450 );
1449 );
1451
1450
1452 // VisitchildrensetFilepath
1451 // VisitchildrensetFilepath
1453 let m = PatternMatcher::new(vec![IgnorePattern::new(
1452 let m = PatternMatcher::new(vec![IgnorePattern::new(
1454 PatternSyntax::FilePath,
1453 PatternSyntax::FilePath,
1455 b"dir/z",
1454 b"dir/z",
1456 Path::new(""),
1455 Path::new(""),
1457 )])
1456 )])
1458 .unwrap();
1457 .unwrap();
1459 assert_eq!(
1458 assert_eq!(
1460 m.visit_children_set(HgPath::new(b"")),
1459 m.visit_children_set(HgPath::new(b"")),
1461 VisitChildrenSet::This
1460 VisitChildrenSet::This
1462 );
1461 );
1463 assert_eq!(
1462 assert_eq!(
1464 m.visit_children_set(HgPath::new(b"folder")),
1463 m.visit_children_set(HgPath::new(b"folder")),
1465 VisitChildrenSet::Empty
1464 VisitChildrenSet::Empty
1466 );
1465 );
1467 assert_eq!(
1466 assert_eq!(
1468 m.visit_children_set(HgPath::new(b"dir")),
1467 m.visit_children_set(HgPath::new(b"dir")),
1469 VisitChildrenSet::This
1468 VisitChildrenSet::This
1470 );
1469 );
1471 assert_eq!(
1470 assert_eq!(
1472 m.visit_children_set(HgPath::new(b"dir/subdir")),
1471 m.visit_children_set(HgPath::new(b"dir/subdir")),
1473 VisitChildrenSet::Empty
1472 VisitChildrenSet::Empty
1474 );
1473 );
1475 assert_eq!(
1474 assert_eq!(
1476 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1475 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1477 VisitChildrenSet::Empty
1476 VisitChildrenSet::Empty
1478 );
1477 );
1479 }
1478 }
1480
1479
1481 #[test]
1480 #[test]
1482 fn test_includematcher() {
1481 fn test_includematcher() {
1483 // VisitchildrensetPrefix
1482 // VisitchildrensetPrefix
1484 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1483 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1485 PatternSyntax::RelPath,
1484 PatternSyntax::RelPath,
1486 b"dir/subdir",
1485 b"dir/subdir",
1487 Path::new(""),
1486 Path::new(""),
1488 )])
1487 )])
1489 .unwrap();
1488 .unwrap();
1490
1489
1491 let mut set = HashSet::new();
1490 let mut set = HashSet::new();
1492 set.insert(HgPathBuf::from_bytes(b"dir"));
1491 set.insert(HgPathBuf::from_bytes(b"dir"));
1493 assert_eq!(
1492 assert_eq!(
1494 matcher.visit_children_set(HgPath::new(b"")),
1493 matcher.visit_children_set(HgPath::new(b"")),
1495 VisitChildrenSet::Set(set)
1494 VisitChildrenSet::Set(set)
1496 );
1495 );
1497
1496
1498 let mut set = HashSet::new();
1497 let mut set = HashSet::new();
1499 set.insert(HgPathBuf::from_bytes(b"subdir"));
1498 set.insert(HgPathBuf::from_bytes(b"subdir"));
1500 assert_eq!(
1499 assert_eq!(
1501 matcher.visit_children_set(HgPath::new(b"dir")),
1500 matcher.visit_children_set(HgPath::new(b"dir")),
1502 VisitChildrenSet::Set(set)
1501 VisitChildrenSet::Set(set)
1503 );
1502 );
1504 assert_eq!(
1503 assert_eq!(
1505 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1504 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1506 VisitChildrenSet::Recursive
1505 VisitChildrenSet::Recursive
1507 );
1506 );
1508 // OPT: This should probably be 'all' if its parent is?
1507 // OPT: This should probably be 'all' if its parent is?
1509 assert_eq!(
1508 assert_eq!(
1510 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1509 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1511 VisitChildrenSet::This
1510 VisitChildrenSet::This
1512 );
1511 );
1513 assert_eq!(
1512 assert_eq!(
1514 matcher.visit_children_set(HgPath::new(b"folder")),
1513 matcher.visit_children_set(HgPath::new(b"folder")),
1515 VisitChildrenSet::Empty
1514 VisitChildrenSet::Empty
1516 );
1515 );
1517
1516
1518 // VisitchildrensetRootfilesin
1517 // VisitchildrensetRootfilesin
1519 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1518 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1520 PatternSyntax::RootFiles,
1519 PatternSyntax::RootFiles,
1521 b"dir/subdir",
1520 b"dir/subdir",
1522 Path::new(""),
1521 Path::new(""),
1523 )])
1522 )])
1524 .unwrap();
1523 .unwrap();
1525
1524
1526 let mut set = HashSet::new();
1525 let mut set = HashSet::new();
1527 set.insert(HgPathBuf::from_bytes(b"dir"));
1526 set.insert(HgPathBuf::from_bytes(b"dir"));
1528 assert_eq!(
1527 assert_eq!(
1529 matcher.visit_children_set(HgPath::new(b"")),
1528 matcher.visit_children_set(HgPath::new(b"")),
1530 VisitChildrenSet::Set(set)
1529 VisitChildrenSet::Set(set)
1531 );
1530 );
1532
1531
1533 let mut set = HashSet::new();
1532 let mut set = HashSet::new();
1534 set.insert(HgPathBuf::from_bytes(b"subdir"));
1533 set.insert(HgPathBuf::from_bytes(b"subdir"));
1535 assert_eq!(
1534 assert_eq!(
1536 matcher.visit_children_set(HgPath::new(b"dir")),
1535 matcher.visit_children_set(HgPath::new(b"dir")),
1537 VisitChildrenSet::Set(set)
1536 VisitChildrenSet::Set(set)
1538 );
1537 );
1539
1538
1540 assert_eq!(
1539 assert_eq!(
1541 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1540 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1542 VisitChildrenSet::This
1541 VisitChildrenSet::This
1543 );
1542 );
1544 assert_eq!(
1543 assert_eq!(
1545 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1544 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1546 VisitChildrenSet::Empty
1545 VisitChildrenSet::Empty
1547 );
1546 );
1548 assert_eq!(
1547 assert_eq!(
1549 matcher.visit_children_set(HgPath::new(b"folder")),
1548 matcher.visit_children_set(HgPath::new(b"folder")),
1550 VisitChildrenSet::Empty
1549 VisitChildrenSet::Empty
1551 );
1550 );
1552
1551
1553 // VisitchildrensetGlob
1552 // VisitchildrensetGlob
1554 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1553 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1555 PatternSyntax::Glob,
1554 PatternSyntax::Glob,
1556 b"dir/z*",
1555 b"dir/z*",
1557 Path::new(""),
1556 Path::new(""),
1558 )])
1557 )])
1559 .unwrap();
1558 .unwrap();
1560
1559
1561 let mut set = HashSet::new();
1560 let mut set = HashSet::new();
1562 set.insert(HgPathBuf::from_bytes(b"dir"));
1561 set.insert(HgPathBuf::from_bytes(b"dir"));
1563 assert_eq!(
1562 assert_eq!(
1564 matcher.visit_children_set(HgPath::new(b"")),
1563 matcher.visit_children_set(HgPath::new(b"")),
1565 VisitChildrenSet::Set(set)
1564 VisitChildrenSet::Set(set)
1566 );
1565 );
1567 assert_eq!(
1566 assert_eq!(
1568 matcher.visit_children_set(HgPath::new(b"folder")),
1567 matcher.visit_children_set(HgPath::new(b"folder")),
1569 VisitChildrenSet::Empty
1568 VisitChildrenSet::Empty
1570 );
1569 );
1571 assert_eq!(
1570 assert_eq!(
1572 matcher.visit_children_set(HgPath::new(b"dir")),
1571 matcher.visit_children_set(HgPath::new(b"dir")),
1573 VisitChildrenSet::This
1572 VisitChildrenSet::This
1574 );
1573 );
1575 // OPT: these should probably be set().
1574 // OPT: these should probably be set().
1576 assert_eq!(
1575 assert_eq!(
1577 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1576 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1578 VisitChildrenSet::This
1577 VisitChildrenSet::This
1579 );
1578 );
1580 assert_eq!(
1579 assert_eq!(
1581 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1580 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1582 VisitChildrenSet::This
1581 VisitChildrenSet::This
1583 );
1582 );
1584
1583
1585 // VisitchildrensetFilePath
1584 // VisitchildrensetFilePath
1586 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1585 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1587 PatternSyntax::FilePath,
1586 PatternSyntax::FilePath,
1588 b"dir/z",
1587 b"dir/z",
1589 Path::new(""),
1588 Path::new(""),
1590 )])
1589 )])
1591 .unwrap();
1590 .unwrap();
1592
1591
1593 let mut set = HashSet::new();
1592 let mut set = HashSet::new();
1594 set.insert(HgPathBuf::from_bytes(b"dir"));
1593 set.insert(HgPathBuf::from_bytes(b"dir"));
1595 assert_eq!(
1594 assert_eq!(
1596 matcher.visit_children_set(HgPath::new(b"")),
1595 matcher.visit_children_set(HgPath::new(b"")),
1597 VisitChildrenSet::Set(set)
1596 VisitChildrenSet::Set(set)
1598 );
1597 );
1599 assert_eq!(
1598 assert_eq!(
1600 matcher.visit_children_set(HgPath::new(b"folder")),
1599 matcher.visit_children_set(HgPath::new(b"folder")),
1601 VisitChildrenSet::Empty
1600 VisitChildrenSet::Empty
1602 );
1601 );
1603 let mut set = HashSet::new();
1602 let mut set = HashSet::new();
1604 set.insert(HgPathBuf::from_bytes(b"z"));
1603 set.insert(HgPathBuf::from_bytes(b"z"));
1605 assert_eq!(
1604 assert_eq!(
1606 matcher.visit_children_set(HgPath::new(b"dir")),
1605 matcher.visit_children_set(HgPath::new(b"dir")),
1607 VisitChildrenSet::Set(set)
1606 VisitChildrenSet::Set(set)
1608 );
1607 );
1609 // OPT: these should probably be set().
1608 // OPT: these should probably be set().
1610 assert_eq!(
1609 assert_eq!(
1611 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1610 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1612 VisitChildrenSet::Empty
1611 VisitChildrenSet::Empty
1613 );
1612 );
1614 assert_eq!(
1613 assert_eq!(
1615 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1614 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1616 VisitChildrenSet::Empty
1615 VisitChildrenSet::Empty
1617 );
1616 );
1618
1617
1619 // Test multiple patterns
1618 // Test multiple patterns
1620 let matcher = IncludeMatcher::new(vec![
1619 let matcher = IncludeMatcher::new(vec![
1621 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1620 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1622 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1621 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1623 ])
1622 ])
1624 .unwrap();
1623 .unwrap();
1625
1624
1626 assert_eq!(
1625 assert_eq!(
1627 matcher.visit_children_set(HgPath::new(b"")),
1626 matcher.visit_children_set(HgPath::new(b"")),
1628 VisitChildrenSet::This
1627 VisitChildrenSet::This
1629 );
1628 );
1630
1629
1631 // Test multiple patterns
1630 // Test multiple patterns
1632 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1631 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1633 PatternSyntax::Glob,
1632 PatternSyntax::Glob,
1634 b"**/*.exe",
1633 b"**/*.exe",
1635 Path::new(""),
1634 Path::new(""),
1636 )])
1635 )])
1637 .unwrap();
1636 .unwrap();
1638
1637
1639 assert_eq!(
1638 assert_eq!(
1640 matcher.visit_children_set(HgPath::new(b"")),
1639 matcher.visit_children_set(HgPath::new(b"")),
1641 VisitChildrenSet::This
1640 VisitChildrenSet::This
1642 );
1641 );
1643 }
1642 }
1644
1643
1645 #[test]
1644 #[test]
1646 fn test_unionmatcher() {
1645 fn test_unionmatcher() {
1647 // Path + Rootfiles
1646 // Path + Rootfiles
1648 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1647 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1649 PatternSyntax::RelPath,
1648 PatternSyntax::RelPath,
1650 b"dir/subdir",
1649 b"dir/subdir",
1651 Path::new(""),
1650 Path::new(""),
1652 )])
1651 )])
1653 .unwrap();
1652 .unwrap();
1654 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1653 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1655 PatternSyntax::RootFiles,
1654 PatternSyntax::RootFiles,
1656 b"dir",
1655 b"dir",
1657 Path::new(""),
1656 Path::new(""),
1658 )])
1657 )])
1659 .unwrap();
1658 .unwrap();
1660 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1659 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1661
1660
1662 let mut set = HashSet::new();
1661 let mut set = HashSet::new();
1663 set.insert(HgPathBuf::from_bytes(b"dir"));
1662 set.insert(HgPathBuf::from_bytes(b"dir"));
1664 assert_eq!(
1663 assert_eq!(
1665 matcher.visit_children_set(HgPath::new(b"")),
1664 matcher.visit_children_set(HgPath::new(b"")),
1666 VisitChildrenSet::Set(set)
1665 VisitChildrenSet::Set(set)
1667 );
1666 );
1668 assert_eq!(
1667 assert_eq!(
1669 matcher.visit_children_set(HgPath::new(b"dir")),
1668 matcher.visit_children_set(HgPath::new(b"dir")),
1670 VisitChildrenSet::This
1669 VisitChildrenSet::This
1671 );
1670 );
1672 assert_eq!(
1671 assert_eq!(
1673 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1672 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1674 VisitChildrenSet::Recursive
1673 VisitChildrenSet::Recursive
1675 );
1674 );
1676 assert_eq!(
1675 assert_eq!(
1677 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1676 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1678 VisitChildrenSet::Empty
1677 VisitChildrenSet::Empty
1679 );
1678 );
1680 assert_eq!(
1679 assert_eq!(
1681 matcher.visit_children_set(HgPath::new(b"folder")),
1680 matcher.visit_children_set(HgPath::new(b"folder")),
1682 VisitChildrenSet::Empty
1681 VisitChildrenSet::Empty
1683 );
1682 );
1684 assert_eq!(
1683 assert_eq!(
1685 matcher.visit_children_set(HgPath::new(b"folder")),
1684 matcher.visit_children_set(HgPath::new(b"folder")),
1686 VisitChildrenSet::Empty
1685 VisitChildrenSet::Empty
1687 );
1686 );
1688
1687
1689 // OPT: These next two could be 'all' instead of 'this'.
1688 // OPT: These next two could be 'all' instead of 'this'.
1690 assert_eq!(
1689 assert_eq!(
1691 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1690 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1692 VisitChildrenSet::This
1691 VisitChildrenSet::This
1693 );
1692 );
1694 assert_eq!(
1693 assert_eq!(
1695 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1694 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1696 VisitChildrenSet::This
1695 VisitChildrenSet::This
1697 );
1696 );
1698
1697
1699 // Path + unrelated Path
1698 // Path + unrelated Path
1700 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1699 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1701 PatternSyntax::RelPath,
1700 PatternSyntax::RelPath,
1702 b"dir/subdir",
1701 b"dir/subdir",
1703 Path::new(""),
1702 Path::new(""),
1704 )])
1703 )])
1705 .unwrap();
1704 .unwrap();
1706 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1705 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1707 PatternSyntax::RelPath,
1706 PatternSyntax::RelPath,
1708 b"folder",
1707 b"folder",
1709 Path::new(""),
1708 Path::new(""),
1710 )])
1709 )])
1711 .unwrap();
1710 .unwrap();
1712 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1711 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1713
1712
1714 let mut set = HashSet::new();
1713 let mut set = HashSet::new();
1715 set.insert(HgPathBuf::from_bytes(b"folder"));
1714 set.insert(HgPathBuf::from_bytes(b"folder"));
1716 set.insert(HgPathBuf::from_bytes(b"dir"));
1715 set.insert(HgPathBuf::from_bytes(b"dir"));
1717 assert_eq!(
1716 assert_eq!(
1718 matcher.visit_children_set(HgPath::new(b"")),
1717 matcher.visit_children_set(HgPath::new(b"")),
1719 VisitChildrenSet::Set(set)
1718 VisitChildrenSet::Set(set)
1720 );
1719 );
1721 let mut set = HashSet::new();
1720 let mut set = HashSet::new();
1722 set.insert(HgPathBuf::from_bytes(b"subdir"));
1721 set.insert(HgPathBuf::from_bytes(b"subdir"));
1723 assert_eq!(
1722 assert_eq!(
1724 matcher.visit_children_set(HgPath::new(b"dir")),
1723 matcher.visit_children_set(HgPath::new(b"dir")),
1725 VisitChildrenSet::Set(set)
1724 VisitChildrenSet::Set(set)
1726 );
1725 );
1727
1726
1728 assert_eq!(
1727 assert_eq!(
1729 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1728 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1730 VisitChildrenSet::Recursive
1729 VisitChildrenSet::Recursive
1731 );
1730 );
1732 assert_eq!(
1731 assert_eq!(
1733 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1732 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1734 VisitChildrenSet::Empty
1733 VisitChildrenSet::Empty
1735 );
1734 );
1736
1735
1737 assert_eq!(
1736 assert_eq!(
1738 matcher.visit_children_set(HgPath::new(b"folder")),
1737 matcher.visit_children_set(HgPath::new(b"folder")),
1739 VisitChildrenSet::Recursive
1738 VisitChildrenSet::Recursive
1740 );
1739 );
1741 // OPT: These next two could be 'all' instead of 'this'.
1740 // OPT: These next two could be 'all' instead of 'this'.
1742 assert_eq!(
1741 assert_eq!(
1743 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1742 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1744 VisitChildrenSet::This
1743 VisitChildrenSet::This
1745 );
1744 );
1746 assert_eq!(
1745 assert_eq!(
1747 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1746 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1748 VisitChildrenSet::This
1747 VisitChildrenSet::This
1749 );
1748 );
1750
1749
1751 // Path + subpath
1750 // Path + subpath
1752 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1751 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1753 PatternSyntax::RelPath,
1752 PatternSyntax::RelPath,
1754 b"dir/subdir/x",
1753 b"dir/subdir/x",
1755 Path::new(""),
1754 Path::new(""),
1756 )])
1755 )])
1757 .unwrap();
1756 .unwrap();
1758 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1757 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1759 PatternSyntax::RelPath,
1758 PatternSyntax::RelPath,
1760 b"dir/subdir",
1759 b"dir/subdir",
1761 Path::new(""),
1760 Path::new(""),
1762 )])
1761 )])
1763 .unwrap();
1762 .unwrap();
1764 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1763 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1765
1764
1766 let mut set = HashSet::new();
1765 let mut set = HashSet::new();
1767 set.insert(HgPathBuf::from_bytes(b"dir"));
1766 set.insert(HgPathBuf::from_bytes(b"dir"));
1768 assert_eq!(
1767 assert_eq!(
1769 matcher.visit_children_set(HgPath::new(b"")),
1768 matcher.visit_children_set(HgPath::new(b"")),
1770 VisitChildrenSet::Set(set)
1769 VisitChildrenSet::Set(set)
1771 );
1770 );
1772 let mut set = HashSet::new();
1771 let mut set = HashSet::new();
1773 set.insert(HgPathBuf::from_bytes(b"subdir"));
1772 set.insert(HgPathBuf::from_bytes(b"subdir"));
1774 assert_eq!(
1773 assert_eq!(
1775 matcher.visit_children_set(HgPath::new(b"dir")),
1774 matcher.visit_children_set(HgPath::new(b"dir")),
1776 VisitChildrenSet::Set(set)
1775 VisitChildrenSet::Set(set)
1777 );
1776 );
1778
1777
1779 assert_eq!(
1778 assert_eq!(
1780 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1779 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1781 VisitChildrenSet::Recursive
1780 VisitChildrenSet::Recursive
1782 );
1781 );
1783 assert_eq!(
1782 assert_eq!(
1784 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1783 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1785 VisitChildrenSet::Empty
1784 VisitChildrenSet::Empty
1786 );
1785 );
1787
1786
1788 assert_eq!(
1787 assert_eq!(
1789 matcher.visit_children_set(HgPath::new(b"folder")),
1788 matcher.visit_children_set(HgPath::new(b"folder")),
1790 VisitChildrenSet::Empty
1789 VisitChildrenSet::Empty
1791 );
1790 );
1792 assert_eq!(
1791 assert_eq!(
1793 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1792 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1794 VisitChildrenSet::Recursive
1793 VisitChildrenSet::Recursive
1795 );
1794 );
1796 // OPT: this should probably be 'all' not 'this'.
1795 // OPT: this should probably be 'all' not 'this'.
1797 assert_eq!(
1796 assert_eq!(
1798 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1797 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1799 VisitChildrenSet::This
1798 VisitChildrenSet::This
1800 );
1799 );
1801 }
1800 }
1802
1801
1803 #[test]
1802 #[test]
1804 fn test_intersectionmatcher() {
1803 fn test_intersectionmatcher() {
1805 // Include path + Include rootfiles
1804 // Include path + Include rootfiles
1806 let m1 = Box::new(
1805 let m1 = Box::new(
1807 IncludeMatcher::new(vec![IgnorePattern::new(
1806 IncludeMatcher::new(vec![IgnorePattern::new(
1808 PatternSyntax::RelPath,
1807 PatternSyntax::RelPath,
1809 b"dir/subdir",
1808 b"dir/subdir",
1810 Path::new(""),
1809 Path::new(""),
1811 )])
1810 )])
1812 .unwrap(),
1811 .unwrap(),
1813 );
1812 );
1814 let m2 = Box::new(
1813 let m2 = Box::new(
1815 IncludeMatcher::new(vec![IgnorePattern::new(
1814 IncludeMatcher::new(vec![IgnorePattern::new(
1816 PatternSyntax::RootFiles,
1815 PatternSyntax::RootFiles,
1817 b"dir",
1816 b"dir",
1818 Path::new(""),
1817 Path::new(""),
1819 )])
1818 )])
1820 .unwrap(),
1819 .unwrap(),
1821 );
1820 );
1822 let matcher = IntersectionMatcher::new(m1, m2);
1821 let matcher = IntersectionMatcher::new(m1, m2);
1823
1822
1824 let mut set = HashSet::new();
1823 let mut set = HashSet::new();
1825 set.insert(HgPathBuf::from_bytes(b"dir"));
1824 set.insert(HgPathBuf::from_bytes(b"dir"));
1826 assert_eq!(
1825 assert_eq!(
1827 matcher.visit_children_set(HgPath::new(b"")),
1826 matcher.visit_children_set(HgPath::new(b"")),
1828 VisitChildrenSet::Set(set)
1827 VisitChildrenSet::Set(set)
1829 );
1828 );
1830 assert_eq!(
1829 assert_eq!(
1831 matcher.visit_children_set(HgPath::new(b"dir")),
1830 matcher.visit_children_set(HgPath::new(b"dir")),
1832 VisitChildrenSet::This
1831 VisitChildrenSet::This
1833 );
1832 );
1834 assert_eq!(
1833 assert_eq!(
1835 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1834 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1836 VisitChildrenSet::Empty
1835 VisitChildrenSet::Empty
1837 );
1836 );
1838 assert_eq!(
1837 assert_eq!(
1839 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1838 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1840 VisitChildrenSet::Empty
1839 VisitChildrenSet::Empty
1841 );
1840 );
1842 assert_eq!(
1841 assert_eq!(
1843 matcher.visit_children_set(HgPath::new(b"folder")),
1842 matcher.visit_children_set(HgPath::new(b"folder")),
1844 VisitChildrenSet::Empty
1843 VisitChildrenSet::Empty
1845 );
1844 );
1846 assert_eq!(
1845 assert_eq!(
1847 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1846 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1848 VisitChildrenSet::Empty
1847 VisitChildrenSet::Empty
1849 );
1848 );
1850 assert_eq!(
1849 assert_eq!(
1851 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1850 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1852 VisitChildrenSet::Empty
1851 VisitChildrenSet::Empty
1853 );
1852 );
1854
1853
1855 // Non intersecting paths
1854 // Non intersecting paths
1856 let m1 = Box::new(
1855 let m1 = Box::new(
1857 IncludeMatcher::new(vec![IgnorePattern::new(
1856 IncludeMatcher::new(vec![IgnorePattern::new(
1858 PatternSyntax::RelPath,
1857 PatternSyntax::RelPath,
1859 b"dir/subdir",
1858 b"dir/subdir",
1860 Path::new(""),
1859 Path::new(""),
1861 )])
1860 )])
1862 .unwrap(),
1861 .unwrap(),
1863 );
1862 );
1864 let m2 = Box::new(
1863 let m2 = Box::new(
1865 IncludeMatcher::new(vec![IgnorePattern::new(
1864 IncludeMatcher::new(vec![IgnorePattern::new(
1866 PatternSyntax::RelPath,
1865 PatternSyntax::RelPath,
1867 b"folder",
1866 b"folder",
1868 Path::new(""),
1867 Path::new(""),
1869 )])
1868 )])
1870 .unwrap(),
1869 .unwrap(),
1871 );
1870 );
1872 let matcher = IntersectionMatcher::new(m1, m2);
1871 let matcher = IntersectionMatcher::new(m1, m2);
1873
1872
1874 assert_eq!(
1873 assert_eq!(
1875 matcher.visit_children_set(HgPath::new(b"")),
1874 matcher.visit_children_set(HgPath::new(b"")),
1876 VisitChildrenSet::Empty
1875 VisitChildrenSet::Empty
1877 );
1876 );
1878 assert_eq!(
1877 assert_eq!(
1879 matcher.visit_children_set(HgPath::new(b"dir")),
1878 matcher.visit_children_set(HgPath::new(b"dir")),
1880 VisitChildrenSet::Empty
1879 VisitChildrenSet::Empty
1881 );
1880 );
1882 assert_eq!(
1881 assert_eq!(
1883 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1882 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1884 VisitChildrenSet::Empty
1883 VisitChildrenSet::Empty
1885 );
1884 );
1886 assert_eq!(
1885 assert_eq!(
1887 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1886 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1888 VisitChildrenSet::Empty
1887 VisitChildrenSet::Empty
1889 );
1888 );
1890 assert_eq!(
1889 assert_eq!(
1891 matcher.visit_children_set(HgPath::new(b"folder")),
1890 matcher.visit_children_set(HgPath::new(b"folder")),
1892 VisitChildrenSet::Empty
1891 VisitChildrenSet::Empty
1893 );
1892 );
1894 assert_eq!(
1893 assert_eq!(
1895 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1894 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1896 VisitChildrenSet::Empty
1895 VisitChildrenSet::Empty
1897 );
1896 );
1898 assert_eq!(
1897 assert_eq!(
1899 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1898 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1900 VisitChildrenSet::Empty
1899 VisitChildrenSet::Empty
1901 );
1900 );
1902
1901
1903 // Nested paths
1902 // Nested paths
1904 let m1 = Box::new(
1903 let m1 = Box::new(
1905 IncludeMatcher::new(vec![IgnorePattern::new(
1904 IncludeMatcher::new(vec![IgnorePattern::new(
1906 PatternSyntax::RelPath,
1905 PatternSyntax::RelPath,
1907 b"dir/subdir/x",
1906 b"dir/subdir/x",
1908 Path::new(""),
1907 Path::new(""),
1909 )])
1908 )])
1910 .unwrap(),
1909 .unwrap(),
1911 );
1910 );
1912 let m2 = Box::new(
1911 let m2 = Box::new(
1913 IncludeMatcher::new(vec![IgnorePattern::new(
1912 IncludeMatcher::new(vec![IgnorePattern::new(
1914 PatternSyntax::RelPath,
1913 PatternSyntax::RelPath,
1915 b"dir/subdir",
1914 b"dir/subdir",
1916 Path::new(""),
1915 Path::new(""),
1917 )])
1916 )])
1918 .unwrap(),
1917 .unwrap(),
1919 );
1918 );
1920 let matcher = IntersectionMatcher::new(m1, m2);
1919 let matcher = IntersectionMatcher::new(m1, m2);
1921
1920
1922 let mut set = HashSet::new();
1921 let mut set = HashSet::new();
1923 set.insert(HgPathBuf::from_bytes(b"dir"));
1922 set.insert(HgPathBuf::from_bytes(b"dir"));
1924 assert_eq!(
1923 assert_eq!(
1925 matcher.visit_children_set(HgPath::new(b"")),
1924 matcher.visit_children_set(HgPath::new(b"")),
1926 VisitChildrenSet::Set(set)
1925 VisitChildrenSet::Set(set)
1927 );
1926 );
1928
1927
1929 let mut set = HashSet::new();
1928 let mut set = HashSet::new();
1930 set.insert(HgPathBuf::from_bytes(b"subdir"));
1929 set.insert(HgPathBuf::from_bytes(b"subdir"));
1931 assert_eq!(
1930 assert_eq!(
1932 matcher.visit_children_set(HgPath::new(b"dir")),
1931 matcher.visit_children_set(HgPath::new(b"dir")),
1933 VisitChildrenSet::Set(set)
1932 VisitChildrenSet::Set(set)
1934 );
1933 );
1935 let mut set = HashSet::new();
1934 let mut set = HashSet::new();
1936 set.insert(HgPathBuf::from_bytes(b"x"));
1935 set.insert(HgPathBuf::from_bytes(b"x"));
1937 assert_eq!(
1936 assert_eq!(
1938 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1937 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1939 VisitChildrenSet::Set(set)
1938 VisitChildrenSet::Set(set)
1940 );
1939 );
1941 assert_eq!(
1940 assert_eq!(
1942 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1941 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1943 VisitChildrenSet::Empty
1942 VisitChildrenSet::Empty
1944 );
1943 );
1945 assert_eq!(
1944 assert_eq!(
1946 matcher.visit_children_set(HgPath::new(b"folder")),
1945 matcher.visit_children_set(HgPath::new(b"folder")),
1947 VisitChildrenSet::Empty
1946 VisitChildrenSet::Empty
1948 );
1947 );
1949 assert_eq!(
1948 assert_eq!(
1950 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1949 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1951 VisitChildrenSet::Empty
1950 VisitChildrenSet::Empty
1952 );
1951 );
1953 // OPT: this should probably be 'all' not 'this'.
1952 // OPT: this should probably be 'all' not 'this'.
1954 assert_eq!(
1953 assert_eq!(
1955 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1954 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1956 VisitChildrenSet::This
1955 VisitChildrenSet::This
1957 );
1956 );
1958
1957
1959 // Diverging paths
1958 // Diverging paths
1960 let m1 = Box::new(
1959 let m1 = Box::new(
1961 IncludeMatcher::new(vec![IgnorePattern::new(
1960 IncludeMatcher::new(vec![IgnorePattern::new(
1962 PatternSyntax::RelPath,
1961 PatternSyntax::RelPath,
1963 b"dir/subdir/x",
1962 b"dir/subdir/x",
1964 Path::new(""),
1963 Path::new(""),
1965 )])
1964 )])
1966 .unwrap(),
1965 .unwrap(),
1967 );
1966 );
1968 let m2 = Box::new(
1967 let m2 = Box::new(
1969 IncludeMatcher::new(vec![IgnorePattern::new(
1968 IncludeMatcher::new(vec![IgnorePattern::new(
1970 PatternSyntax::RelPath,
1969 PatternSyntax::RelPath,
1971 b"dir/subdir/z",
1970 b"dir/subdir/z",
1972 Path::new(""),
1971 Path::new(""),
1973 )])
1972 )])
1974 .unwrap(),
1973 .unwrap(),
1975 );
1974 );
1976 let matcher = IntersectionMatcher::new(m1, m2);
1975 let matcher = IntersectionMatcher::new(m1, m2);
1977
1976
1978 // OPT: these next two could probably be Empty as well.
1977 // OPT: these next two could probably be Empty as well.
1979 let mut set = HashSet::new();
1978 let mut set = HashSet::new();
1980 set.insert(HgPathBuf::from_bytes(b"dir"));
1979 set.insert(HgPathBuf::from_bytes(b"dir"));
1981 assert_eq!(
1980 assert_eq!(
1982 matcher.visit_children_set(HgPath::new(b"")),
1981 matcher.visit_children_set(HgPath::new(b"")),
1983 VisitChildrenSet::Set(set)
1982 VisitChildrenSet::Set(set)
1984 );
1983 );
1985 // OPT: these next two could probably be Empty as well.
1984 // OPT: these next two could probably be Empty as well.
1986 let mut set = HashSet::new();
1985 let mut set = HashSet::new();
1987 set.insert(HgPathBuf::from_bytes(b"subdir"));
1986 set.insert(HgPathBuf::from_bytes(b"subdir"));
1988 assert_eq!(
1987 assert_eq!(
1989 matcher.visit_children_set(HgPath::new(b"dir")),
1988 matcher.visit_children_set(HgPath::new(b"dir")),
1990 VisitChildrenSet::Set(set)
1989 VisitChildrenSet::Set(set)
1991 );
1990 );
1992 assert_eq!(
1991 assert_eq!(
1993 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1992 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1994 VisitChildrenSet::Empty
1993 VisitChildrenSet::Empty
1995 );
1994 );
1996 assert_eq!(
1995 assert_eq!(
1997 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1996 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1998 VisitChildrenSet::Empty
1997 VisitChildrenSet::Empty
1999 );
1998 );
2000 assert_eq!(
1999 assert_eq!(
2001 matcher.visit_children_set(HgPath::new(b"folder")),
2000 matcher.visit_children_set(HgPath::new(b"folder")),
2002 VisitChildrenSet::Empty
2001 VisitChildrenSet::Empty
2003 );
2002 );
2004 assert_eq!(
2003 assert_eq!(
2005 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2004 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2006 VisitChildrenSet::Empty
2005 VisitChildrenSet::Empty
2007 );
2006 );
2008 assert_eq!(
2007 assert_eq!(
2009 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2008 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2010 VisitChildrenSet::Empty
2009 VisitChildrenSet::Empty
2011 );
2010 );
2012 }
2011 }
2013
2012
2014 #[test]
2013 #[test]
2015 fn test_differencematcher() {
2014 fn test_differencematcher() {
2016 // Two alwaysmatchers should function like a nevermatcher
2015 // Two alwaysmatchers should function like a nevermatcher
2017 let m1 = AlwaysMatcher;
2016 let m1 = AlwaysMatcher;
2018 let m2 = AlwaysMatcher;
2017 let m2 = AlwaysMatcher;
2019 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2018 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2020
2019
2021 for case in &[
2020 for case in &[
2022 &b""[..],
2021 &b""[..],
2023 b"dir",
2022 b"dir",
2024 b"dir/subdir",
2023 b"dir/subdir",
2025 b"dir/subdir/z",
2024 b"dir/subdir/z",
2026 b"dir/foo",
2025 b"dir/foo",
2027 b"dir/subdir/x",
2026 b"dir/subdir/x",
2028 b"folder",
2027 b"folder",
2029 ] {
2028 ] {
2030 assert_eq!(
2029 assert_eq!(
2031 matcher.visit_children_set(HgPath::new(case)),
2030 matcher.visit_children_set(HgPath::new(case)),
2032 VisitChildrenSet::Empty
2031 VisitChildrenSet::Empty
2033 );
2032 );
2034 }
2033 }
2035
2034
2036 // One always and one never should behave the same as an always
2035 // One always and one never should behave the same as an always
2037 let m1 = AlwaysMatcher;
2036 let m1 = AlwaysMatcher;
2038 let m2 = NeverMatcher;
2037 let m2 = NeverMatcher;
2039 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2038 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2040
2039
2041 for case in &[
2040 for case in &[
2042 &b""[..],
2041 &b""[..],
2043 b"dir",
2042 b"dir",
2044 b"dir/subdir",
2043 b"dir/subdir",
2045 b"dir/subdir/z",
2044 b"dir/subdir/z",
2046 b"dir/foo",
2045 b"dir/foo",
2047 b"dir/subdir/x",
2046 b"dir/subdir/x",
2048 b"folder",
2047 b"folder",
2049 ] {
2048 ] {
2050 assert_eq!(
2049 assert_eq!(
2051 matcher.visit_children_set(HgPath::new(case)),
2050 matcher.visit_children_set(HgPath::new(case)),
2052 VisitChildrenSet::Recursive
2051 VisitChildrenSet::Recursive
2053 );
2052 );
2054 }
2053 }
2055
2054
2056 // Two include matchers
2055 // Two include matchers
2057 let m1 = Box::new(
2056 let m1 = Box::new(
2058 IncludeMatcher::new(vec![IgnorePattern::new(
2057 IncludeMatcher::new(vec![IgnorePattern::new(
2059 PatternSyntax::RelPath,
2058 PatternSyntax::RelPath,
2060 b"dir/subdir",
2059 b"dir/subdir",
2061 Path::new("/repo"),
2060 Path::new("/repo"),
2062 )])
2061 )])
2063 .unwrap(),
2062 .unwrap(),
2064 );
2063 );
2065 let m2 = Box::new(
2064 let m2 = Box::new(
2066 IncludeMatcher::new(vec![IgnorePattern::new(
2065 IncludeMatcher::new(vec![IgnorePattern::new(
2067 PatternSyntax::RootFiles,
2066 PatternSyntax::RootFiles,
2068 b"dir",
2067 b"dir",
2069 Path::new("/repo"),
2068 Path::new("/repo"),
2070 )])
2069 )])
2071 .unwrap(),
2070 .unwrap(),
2072 );
2071 );
2073
2072
2074 let matcher = DifferenceMatcher::new(m1, m2);
2073 let matcher = DifferenceMatcher::new(m1, m2);
2075
2074
2076 let mut set = HashSet::new();
2075 let mut set = HashSet::new();
2077 set.insert(HgPathBuf::from_bytes(b"dir"));
2076 set.insert(HgPathBuf::from_bytes(b"dir"));
2078 assert_eq!(
2077 assert_eq!(
2079 matcher.visit_children_set(HgPath::new(b"")),
2078 matcher.visit_children_set(HgPath::new(b"")),
2080 VisitChildrenSet::Set(set)
2079 VisitChildrenSet::Set(set)
2081 );
2080 );
2082
2081
2083 let mut set = HashSet::new();
2082 let mut set = HashSet::new();
2084 set.insert(HgPathBuf::from_bytes(b"subdir"));
2083 set.insert(HgPathBuf::from_bytes(b"subdir"));
2085 assert_eq!(
2084 assert_eq!(
2086 matcher.visit_children_set(HgPath::new(b"dir")),
2085 matcher.visit_children_set(HgPath::new(b"dir")),
2087 VisitChildrenSet::Set(set)
2086 VisitChildrenSet::Set(set)
2088 );
2087 );
2089 assert_eq!(
2088 assert_eq!(
2090 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2089 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2091 VisitChildrenSet::Recursive
2090 VisitChildrenSet::Recursive
2092 );
2091 );
2093 assert_eq!(
2092 assert_eq!(
2094 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2093 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2095 VisitChildrenSet::Empty
2094 VisitChildrenSet::Empty
2096 );
2095 );
2097 assert_eq!(
2096 assert_eq!(
2098 matcher.visit_children_set(HgPath::new(b"folder")),
2097 matcher.visit_children_set(HgPath::new(b"folder")),
2099 VisitChildrenSet::Empty
2098 VisitChildrenSet::Empty
2100 );
2099 );
2101 assert_eq!(
2100 assert_eq!(
2102 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2101 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2103 VisitChildrenSet::This
2102 VisitChildrenSet::This
2104 );
2103 );
2105 assert_eq!(
2104 assert_eq!(
2106 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2105 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2107 VisitChildrenSet::This
2106 VisitChildrenSet::This
2108 );
2107 );
2109 }
2108 }
2110 }
2109 }
@@ -1,782 +1,782 b''
1 use crate::changelog::Changelog;
1 use crate::changelog::Changelog;
2 use crate::config::{Config, ConfigError, ConfigParseError};
2 use crate::config::{Config, ConfigError, ConfigParseError};
3 use crate::dirstate::DirstateParents;
3 use crate::dirstate::DirstateParents;
4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 use crate::dirstate_tree::owning::OwningDirstateMap;
6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 use crate::errors::HgResultExt;
7 use crate::errors::HgResultExt;
8 use crate::errors::{HgError, IoResultExt};
8 use crate::errors::{HgError, IoResultExt};
9 use crate::lock::{try_with_lock_no_wait, LockError};
9 use crate::lock::{try_with_lock_no_wait, LockError};
10 use crate::manifest::{Manifest, Manifestlog};
10 use crate::manifest::{Manifest, Manifestlog};
11 use crate::revlog::filelog::Filelog;
11 use crate::revlog::filelog::Filelog;
12 use crate::revlog::RevlogError;
12 use crate::revlog::RevlogError;
13 use crate::utils::debug::debug_wait_for_file_or_print;
13 use crate::utils::debug::debug_wait_for_file_or_print;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::utils::SliceExt;
16 use crate::utils::SliceExt;
17 use crate::vfs::{is_dir, is_file, Vfs};
17 use crate::vfs::{is_dir, is_file, Vfs};
18 use crate::DirstateError;
18 use crate::DirstateError;
19 use crate::{requirements, NodePrefix, UncheckedRevision};
19 use crate::{requirements, NodePrefix, UncheckedRevision};
20 use std::cell::{Ref, RefCell, RefMut};
20 use std::cell::{Ref, RefCell, RefMut};
21 use std::collections::HashSet;
21 use std::collections::HashSet;
22 use std::io::Seek;
22 use std::io::Seek;
23 use std::io::SeekFrom;
23 use std::io::SeekFrom;
24 use std::io::Write as IoWrite;
24 use std::io::Write as IoWrite;
25 use std::path::{Path, PathBuf};
25 use std::path::{Path, PathBuf};
26
26
27 const V2_MAX_READ_ATTEMPTS: usize = 5;
27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28
28
29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30
30
31 /// A repository on disk
31 /// A repository on disk
32 pub struct Repo {
32 pub struct Repo {
33 working_directory: PathBuf,
33 working_directory: PathBuf,
34 dot_hg: PathBuf,
34 dot_hg: PathBuf,
35 store: PathBuf,
35 store: PathBuf,
36 requirements: HashSet<String>,
36 requirements: HashSet<String>,
37 config: Config,
37 config: Config,
38 dirstate_parents: LazyCell<DirstateParents>,
38 dirstate_parents: LazyCell<DirstateParents>,
39 dirstate_map: LazyCell<OwningDirstateMap>,
39 dirstate_map: LazyCell<OwningDirstateMap>,
40 changelog: LazyCell<Changelog>,
40 changelog: LazyCell<Changelog>,
41 manifestlog: LazyCell<Manifestlog>,
41 manifestlog: LazyCell<Manifestlog>,
42 }
42 }
43
43
44 #[derive(Debug, derive_more::From)]
44 #[derive(Debug, derive_more::From)]
45 pub enum RepoError {
45 pub enum RepoError {
46 NotFound {
46 NotFound {
47 at: PathBuf,
47 at: PathBuf,
48 },
48 },
49 #[from]
49 #[from]
50 ConfigParseError(ConfigParseError),
50 ConfigParseError(ConfigParseError),
51 #[from]
51 #[from]
52 Other(HgError),
52 Other(HgError),
53 }
53 }
54
54
55 impl From<ConfigError> for RepoError {
55 impl From<ConfigError> for RepoError {
56 fn from(error: ConfigError) -> Self {
56 fn from(error: ConfigError) -> Self {
57 match error {
57 match error {
58 ConfigError::Parse(error) => error.into(),
58 ConfigError::Parse(error) => error.into(),
59 ConfigError::Other(error) => error.into(),
59 ConfigError::Other(error) => error.into(),
60 }
60 }
61 }
61 }
62 }
62 }
63
63
64 impl Repo {
64 impl Repo {
65 /// tries to find nearest repository root in current working directory or
65 /// tries to find nearest repository root in current working directory or
66 /// its ancestors
66 /// its ancestors
67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 let current_directory = crate::utils::current_dir()?;
68 let current_directory = crate::utils::current_dir()?;
69 // ancestors() is inclusive: it first yields `current_directory`
69 // ancestors() is inclusive: it first yields `current_directory`
70 // as-is.
70 // as-is.
71 for ancestor in current_directory.ancestors() {
71 for ancestor in current_directory.ancestors() {
72 if is_dir(ancestor.join(".hg"))? {
72 if is_dir(ancestor.join(".hg"))? {
73 return Ok(ancestor.to_path_buf());
73 return Ok(ancestor.to_path_buf());
74 }
74 }
75 }
75 }
76 Err(RepoError::NotFound {
76 Err(RepoError::NotFound {
77 at: current_directory,
77 at: current_directory,
78 })
78 })
79 }
79 }
80
80
81 /// Find a repository, either at the given path (which must contain a `.hg`
81 /// Find a repository, either at the given path (which must contain a `.hg`
82 /// sub-directory) or by searching the current directory and its
82 /// sub-directory) or by searching the current directory and its
83 /// ancestors.
83 /// ancestors.
84 ///
84 ///
85 /// A method with two very different "modes" like this usually a code smell
85 /// A method with two very different "modes" like this usually a code smell
86 /// to make two methods instead, but in this case an `Option` is what rhg
86 /// to make two methods instead, but in this case an `Option` is what rhg
87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 /// Having two methods would just move that `if` to almost all callers.
88 /// Having two methods would just move that `if` to almost all callers.
89 pub fn find(
89 pub fn find(
90 config: &Config,
90 config: &Config,
91 explicit_path: Option<PathBuf>,
91 explicit_path: Option<PathBuf>,
92 ) -> Result<Self, RepoError> {
92 ) -> Result<Self, RepoError> {
93 if let Some(root) = explicit_path {
93 if let Some(root) = explicit_path {
94 if is_dir(root.join(".hg"))? {
94 if is_dir(root.join(".hg"))? {
95 Self::new_at_path(root, config)
95 Self::new_at_path(root, config)
96 } else if is_file(&root)? {
96 } else if is_file(&root)? {
97 Err(HgError::unsupported("bundle repository").into())
97 Err(HgError::unsupported("bundle repository").into())
98 } else {
98 } else {
99 Err(RepoError::NotFound { at: root })
99 Err(RepoError::NotFound { at: root })
100 }
100 }
101 } else {
101 } else {
102 let root = Self::find_repo_root()?;
102 let root = Self::find_repo_root()?;
103 Self::new_at_path(root, config)
103 Self::new_at_path(root, config)
104 }
104 }
105 }
105 }
106
106
107 /// To be called after checking that `.hg` is a sub-directory
107 /// To be called after checking that `.hg` is a sub-directory
108 fn new_at_path(
108 fn new_at_path(
109 working_directory: PathBuf,
109 working_directory: PathBuf,
110 config: &Config,
110 config: &Config,
111 ) -> Result<Self, RepoError> {
111 ) -> Result<Self, RepoError> {
112 let dot_hg = working_directory.join(".hg");
112 let dot_hg = working_directory.join(".hg");
113
113
114 let mut repo_config_files =
114 let mut repo_config_files =
115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116
116
117 let hg_vfs = Vfs { base: &dot_hg };
117 let hg_vfs = Vfs { base: &dot_hg };
118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 let relative =
119 let relative =
120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 let shared =
121 let shared =
122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123
123
124 // From `mercurial/localrepo.py`:
124 // From `mercurial/localrepo.py`:
125 //
125 //
126 // if .hg/requires contains the sharesafe requirement, it means
126 // if .hg/requires contains the sharesafe requirement, it means
127 // there exists a `.hg/store/requires` too and we should read it
127 // there exists a `.hg/store/requires` too and we should read it
128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
130 // is not present, refer checkrequirementscompat() for that
130 // is not present, refer checkrequirementscompat() for that
131 //
131 //
132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 // repository was shared the old way. We check the share source
133 // repository was shared the old way. We check the share source
134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 // current repository needs to be reshared
135 // current repository needs to be reshared
136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137
137
138 let store_path;
138 let store_path;
139 if !shared {
139 if !shared {
140 store_path = dot_hg.join("store");
140 store_path = dot_hg.join("store");
141 } else {
141 } else {
142 let bytes = hg_vfs.read("sharedpath")?;
142 let bytes = hg_vfs.read("sharedpath")?;
143 let mut shared_path =
143 let mut shared_path =
144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 .to_owned();
145 .to_owned();
146 if relative {
146 if relative {
147 shared_path = dot_hg.join(shared_path)
147 shared_path = dot_hg.join(shared_path)
148 }
148 }
149 if !is_dir(&shared_path)? {
149 if !is_dir(&shared_path)? {
150 return Err(HgError::corrupted(format!(
150 return Err(HgError::corrupted(format!(
151 ".hg/sharedpath points to nonexistent directory {}",
151 ".hg/sharedpath points to nonexistent directory {}",
152 shared_path.display()
152 shared_path.display()
153 ))
153 ))
154 .into());
154 .into());
155 }
155 }
156
156
157 store_path = shared_path.join("store");
157 store_path = shared_path.join("store");
158
158
159 let source_is_share_safe =
159 let source_is_share_safe =
160 requirements::load(Vfs { base: &shared_path })?
160 requirements::load(Vfs { base: &shared_path })?
161 .contains(requirements::SHARESAFE_REQUIREMENT);
161 .contains(requirements::SHARESAFE_REQUIREMENT);
162
162
163 if share_safe != source_is_share_safe {
163 if share_safe != source_is_share_safe {
164 return Err(HgError::unsupported("share-safe mismatch").into());
164 return Err(HgError::unsupported("share-safe mismatch").into());
165 }
165 }
166
166
167 if share_safe {
167 if share_safe {
168 repo_config_files.insert(0, shared_path.join("hgrc"))
168 repo_config_files.insert(0, shared_path.join("hgrc"))
169 }
169 }
170 }
170 }
171 if share_safe {
171 if share_safe {
172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 }
173 }
174
174
175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 config.combine_with_repo(&repo_config_files)?
176 config.combine_with_repo(&repo_config_files)?
177 } else {
177 } else {
178 config.clone()
178 config.clone()
179 };
179 };
180
180
181 let repo = Self {
181 let repo = Self {
182 requirements: reqs,
182 requirements: reqs,
183 working_directory,
183 working_directory,
184 store: store_path,
184 store: store_path,
185 dot_hg,
185 dot_hg,
186 config: repo_config,
186 config: repo_config,
187 dirstate_parents: LazyCell::new(),
187 dirstate_parents: LazyCell::new(),
188 dirstate_map: LazyCell::new(),
188 dirstate_map: LazyCell::new(),
189 changelog: LazyCell::new(),
189 changelog: LazyCell::new(),
190 manifestlog: LazyCell::new(),
190 manifestlog: LazyCell::new(),
191 };
191 };
192
192
193 requirements::check(&repo)?;
193 requirements::check(&repo)?;
194
194
195 Ok(repo)
195 Ok(repo)
196 }
196 }
197
197
198 pub fn working_directory_path(&self) -> &Path {
198 pub fn working_directory_path(&self) -> &Path {
199 &self.working_directory
199 &self.working_directory
200 }
200 }
201
201
202 pub fn requirements(&self) -> &HashSet<String> {
202 pub fn requirements(&self) -> &HashSet<String> {
203 &self.requirements
203 &self.requirements
204 }
204 }
205
205
206 pub fn config(&self) -> &Config {
206 pub fn config(&self) -> &Config {
207 &self.config
207 &self.config
208 }
208 }
209
209
210 /// For accessing repository files (in `.hg`), except for the store
210 /// For accessing repository files (in `.hg`), except for the store
211 /// (`.hg/store`).
211 /// (`.hg/store`).
212 pub fn hg_vfs(&self) -> Vfs<'_> {
212 pub fn hg_vfs(&self) -> Vfs<'_> {
213 Vfs { base: &self.dot_hg }
213 Vfs { base: &self.dot_hg }
214 }
214 }
215
215
216 /// For accessing repository store files (in `.hg/store`)
216 /// For accessing repository store files (in `.hg/store`)
217 pub fn store_vfs(&self) -> Vfs<'_> {
217 pub fn store_vfs(&self) -> Vfs<'_> {
218 Vfs { base: &self.store }
218 Vfs { base: &self.store }
219 }
219 }
220
220
221 /// For accessing the working copy
221 /// For accessing the working copy
222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 Vfs {
223 Vfs {
224 base: &self.working_directory,
224 base: &self.working_directory,
225 }
225 }
226 }
226 }
227
227
228 pub fn try_with_wlock_no_wait<R>(
228 pub fn try_with_wlock_no_wait<R>(
229 &self,
229 &self,
230 f: impl FnOnce() -> R,
230 f: impl FnOnce() -> R,
231 ) -> Result<R, LockError> {
231 ) -> Result<R, LockError> {
232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 }
233 }
234
234
235 /// Whether this repo should use dirstate-v2.
235 /// Whether this repo should use dirstate-v2.
236 /// The presence of `dirstate-v2` in the requirements does not mean that
236 /// The presence of `dirstate-v2` in the requirements does not mean that
237 /// the on-disk dirstate is necessarily in version 2. In most cases,
237 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 /// upgrade mechanism being cut short), the on-disk version will be a
239 /// upgrade mechanism being cut short), the on-disk version will be a
240 /// v1 file.
240 /// v1 file.
241 /// Semantically, having a requirement only means that a client cannot
241 /// Semantically, having a requirement only means that a client cannot
242 /// properly understand or properly update the repo if it lacks the support
242 /// properly understand or properly update the repo if it lacks the support
243 /// for the required feature, but not that that feature is actually used
243 /// for the required feature, but not that that feature is actually used
244 /// in all occasions.
244 /// in all occasions.
245 pub fn use_dirstate_v2(&self) -> bool {
245 pub fn use_dirstate_v2(&self) -> bool {
246 self.requirements
246 self.requirements
247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 }
248 }
249
249
250 pub fn has_sparse(&self) -> bool {
250 pub fn has_sparse(&self) -> bool {
251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 }
252 }
253
253
254 pub fn has_narrow(&self) -> bool {
254 pub fn has_narrow(&self) -> bool {
255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 }
256 }
257
257
258 pub fn has_nodemap(&self) -> bool {
258 pub fn has_nodemap(&self) -> bool {
259 self.requirements
259 self.requirements
260 .contains(requirements::NODEMAP_REQUIREMENT)
260 .contains(requirements::NODEMAP_REQUIREMENT)
261 }
261 }
262
262
263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 Ok(self
264 Ok(self
265 .hg_vfs()
265 .hg_vfs()
266 .read("dirstate")
266 .read("dirstate")
267 .io_not_found_as_none()?
267 .io_not_found_as_none()?
268 .unwrap_or_default())
268 .unwrap_or_default())
269 }
269 }
270
270
271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 use std::os::unix::fs::MetadataExt;
272 use std::os::unix::fs::MetadataExt;
273 Ok(self
273 Ok(self
274 .hg_vfs()
274 .hg_vfs()
275 .symlink_metadata("dirstate")
275 .symlink_metadata("dirstate")
276 .io_not_found_as_none()?
276 .io_not_found_as_none()?
277 .map(|meta| meta.ino()))
277 .map(|meta| meta.ino()))
278 }
278 }
279
279
280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 Ok(*self
281 Ok(*self
282 .dirstate_parents
282 .dirstate_parents
283 .get_or_init(|| self.read_dirstate_parents())?)
283 .get_or_init(|| self.read_dirstate_parents())?)
284 }
284 }
285
285
286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 let dirstate = self.dirstate_file_contents()?;
287 let dirstate = self.dirstate_file_contents()?;
288 let parents = if dirstate.is_empty() {
288 let parents = if dirstate.is_empty() {
289 DirstateParents::NULL
289 DirstateParents::NULL
290 } else if self.use_dirstate_v2() {
290 } else if self.use_dirstate_v2() {
291 let docket_res =
291 let docket_res =
292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 match docket_res {
293 match docket_res {
294 Ok(docket) => docket.parents(),
294 Ok(docket) => docket.parents(),
295 Err(_) => {
295 Err(_) => {
296 log::info!(
296 log::info!(
297 "Parsing dirstate docket failed, \
297 "Parsing dirstate docket failed, \
298 falling back to dirstate-v1"
298 falling back to dirstate-v1"
299 );
299 );
300 *crate::dirstate::parsers::parse_dirstate_parents(
300 *crate::dirstate::parsers::parse_dirstate_parents(
301 &dirstate,
301 &dirstate,
302 )?
302 )?
303 }
303 }
304 }
304 }
305 } else {
305 } else {
306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 };
307 };
308 self.dirstate_parents.set(parents);
308 self.dirstate_parents.set(parents);
309 Ok(parents)
309 Ok(parents)
310 }
310 }
311
311
312 /// Returns the information read from the dirstate docket necessary to
312 /// Returns the information read from the dirstate docket necessary to
313 /// check if the data file has been updated/deleted by another process
313 /// check if the data file has been updated/deleted by another process
314 /// since we last read the dirstate.
314 /// since we last read the dirstate.
315 /// Namely, the inode, data file uuid and the data size.
315 /// Namely, the inode, data file uuid and the data size.
316 fn get_dirstate_data_file_integrity(
316 fn get_dirstate_data_file_integrity(
317 &self,
317 &self,
318 ) -> Result<DirstateMapIdentity, HgError> {
318 ) -> Result<DirstateMapIdentity, HgError> {
319 assert!(
319 assert!(
320 self.use_dirstate_v2(),
320 self.use_dirstate_v2(),
321 "accessing dirstate data file ID without dirstate-v2"
321 "accessing dirstate data file ID without dirstate-v2"
322 );
322 );
323 // Get the identity before the contents since we could have a race
323 // Get the identity before the contents since we could have a race
324 // between the two. Having an identity that is too old is fine, but
324 // between the two. Having an identity that is too old is fine, but
325 // one that is younger than the content change is bad.
325 // one that is younger than the content change is bad.
326 let identity = self.dirstate_identity()?;
326 let identity = self.dirstate_identity()?;
327 let dirstate = self.dirstate_file_contents()?;
327 let dirstate = self.dirstate_file_contents()?;
328 if dirstate.is_empty() {
328 if dirstate.is_empty() {
329 self.dirstate_parents.set(DirstateParents::NULL);
329 self.dirstate_parents.set(DirstateParents::NULL);
330 Ok((identity, None, 0))
330 Ok((identity, None, 0))
331 } else {
331 } else {
332 let docket_res =
332 let docket_res =
333 crate::dirstate_tree::on_disk::read_docket(&dirstate);
333 crate::dirstate_tree::on_disk::read_docket(&dirstate);
334 match docket_res {
334 match docket_res {
335 Ok(docket) => {
335 Ok(docket) => {
336 self.dirstate_parents.set(docket.parents());
336 self.dirstate_parents.set(docket.parents());
337 Ok((
337 Ok((
338 identity,
338 identity,
339 Some(docket.uuid.to_owned()),
339 Some(docket.uuid.to_owned()),
340 docket.data_size(),
340 docket.data_size(),
341 ))
341 ))
342 }
342 }
343 Err(_) => {
343 Err(_) => {
344 log::info!(
344 log::info!(
345 "Parsing dirstate docket failed, \
345 "Parsing dirstate docket failed, \
346 falling back to dirstate-v1"
346 falling back to dirstate-v1"
347 );
347 );
348 let parents =
348 let parents =
349 *crate::dirstate::parsers::parse_dirstate_parents(
349 *crate::dirstate::parsers::parse_dirstate_parents(
350 &dirstate,
350 &dirstate,
351 )?;
351 )?;
352 self.dirstate_parents.set(parents);
352 self.dirstate_parents.set(parents);
353 Ok((identity, None, 0))
353 Ok((identity, None, 0))
354 }
354 }
355 }
355 }
356 }
356 }
357 }
357 }
358
358
359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 if self.use_dirstate_v2() {
360 if self.use_dirstate_v2() {
361 // The v2 dirstate is split into a docket and a data file.
361 // The v2 dirstate is split into a docket and a data file.
362 // Since we don't always take the `wlock` to read it
362 // Since we don't always take the `wlock` to read it
363 // (like in `hg status`), it is susceptible to races.
363 // (like in `hg status`), it is susceptible to races.
364 // A simple retry method should be enough since full rewrites
364 // A simple retry method should be enough since full rewrites
365 // only happen when too much garbage data is present and
365 // only happen when too much garbage data is present and
366 // this race is unlikely.
366 // this race is unlikely.
367 let mut tries = 0;
367 let mut tries = 0;
368
368
369 while tries < V2_MAX_READ_ATTEMPTS {
369 while tries < V2_MAX_READ_ATTEMPTS {
370 tries += 1;
370 tries += 1;
371 match self.read_docket_and_data_file() {
371 match self.read_docket_and_data_file() {
372 Ok(m) => {
372 Ok(m) => {
373 return Ok(m);
373 return Ok(m);
374 }
374 }
375 Err(e) => match e {
375 Err(e) => match e {
376 DirstateError::Common(HgError::RaceDetected(
376 DirstateError::Common(HgError::RaceDetected(
377 context,
377 context,
378 )) => {
378 )) => {
379 log::info!(
379 log::info!(
380 "dirstate read race detected {} (retry {}/{})",
380 "dirstate read race detected {} (retry {}/{})",
381 context,
381 context,
382 tries,
382 tries,
383 V2_MAX_READ_ATTEMPTS,
383 V2_MAX_READ_ATTEMPTS,
384 );
384 );
385 continue;
385 continue;
386 }
386 }
387 _ => {
387 _ => {
388 log::info!(
388 log::info!(
389 "Reading dirstate v2 failed, \
389 "Reading dirstate v2 failed, \
390 falling back to v1"
390 falling back to v1"
391 );
391 );
392 return self.new_dirstate_map_v1();
392 return self.new_dirstate_map_v1();
393 }
393 }
394 },
394 },
395 }
395 }
396 }
396 }
397 let error = HgError::abort(
397 let error = HgError::abort(
398 format!("dirstate read race happened {tries} times in a row"),
398 format!("dirstate read race happened {tries} times in a row"),
399 255,
399 255,
400 None,
400 None,
401 );
401 );
402 Err(DirstateError::Common(error))
402 Err(DirstateError::Common(error))
403 } else {
403 } else {
404 self.new_dirstate_map_v1()
404 self.new_dirstate_map_v1()
405 }
405 }
406 }
406 }
407
407
408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 let identity = self.dirstate_identity()?;
410 let identity = self.dirstate_identity()?;
411 let dirstate_file_contents = self.dirstate_file_contents()?;
411 let dirstate_file_contents = self.dirstate_file_contents()?;
412 if dirstate_file_contents.is_empty() {
412 if dirstate_file_contents.is_empty() {
413 self.dirstate_parents.set(DirstateParents::NULL);
413 self.dirstate_parents.set(DirstateParents::NULL);
414 Ok(OwningDirstateMap::new_empty(Vec::new()))
414 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 } else {
415 } else {
416 let (map, parents) =
416 let (map, parents) =
417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 self.dirstate_parents.set(parents);
418 self.dirstate_parents.set(parents);
419 Ok(map)
419 Ok(map)
420 }
420 }
421 }
421 }
422
422
423 fn read_docket_and_data_file(
423 fn read_docket_and_data_file(
424 &self,
424 &self,
425 ) -> Result<OwningDirstateMap, DirstateError> {
425 ) -> Result<OwningDirstateMap, DirstateError> {
426 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
426 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
427 let dirstate_file_contents = self.dirstate_file_contents()?;
427 let dirstate_file_contents = self.dirstate_file_contents()?;
428 let identity = self.dirstate_identity()?;
428 let identity = self.dirstate_identity()?;
429 if dirstate_file_contents.is_empty() {
429 if dirstate_file_contents.is_empty() {
430 self.dirstate_parents.set(DirstateParents::NULL);
430 self.dirstate_parents.set(DirstateParents::NULL);
431 return Ok(OwningDirstateMap::new_empty(Vec::new()));
431 return Ok(OwningDirstateMap::new_empty(Vec::new()));
432 }
432 }
433 let docket = crate::dirstate_tree::on_disk::read_docket(
433 let docket = crate::dirstate_tree::on_disk::read_docket(
434 &dirstate_file_contents,
434 &dirstate_file_contents,
435 )?;
435 )?;
436 debug_wait_for_file_or_print(
436 debug_wait_for_file_or_print(
437 self.config(),
437 self.config(),
438 "dirstate.post-docket-read-file",
438 "dirstate.post-docket-read-file",
439 );
439 );
440 self.dirstate_parents.set(docket.parents());
440 self.dirstate_parents.set(docket.parents());
441 let uuid = docket.uuid.to_owned();
441 let uuid = docket.uuid.to_owned();
442 let data_size = docket.data_size();
442 let data_size = docket.data_size();
443
443
444 let context = "between reading dirstate docket and data file";
444 let context = "between reading dirstate docket and data file";
445 let race_error = HgError::RaceDetected(context.into());
445 let race_error = HgError::RaceDetected(context.into());
446 let metadata = docket.tree_metadata();
446 let metadata = docket.tree_metadata();
447
447
448 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
448 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
449 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
449 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
450 let contents = self.hg_vfs().read(docket.data_filename());
450 let contents = self.hg_vfs().read(docket.data_filename());
451 let contents = match contents {
451 let contents = match contents {
452 Ok(c) => c,
452 Ok(c) => c,
453 Err(HgError::IoError { error, context }) => {
453 Err(HgError::IoError { error, context }) => {
454 match error.raw_os_error().expect("real os error") {
454 match error.raw_os_error().expect("real os error") {
455 // 2 = ENOENT, No such file or directory
455 // 2 = ENOENT, No such file or directory
456 // 116 = ESTALE, Stale NFS file handle
456 // 116 = ESTALE, Stale NFS file handle
457 //
457 //
458 // TODO match on `error.kind()` when
458 // TODO match on `error.kind()` when
459 // `ErrorKind::StaleNetworkFileHandle` is stable.
459 // `ErrorKind::StaleNetworkFileHandle` is stable.
460 2 | 116 => {
460 2 | 116 => {
461 // Race where the data file was deleted right after
461 // Race where the data file was deleted right after
462 // we read the docket, try again
462 // we read the docket, try again
463 return Err(race_error.into());
463 return Err(race_error.into());
464 }
464 }
465 _ => {
465 _ => {
466 return Err(
466 return Err(
467 HgError::IoError { error, context }.into()
467 HgError::IoError { error, context }.into()
468 )
468 )
469 }
469 }
470 }
470 }
471 }
471 }
472 Err(e) => return Err(e.into()),
472 Err(e) => return Err(e.into()),
473 };
473 };
474 OwningDirstateMap::new_v2(
474 OwningDirstateMap::new_v2(
475 contents, data_size, metadata, uuid, identity,
475 contents, data_size, metadata, uuid, identity,
476 )
476 )
477 } else {
477 } else {
478 match self
478 match self
479 .hg_vfs()
479 .hg_vfs()
480 .mmap_open(docket.data_filename())
480 .mmap_open(docket.data_filename())
481 .io_not_found_as_none()
481 .io_not_found_as_none()
482 {
482 {
483 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
483 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
484 data_mmap, data_size, metadata, uuid, identity,
484 data_mmap, data_size, metadata, uuid, identity,
485 ),
485 ),
486 Ok(None) => {
486 Ok(None) => {
487 // Race where the data file was deleted right after we
487 // Race where the data file was deleted right after we
488 // read the docket, try again
488 // read the docket, try again
489 return Err(race_error.into());
489 return Err(race_error.into());
490 }
490 }
491 Err(e) => return Err(e.into()),
491 Err(e) => return Err(e.into()),
492 }
492 }
493 }?;
493 }?;
494
494
495 let write_mode_config = self
495 let write_mode_config = self
496 .config()
496 .config()
497 .get_str(b"devel", b"dirstate.v2.data_update_mode")
497 .get_str(b"devel", b"dirstate.v2.data_update_mode")
498 .unwrap_or(Some("auto"))
498 .unwrap_or(Some("auto"))
499 .unwrap_or("auto"); // don't bother for devel options
499 .unwrap_or("auto"); // don't bother for devel options
500 let write_mode = match write_mode_config {
500 let write_mode = match write_mode_config {
501 "auto" => DirstateMapWriteMode::Auto,
501 "auto" => DirstateMapWriteMode::Auto,
502 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
502 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 "force-append" => DirstateMapWriteMode::ForceAppend,
503 "force-append" => DirstateMapWriteMode::ForceAppend,
504 _ => DirstateMapWriteMode::Auto,
504 _ => DirstateMapWriteMode::Auto,
505 };
505 };
506
506
507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508
508
509 Ok(map)
509 Ok(map)
510 }
510 }
511
511
512 pub fn dirstate_map(
512 pub fn dirstate_map(
513 &self,
513 &self,
514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 }
516 }
517
517
518 pub fn dirstate_map_mut(
518 pub fn dirstate_map_mut(
519 &self,
519 &self,
520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 self.dirstate_map
521 self.dirstate_map
522 .get_mut_or_init(|| self.new_dirstate_map())
522 .get_mut_or_init(|| self.new_dirstate_map())
523 }
523 }
524
524
525 fn new_changelog(&self) -> Result<Changelog, HgError> {
525 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 Changelog::open(&self.store_vfs(), self.has_nodemap())
526 Changelog::open(&self.store_vfs(), self.has_nodemap())
527 }
527 }
528
528
529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 self.changelog.get_or_init(|| self.new_changelog())
530 self.changelog.get_or_init(|| self.new_changelog())
531 }
531 }
532
532
533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 self.changelog.get_mut_or_init(|| self.new_changelog())
534 self.changelog.get_mut_or_init(|| self.new_changelog())
535 }
535 }
536
536
537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
539 }
539 }
540
540
541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 self.manifestlog.get_or_init(|| self.new_manifestlog())
542 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 }
543 }
544
544
545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 }
547 }
548
548
549 /// Returns the manifest of the *changeset* with the given node ID
549 /// Returns the manifest of the *changeset* with the given node ID
550 pub fn manifest_for_node(
550 pub fn manifest_for_node(
551 &self,
551 &self,
552 node: impl Into<NodePrefix>,
552 node: impl Into<NodePrefix>,
553 ) -> Result<Manifest, RevlogError> {
553 ) -> Result<Manifest, RevlogError> {
554 self.manifestlog()?.data_for_node(
554 self.manifestlog()?.data_for_node(
555 self.changelog()?
555 self.changelog()?
556 .data_for_node(node.into())?
556 .data_for_node(node.into())?
557 .manifest_node()?
557 .manifest_node()?
558 .into(),
558 .into(),
559 )
559 )
560 }
560 }
561
561
562 /// Returns the manifest of the *changeset* with the given revision number
562 /// Returns the manifest of the *changeset* with the given revision number
563 pub fn manifest_for_rev(
563 pub fn manifest_for_rev(
564 &self,
564 &self,
565 revision: UncheckedRevision,
565 revision: UncheckedRevision,
566 ) -> Result<Manifest, RevlogError> {
566 ) -> Result<Manifest, RevlogError> {
567 self.manifestlog()?.data_for_node(
567 self.manifestlog()?.data_for_node(
568 self.changelog()?
568 self.changelog()?
569 .data_for_rev(revision)?
569 .data_for_rev(revision)?
570 .manifest_node()?
570 .manifest_node()?
571 .into(),
571 .into(),
572 )
572 )
573 }
573 }
574
574
575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 Ok(entry.tracked())
577 Ok(entry.tracked())
578 } else {
578 } else {
579 Ok(false)
579 Ok(false)
580 }
580 }
581 }
581 }
582
582
583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 Filelog::open(self, path)
584 Filelog::open(self, path)
585 }
585 }
586
586
587 /// Write to disk any updates that were made through `dirstate_map_mut`.
587 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 ///
588 ///
589 /// The "wlock" must be held while calling this.
589 /// The "wlock" must be held while calling this.
590 /// See for example `try_with_wlock_no_wait`.
590 /// See for example `try_with_wlock_no_wait`.
591 ///
591 ///
592 /// TODO: have a `WritableRepo` type only accessible while holding the
592 /// TODO: have a `WritableRepo` type only accessible while holding the
593 /// lock?
593 /// lock?
594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 let map = self.dirstate_map()?;
595 let map = self.dirstate_map()?;
596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 // it’s unset
597 // it’s unset
598 let parents = self.dirstate_parents()?;
598 let parents = self.dirstate_parents()?;
599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 let (identity, uuid, data_size) =
600 let (identity, uuid, data_size) =
601 self.get_dirstate_data_file_integrity()?;
601 self.get_dirstate_data_file_integrity()?;
602 let identity_changed = identity != map.old_identity();
602 let identity_changed = identity != map.old_identity();
603 let uuid_changed = uuid.as_deref() != map.old_uuid();
603 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 let data_length_changed = data_size != map.old_data_size();
604 let data_length_changed = data_size != map.old_data_size();
605
605
606 if identity_changed || uuid_changed || data_length_changed {
606 if identity_changed || uuid_changed || data_length_changed {
607 // If any of identity, uuid or length have changed since
607 // If any of identity, uuid or length have changed since
608 // last disk read, don't write.
608 // last disk read, don't write.
609 // This is fine because either we're in a command that doesn't
609 // This is fine because either we're in a command that doesn't
610 // write anything too important (like `hg status`), or we're in
610 // write anything too important (like `hg status`), or we're in
611 // `hg add` and we're supposed to have taken the lock before
611 // `hg add` and we're supposed to have taken the lock before
612 // reading anyway.
612 // reading anyway.
613 //
613 //
614 // TODO complain loudly if we've changed anything important
614 // TODO complain loudly if we've changed anything important
615 // without taking the lock.
615 // without taking the lock.
616 // (see `hg help config.format.use-dirstate-tracked-hint`)
616 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 log::debug!(
617 log::debug!(
618 "dirstate has changed since last read, not updating."
618 "dirstate has changed since last read, not updating."
619 );
619 );
620 return Ok(());
620 return Ok(());
621 }
621 }
622
622
623 let uuid_opt = map.old_uuid();
623 let uuid_opt = map.old_uuid();
624 let write_mode = if uuid_opt.is_some() {
624 let write_mode = if uuid_opt.is_some() {
625 DirstateMapWriteMode::Auto
625 DirstateMapWriteMode::Auto
626 } else {
626 } else {
627 DirstateMapWriteMode::ForceNewDataFile
627 DirstateMapWriteMode::ForceNewDataFile
628 };
628 };
629 let (data, tree_metadata, append, old_data_size) =
629 let (data, tree_metadata, append, old_data_size) =
630 map.pack_v2(write_mode)?;
630 map.pack_v2(write_mode)?;
631
631
632 // Reuse the uuid, or generate a new one, keeping the old for
632 // Reuse the uuid, or generate a new one, keeping the old for
633 // deletion.
633 // deletion.
634 let (uuid, old_uuid) = match uuid_opt {
634 let (uuid, old_uuid) = match uuid_opt {
635 Some(uuid) => {
635 Some(uuid) => {
636 let as_str = std::str::from_utf8(uuid)
636 let as_str = std::str::from_utf8(uuid)
637 .map_err(|_| {
637 .map_err(|_| {
638 HgError::corrupted(
638 HgError::corrupted(
639 "non-UTF-8 dirstate data file ID",
639 "non-UTF-8 dirstate data file ID",
640 )
640 )
641 })?
641 })?
642 .to_owned();
642 .to_owned();
643 if append {
643 if append {
644 (as_str, None)
644 (as_str, None)
645 } else {
645 } else {
646 (DirstateDocket::new_uid(), Some(as_str))
646 (DirstateDocket::new_uid(), Some(as_str))
647 }
647 }
648 }
648 }
649 None => (DirstateDocket::new_uid(), None),
649 None => (DirstateDocket::new_uid(), None),
650 };
650 };
651
651
652 let data_filename = format!("dirstate.{}", uuid);
652 let data_filename = format!("dirstate.{}", uuid);
653 let data_filename = self.hg_vfs().join(data_filename);
653 let data_filename = self.hg_vfs().join(data_filename);
654 let mut options = std::fs::OpenOptions::new();
654 let mut options = std::fs::OpenOptions::new();
655 options.write(true);
655 options.write(true);
656
656
657 // Why are we not using the O_APPEND flag when appending?
657 // Why are we not using the O_APPEND flag when appending?
658 //
658 //
659 // - O_APPEND makes it trickier to deal with garbage at the end of
659 // - O_APPEND makes it trickier to deal with garbage at the end of
660 // the file, left by a previous uncommitted transaction. By
660 // the file, left by a previous uncommitted transaction. By
661 // starting the write at [old_data_size] we make sure we erase
661 // starting the write at [old_data_size] we make sure we erase
662 // all such garbage.
662 // all such garbage.
663 //
663 //
664 // - O_APPEND requires to special-case 0-byte writes, whereas we
664 // - O_APPEND requires to special-case 0-byte writes, whereas we
665 // don't need that.
665 // don't need that.
666 //
666 //
667 // - Some OSes have bugs in implementation O_APPEND:
667 // - Some OSes have bugs in implementation O_APPEND:
668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 // bug: https://github.com/openzfs/zfs/pull/3124,
669 // bug: https://github.com/openzfs/zfs/pull/3124,
670 // https://github.com/openzfs/zfs/issues/13370
670 // https://github.com/openzfs/zfs/issues/13370
671 //
671 //
672 if !append {
672 if !append {
673 log::trace!("creating a new dirstate data file");
673 log::trace!("creating a new dirstate data file");
674 options.create_new(true);
674 options.create_new(true);
675 } else {
675 } else {
676 log::trace!("appending to the dirstate data file");
676 log::trace!("appending to the dirstate data file");
677 }
677 }
678
678
679 let data_size = (|| {
679 let data_size = (|| {
680 // TODO: loop and try another random ID if !append and this
680 // TODO: loop and try another random ID if !append and this
681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 // random IDs is one in 2**32
682 // random IDs is one in 2**32
683 let mut file = options.open(&data_filename)?;
683 let mut file = options.open(&data_filename)?;
684 if append {
684 if append {
685 file.seek(SeekFrom::Start(old_data_size as u64))?;
685 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 }
686 }
687 file.write_all(&data)?;
687 file.write_all(&data)?;
688 file.flush()?;
688 file.flush()?;
689 file.seek(SeekFrom::Current(0))
689 file.stream_position()
690 })()
690 })()
691 .when_writing_file(&data_filename)?;
691 .when_writing_file(&data_filename)?;
692
692
693 let packed_dirstate = DirstateDocket::serialize(
693 let packed_dirstate = DirstateDocket::serialize(
694 parents,
694 parents,
695 tree_metadata,
695 tree_metadata,
696 data_size,
696 data_size,
697 uuid.as_bytes(),
697 uuid.as_bytes(),
698 )
698 )
699 .map_err(|_: std::num::TryFromIntError| {
699 .map_err(|_: std::num::TryFromIntError| {
700 HgError::corrupted("overflow in dirstate docket serialization")
700 HgError::corrupted("overflow in dirstate docket serialization")
701 })?;
701 })?;
702
702
703 (packed_dirstate, old_uuid)
703 (packed_dirstate, old_uuid)
704 } else {
704 } else {
705 let identity = self.dirstate_identity()?;
705 let identity = self.dirstate_identity()?;
706 if identity != map.old_identity() {
706 if identity != map.old_identity() {
707 // If identity changed since last disk read, don't write.
707 // If identity changed since last disk read, don't write.
708 // This is fine because either we're in a command that doesn't
708 // This is fine because either we're in a command that doesn't
709 // write anything too important (like `hg status`), or we're in
709 // write anything too important (like `hg status`), or we're in
710 // `hg add` and we're supposed to have taken the lock before
710 // `hg add` and we're supposed to have taken the lock before
711 // reading anyway.
711 // reading anyway.
712 //
712 //
713 // TODO complain loudly if we've changed anything important
713 // TODO complain loudly if we've changed anything important
714 // without taking the lock.
714 // without taking the lock.
715 // (see `hg help config.format.use-dirstate-tracked-hint`)
715 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 log::debug!(
716 log::debug!(
717 "dirstate has changed since last read, not updating."
717 "dirstate has changed since last read, not updating."
718 );
718 );
719 return Ok(());
719 return Ok(());
720 }
720 }
721 (map.pack_v1(parents)?, None)
721 (map.pack_v1(parents)?, None)
722 };
722 };
723
723
724 let vfs = self.hg_vfs();
724 let vfs = self.hg_vfs();
725 vfs.atomic_write("dirstate", &packed_dirstate)?;
725 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 if let Some(uuid) = old_uuid_to_remove {
726 if let Some(uuid) = old_uuid_to_remove {
727 // Remove the old data file after the new docket pointing to the
727 // Remove the old data file after the new docket pointing to the
728 // new data file was written.
728 // new data file was written.
729 vfs.remove_file(format!("dirstate.{}", uuid))?;
729 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 }
730 }
731 Ok(())
731 Ok(())
732 }
732 }
733 }
733 }
734
734
735 /// Lazily-initialized component of `Repo` with interior mutability
735 /// Lazily-initialized component of `Repo` with interior mutability
736 ///
736 ///
737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 /// later by setting its inner `Option` to `None`. It also takes the
738 /// later by setting its inner `Option` to `None`. It also takes the
739 /// initialization function as an argument when the value is requested, not
739 /// initialization function as an argument when the value is requested, not
740 /// when the instance is created.
740 /// when the instance is created.
741 struct LazyCell<T> {
741 struct LazyCell<T> {
742 value: RefCell<Option<T>>,
742 value: RefCell<Option<T>>,
743 }
743 }
744
744
745 impl<T> LazyCell<T> {
745 impl<T> LazyCell<T> {
746 fn new() -> Self {
746 fn new() -> Self {
747 Self {
747 Self {
748 value: RefCell::new(None),
748 value: RefCell::new(None),
749 }
749 }
750 }
750 }
751
751
752 fn set(&self, value: T) {
752 fn set(&self, value: T) {
753 *self.value.borrow_mut() = Some(value)
753 *self.value.borrow_mut() = Some(value)
754 }
754 }
755
755
756 fn get_or_init<E>(
756 fn get_or_init<E>(
757 &self,
757 &self,
758 init: impl Fn() -> Result<T, E>,
758 init: impl Fn() -> Result<T, E>,
759 ) -> Result<Ref<T>, E> {
759 ) -> Result<Ref<T>, E> {
760 let mut borrowed = self.value.borrow();
760 let mut borrowed = self.value.borrow();
761 if borrowed.is_none() {
761 if borrowed.is_none() {
762 drop(borrowed);
762 drop(borrowed);
763 // Only use `borrow_mut` if it is really needed to avoid panic in
763 // Only use `borrow_mut` if it is really needed to avoid panic in
764 // case there is another outstanding borrow but mutation is not
764 // case there is another outstanding borrow but mutation is not
765 // needed.
765 // needed.
766 *self.value.borrow_mut() = Some(init()?);
766 *self.value.borrow_mut() = Some(init()?);
767 borrowed = self.value.borrow()
767 borrowed = self.value.borrow()
768 }
768 }
769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 }
770 }
771
771
772 fn get_mut_or_init<E>(
772 fn get_mut_or_init<E>(
773 &self,
773 &self,
774 init: impl Fn() -> Result<T, E>,
774 init: impl Fn() -> Result<T, E>,
775 ) -> Result<RefMut<T>, E> {
775 ) -> Result<RefMut<T>, E> {
776 let mut borrowed = self.value.borrow_mut();
776 let mut borrowed = self.value.borrow_mut();
777 if borrowed.is_none() {
777 if borrowed.is_none() {
778 *borrowed = Some(init()?);
778 *borrowed = Some(init()?);
779 }
779 }
780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 }
781 }
782 }
782 }
@@ -1,965 +1,965 b''
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::io::Read;
20 use std::io::Read;
21 use std::ops::Deref;
21 use std::ops::Deref;
22 use std::path::Path;
22 use std::path::Path;
23
23
24 use flate2::read::ZlibDecoder;
24 use flate2::read::ZlibDecoder;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::cell::RefCell;
26 use std::cell::RefCell;
27 use zstd;
27 use zstd;
28
28
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::nodemap_docket::NodeMapDocket;
30 use self::nodemap_docket::NodeMapDocket;
31 use super::index::Index;
31 use super::index::Index;
32 use super::nodemap::{NodeMap, NodeMapError};
32 use super::nodemap::{NodeMap, NodeMapError};
33 use crate::errors::HgError;
33 use crate::errors::HgError;
34 use crate::vfs::Vfs;
34 use crate::vfs::Vfs;
35
35
36 /// As noted in revlog.c, revision numbers are actually encoded in
36 /// As noted in revlog.c, revision numbers are actually encoded in
37 /// 4 bytes, and are liberally converted to ints, whence the i32
37 /// 4 bytes, and are liberally converted to ints, whence the i32
38 pub type BaseRevision = i32;
38 pub type BaseRevision = i32;
39
39
40 /// Mercurial revision numbers
40 /// Mercurial revision numbers
41 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
41 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
42 /// in the sense that they should only be used for revisions that are
42 /// in the sense that they should only be used for revisions that are
43 /// valid for a given index (i.e. in bounds).
43 /// valid for a given index (i.e. in bounds).
44 #[derive(
44 #[derive(
45 Debug,
45 Debug,
46 derive_more::Display,
46 derive_more::Display,
47 Clone,
47 Clone,
48 Copy,
48 Copy,
49 Hash,
49 Hash,
50 PartialEq,
50 PartialEq,
51 Eq,
51 Eq,
52 PartialOrd,
52 PartialOrd,
53 Ord,
53 Ord,
54 )]
54 )]
55 pub struct Revision(pub BaseRevision);
55 pub struct Revision(pub BaseRevision);
56
56
57 impl format_bytes::DisplayBytes for Revision {
57 impl format_bytes::DisplayBytes for Revision {
58 fn display_bytes(
58 fn display_bytes(
59 &self,
59 &self,
60 output: &mut dyn std::io::Write,
60 output: &mut dyn std::io::Write,
61 ) -> std::io::Result<()> {
61 ) -> std::io::Result<()> {
62 self.0.display_bytes(output)
62 self.0.display_bytes(output)
63 }
63 }
64 }
64 }
65
65
66 /// Unchecked Mercurial revision numbers.
66 /// Unchecked Mercurial revision numbers.
67 ///
67 ///
68 /// Values of this type have no guarantee of being a valid revision number
68 /// Values of this type have no guarantee of being a valid revision number
69 /// in any context. Use method `check_revision` to get a valid revision within
69 /// in any context. Use method `check_revision` to get a valid revision within
70 /// the appropriate index object.
70 /// the appropriate index object.
71 #[derive(
71 #[derive(
72 Debug,
72 Debug,
73 derive_more::Display,
73 derive_more::Display,
74 Clone,
74 Clone,
75 Copy,
75 Copy,
76 Hash,
76 Hash,
77 PartialEq,
77 PartialEq,
78 Eq,
78 Eq,
79 PartialOrd,
79 PartialOrd,
80 Ord,
80 Ord,
81 )]
81 )]
82 pub struct UncheckedRevision(pub BaseRevision);
82 pub struct UncheckedRevision(pub BaseRevision);
83
83
84 impl format_bytes::DisplayBytes for UncheckedRevision {
84 impl format_bytes::DisplayBytes for UncheckedRevision {
85 fn display_bytes(
85 fn display_bytes(
86 &self,
86 &self,
87 output: &mut dyn std::io::Write,
87 output: &mut dyn std::io::Write,
88 ) -> std::io::Result<()> {
88 ) -> std::io::Result<()> {
89 self.0.display_bytes(output)
89 self.0.display_bytes(output)
90 }
90 }
91 }
91 }
92
92
93 impl From<Revision> for UncheckedRevision {
93 impl From<Revision> for UncheckedRevision {
94 fn from(value: Revision) -> Self {
94 fn from(value: Revision) -> Self {
95 Self(value.0)
95 Self(value.0)
96 }
96 }
97 }
97 }
98
98
99 impl From<BaseRevision> for UncheckedRevision {
99 impl From<BaseRevision> for UncheckedRevision {
100 fn from(value: BaseRevision) -> Self {
100 fn from(value: BaseRevision) -> Self {
101 Self(value)
101 Self(value)
102 }
102 }
103 }
103 }
104
104
105 /// Marker expressing the absence of a parent
105 /// Marker expressing the absence of a parent
106 ///
106 ///
107 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
107 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
108 /// to be smaller than all existing revisions.
108 /// to be smaller than all existing revisions.
109 pub const NULL_REVISION: Revision = Revision(-1);
109 pub const NULL_REVISION: Revision = Revision(-1);
110
110
111 /// Same as `mercurial.node.wdirrev`
111 /// Same as `mercurial.node.wdirrev`
112 ///
112 ///
113 /// This is also equal to `i32::max_value()`, but it's better to spell
113 /// This is also equal to `i32::max_value()`, but it's better to spell
114 /// it out explicitely, same as in `mercurial.node`
114 /// it out explicitely, same as in `mercurial.node`
115 #[allow(clippy::unreadable_literal)]
115 #[allow(clippy::unreadable_literal)]
116 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
116 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
117 UncheckedRevision(0x7fffffff);
117 UncheckedRevision(0x7fffffff);
118
118
119 pub const WORKING_DIRECTORY_HEX: &str =
119 pub const WORKING_DIRECTORY_HEX: &str =
120 "ffffffffffffffffffffffffffffffffffffffff";
120 "ffffffffffffffffffffffffffffffffffffffff";
121
121
122 /// The simplest expression of what we need of Mercurial DAGs.
122 /// The simplest expression of what we need of Mercurial DAGs.
123 pub trait Graph {
123 pub trait Graph {
124 /// Return the two parents of the given `Revision`.
124 /// Return the two parents of the given `Revision`.
125 ///
125 ///
126 /// Each of the parents can be independently `NULL_REVISION`
126 /// Each of the parents can be independently `NULL_REVISION`
127 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
127 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
128 }
128 }
129
129
130 #[derive(Clone, Debug, PartialEq)]
130 #[derive(Clone, Debug, PartialEq)]
131 pub enum GraphError {
131 pub enum GraphError {
132 ParentOutOfRange(Revision),
132 ParentOutOfRange(Revision),
133 }
133 }
134
134
135 /// The Mercurial Revlog Index
135 /// The Mercurial Revlog Index
136 ///
136 ///
137 /// This is currently limited to the minimal interface that is needed for
137 /// This is currently limited to the minimal interface that is needed for
138 /// the [`nodemap`](nodemap/index.html) module
138 /// the [`nodemap`](nodemap/index.html) module
139 pub trait RevlogIndex {
139 pub trait RevlogIndex {
140 /// Total number of Revisions referenced in this index
140 /// Total number of Revisions referenced in this index
141 fn len(&self) -> usize;
141 fn len(&self) -> usize;
142
142
143 fn is_empty(&self) -> bool {
143 fn is_empty(&self) -> bool {
144 self.len() == 0
144 self.len() == 0
145 }
145 }
146
146
147 /// Return a reference to the Node or `None` for `NULL_REVISION`
147 /// Return a reference to the Node or `None` for `NULL_REVISION`
148 fn node(&self, rev: Revision) -> Option<&Node>;
148 fn node(&self, rev: Revision) -> Option<&Node>;
149
149
150 /// Return a [`Revision`] if `rev` is a valid revision number for this
150 /// Return a [`Revision`] if `rev` is a valid revision number for this
151 /// index
151 /// index
152 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
152 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
153 let rev = rev.0;
153 let rev = rev.0;
154
154
155 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
155 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
156 {
156 {
157 Some(Revision(rev))
157 Some(Revision(rev))
158 } else {
158 } else {
159 None
159 None
160 }
160 }
161 }
161 }
162 }
162 }
163
163
164 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
164 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
165 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
165 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
166 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
166 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
167 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
167 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
168
168
169 // Keep this in sync with REVIDX_KNOWN_FLAGS in
169 // Keep this in sync with REVIDX_KNOWN_FLAGS in
170 // mercurial/revlogutils/flagutil.py
170 // mercurial/revlogutils/flagutil.py
171 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
171 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
172 | REVISION_FLAG_ELLIPSIS
172 | REVISION_FLAG_ELLIPSIS
173 | REVISION_FLAG_EXTSTORED
173 | REVISION_FLAG_EXTSTORED
174 | REVISION_FLAG_HASCOPIESINFO;
174 | REVISION_FLAG_HASCOPIESINFO;
175
175
176 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
176 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
177
177
178 #[derive(Debug, derive_more::From, derive_more::Display)]
178 #[derive(Debug, derive_more::From, derive_more::Display)]
179 pub enum RevlogError {
179 pub enum RevlogError {
180 InvalidRevision,
180 InvalidRevision,
181 /// Working directory is not supported
181 /// Working directory is not supported
182 WDirUnsupported,
182 WDirUnsupported,
183 /// Found more than one entry whose ID match the requested prefix
183 /// Found more than one entry whose ID match the requested prefix
184 AmbiguousPrefix,
184 AmbiguousPrefix,
185 #[from]
185 #[from]
186 Other(HgError),
186 Other(HgError),
187 }
187 }
188
188
189 impl From<NodeMapError> for RevlogError {
189 impl From<NodeMapError> for RevlogError {
190 fn from(error: NodeMapError) -> Self {
190 fn from(error: NodeMapError) -> Self {
191 match error {
191 match error {
192 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
192 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
193 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
193 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
194 format!("nodemap point to revision {} not in index", rev),
194 format!("nodemap point to revision {} not in index", rev),
195 ),
195 ),
196 }
196 }
197 }
197 }
198 }
198 }
199
199
200 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
200 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
201 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
201 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
202 }
202 }
203
203
204 impl RevlogError {
204 impl RevlogError {
205 fn corrupted<S: AsRef<str>>(context: S) -> Self {
205 fn corrupted<S: AsRef<str>>(context: S) -> Self {
206 RevlogError::Other(corrupted(context))
206 RevlogError::Other(corrupted(context))
207 }
207 }
208 }
208 }
209
209
210 /// Read only implementation of revlog.
210 /// Read only implementation of revlog.
211 pub struct Revlog {
211 pub struct Revlog {
212 /// When index and data are not interleaved: bytes of the revlog index.
212 /// When index and data are not interleaved: bytes of the revlog index.
213 /// When index and data are interleaved: bytes of the revlog index and
213 /// When index and data are interleaved: bytes of the revlog index and
214 /// data.
214 /// data.
215 index: Index,
215 index: Index,
216 /// When index and data are not interleaved: bytes of the revlog data
216 /// When index and data are not interleaved: bytes of the revlog data
217 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
217 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
218 /// When present on disk: the persistent nodemap for this revlog
218 /// When present on disk: the persistent nodemap for this revlog
219 nodemap: Option<nodemap::NodeTree>,
219 nodemap: Option<nodemap::NodeTree>,
220 }
220 }
221
221
222 impl Graph for Revlog {
222 impl Graph for Revlog {
223 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
223 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
224 self.index.parents(rev)
224 self.index.parents(rev)
225 }
225 }
226 }
226 }
227
227
228 impl Revlog {
228 impl Revlog {
229 /// Open a revlog index file.
229 /// Open a revlog index file.
230 ///
230 ///
231 /// It will also open the associated data file if index and data are not
231 /// It will also open the associated data file if index and data are not
232 /// interleaved.
232 /// interleaved.
233 pub fn open(
233 pub fn open(
234 store_vfs: &Vfs,
234 store_vfs: &Vfs,
235 index_path: impl AsRef<Path>,
235 index_path: impl AsRef<Path>,
236 data_path: Option<&Path>,
236 data_path: Option<&Path>,
237 use_nodemap: bool,
237 use_nodemap: bool,
238 ) -> Result<Self, HgError> {
238 ) -> Result<Self, HgError> {
239 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
239 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
240 }
240 }
241
241
    /// Shared implementation behind [`Self::open`]: build a `Revlog` from
    /// the index file (and, for non-inline revlogs, the data file) reached
    /// through `store_vfs`.
    ///
    /// `nodemap_for_test` lets tests inject a pre-built nodemap, which then
    /// takes precedence over any persistent nodemap found on disk.
    fn open_gen(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
        use_nodemap: bool,
        nodemap_for_test: Option<nodemap::NodeTree>,
    ) -> Result<Self, HgError> {
        let index_path = index_path.as_ref();
        let index = {
            // A missing index file is treated as an empty revlog.
            match store_vfs.mmap_open_opt(index_path)? {
                None => Index::new(Box::<Vec<_>>::default()),
                Some(index_mmap) => {
                    let index = Index::new(Box::new(index_mmap))?;
                    Ok(index)
                }
            }
        }?;

        // By convention the data file sits next to the index with a `.d`
        // extension, unless an explicit path was provided.
        let default_data_path = index_path.with_extension("d");

        // type annotation required
        // won't recognize Mmap as Deref<Target = [u8]>
        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
            if index.is_inline() {
                // Inline revlogs interleave data with the index entries;
                // there is no separate data file to map.
                None
            } else {
                let data_path = data_path.unwrap_or(&default_data_path);
                let data_mmap = store_vfs.mmap_open(data_path)?;
                Some(Box::new(data_mmap))
            };

        // The persistent nodemap is only used for non-inline revlogs.
        let nodemap = if index.is_inline() || !use_nodemap {
            None
        } else {
            NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                |(docket, data)| {
                    nodemap::NodeTree::load_bytes(
                        Box::new(data),
                        docket.data_length,
                    )
                },
            )
        };

        let nodemap = nodemap_for_test.or(nodemap);

        Ok(Revlog {
            index,
            data_bytes,
            nodemap,
        })
    }
294
294
    /// Return number of entries of the `Revlog`.
    pub fn len(&self) -> usize {
        // The index holds one entry per revision.
        self.index.len()
    }
299
299
    /// Returns `true` if the `Revlog` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        // Delegates to the index, which knows the entry count.
        self.index.is_empty()
    }
304
304
305 /// Returns the node ID for the given revision number, if it exists in this
305 /// Returns the node ID for the given revision number, if it exists in this
306 /// revlog
306 /// revlog
307 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
307 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
308 if rev == NULL_REVISION.into() {
308 if rev == NULL_REVISION.into() {
309 return Some(&NULL_NODE);
309 return Some(&NULL_NODE);
310 }
310 }
311 let rev = self.index.check_revision(rev)?;
311 let rev = self.index.check_revision(rev)?;
312 Some(self.index.get_entry(rev)?.hash())
312 Some(self.index.get_entry(rev)?.hash())
313 }
313 }
314
314
315 /// Return the revision number for the given node ID, if it exists in this
315 /// Return the revision number for the given node ID, if it exists in this
316 /// revlog
316 /// revlog
317 pub fn rev_from_node(
317 pub fn rev_from_node(
318 &self,
318 &self,
319 node: NodePrefix,
319 node: NodePrefix,
320 ) -> Result<Revision, RevlogError> {
320 ) -> Result<Revision, RevlogError> {
321 if let Some(nodemap) = &self.nodemap {
321 if let Some(nodemap) = &self.nodemap {
322 nodemap
322 nodemap
323 .find_bin(&self.index, node)?
323 .find_bin(&self.index, node)?
324 .ok_or(RevlogError::InvalidRevision)
324 .ok_or(RevlogError::InvalidRevision)
325 } else {
325 } else {
326 self.rev_from_node_no_persistent_nodemap(node)
326 self.rev_from_node_no_persistent_nodemap(node)
327 }
327 }
328 }
328 }
329
329
    /// Same as `rev_from_node`, without using a persistent nodemap
    ///
    /// This is used as fallback when a persistent nodemap is not present.
    /// This happens when the persistent-nodemap experimental feature is not
    /// enabled, or for small revlogs.
    fn rev_from_node_no_persistent_nodemap(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        // Linear scan of the revlog
        // TODO: consider building a non-persistent nodemap in memory to
        // optimize these cases.
        let mut found_by_prefix = None;
        // Iterate from the most recent revision down to the null revision
        // (-1), which is also a valid lookup target.
        for rev in (-1..self.len() as BaseRevision).rev() {
            let rev = Revision(rev as BaseRevision);
            let candidate_node = if rev == Revision(-1) {
                NULL_NODE
            } else {
                let index_entry =
                    self.index.get_entry(rev).ok_or_else(|| {
                        HgError::corrupted(
                            "revlog references a revision not in the index",
                        )
                    })?;
                *index_entry.hash()
            };
            if node == candidate_node {
                // An exact match wins immediately over any prefix match.
                return Ok(rev);
            }
            if node.is_prefix_of(&candidate_node) {
                if found_by_prefix.is_some() {
                    // Two distinct nodes share the given prefix.
                    return Err(RevlogError::AmbiguousPrefix);
                }
                found_by_prefix = Some(rev)
            }
        }
        found_by_prefix.ok_or(RevlogError::InvalidRevision)
    }
368
368
    /// Returns whether the given revision exists in this revlog.
    pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
        // A revision exists iff the index accepts it as in-range.
        self.index.check_revision(rev).is_some()
    }
373
373
374 /// Return the full data associated to a revision.
374 /// Return the full data associated to a revision.
375 ///
375 ///
376 /// All entries required to build the final data out of deltas will be
376 /// All entries required to build the final data out of deltas will be
377 /// retrieved as needed, and the deltas will be applied to the inital
377 /// retrieved as needed, and the deltas will be applied to the inital
378 /// snapshot to rebuild the final data.
378 /// snapshot to rebuild the final data.
379 pub fn get_rev_data(
379 pub fn get_rev_data(
380 &self,
380 &self,
381 rev: UncheckedRevision,
381 rev: UncheckedRevision,
382 ) -> Result<Cow<[u8]>, RevlogError> {
382 ) -> Result<Cow<[u8]>, RevlogError> {
383 if rev == NULL_REVISION.into() {
383 if rev == NULL_REVISION.into() {
384 return Ok(Cow::Borrowed(&[]));
384 return Ok(Cow::Borrowed(&[]));
385 };
385 };
386 self.get_entry(rev)?.data()
386 self.get_entry(rev)?.data()
387 }
387 }
388
388
389 /// [`Self::get_rev_data`] for checked revisions.
389 /// [`Self::get_rev_data`] for checked revisions.
390 pub fn get_rev_data_for_checked_rev(
390 pub fn get_rev_data_for_checked_rev(
391 &self,
391 &self,
392 rev: Revision,
392 rev: Revision,
393 ) -> Result<Cow<[u8]>, RevlogError> {
393 ) -> Result<Cow<[u8]>, RevlogError> {
394 if rev == NULL_REVISION {
394 if rev == NULL_REVISION {
395 return Ok(Cow::Borrowed(&[]));
395 return Ok(Cow::Borrowed(&[]));
396 };
396 };
397 self.get_entry_for_checked_rev(rev)?.data()
397 self.get_entry_for_checked_rev(rev)?.data()
398 }
398 }
399
399
400 /// Check the hash of some given data against the recorded hash.
400 /// Check the hash of some given data against the recorded hash.
401 pub fn check_hash(
401 pub fn check_hash(
402 &self,
402 &self,
403 p1: Revision,
403 p1: Revision,
404 p2: Revision,
404 p2: Revision,
405 expected: &[u8],
405 expected: &[u8],
406 data: &[u8],
406 data: &[u8],
407 ) -> bool {
407 ) -> bool {
408 let e1 = self.index.get_entry(p1);
408 let e1 = self.index.get_entry(p1);
409 let h1 = match e1 {
409 let h1 = match e1 {
410 Some(ref entry) => entry.hash(),
410 Some(ref entry) => entry.hash(),
411 None => &NULL_NODE,
411 None => &NULL_NODE,
412 };
412 };
413 let e2 = self.index.get_entry(p2);
413 let e2 = self.index.get_entry(p2);
414 let h2 = match e2 {
414 let h2 = match e2 {
415 Some(ref entry) => entry.hash(),
415 Some(ref entry) => entry.hash(),
416 None => &NULL_NODE,
416 None => &NULL_NODE,
417 };
417 };
418
418
419 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
419 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
420 }
420 }
421
421
    /// Build the full data of a revision out its snapshot
    /// and its deltas.
    fn build_data_from_deltas(
        snapshot: RevlogEntry,
        deltas: &[RevlogEntry],
    ) -> Result<Vec<u8>, HgError> {
        let snapshot = snapshot.data_chunk()?;
        // `deltas` is ordered from the target revision back towards the
        // snapshot, so reverse it to apply the oldest delta first.
        let deltas = deltas
            .iter()
            .rev()
            .map(RevlogEntry::data_chunk)
            .collect::<Result<Vec<_>, _>>()?;
        let patches: Vec<_> =
            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
        // Folding the patch lists applies the whole chain in one pass over
        // the snapshot.
        let patch = patch::fold_patch_lists(&patches);
        Ok(patch.apply(&snapshot))
    }
439
439
    /// Return the revlog data.
    ///
    /// # Panics
    ///
    /// Panics if `data_bytes` was never loaded, which per `open_gen` is the
    /// case exactly for inline revlogs (their data lives in the index).
    fn data(&self) -> &[u8] {
        match &self.data_bytes {
            Some(data_bytes) => data_bytes,
            None => panic!(
                "forgot to load the data or trying to access inline data"
            ),
        }
    }
449
449
    /// Build the synthetic [`RevlogEntry`] for the null revision:
    /// no parents, empty data, null hash.
    pub fn make_null_entry(&self) -> RevlogEntry {
        RevlogEntry {
            revlog: self,
            rev: NULL_REVISION,
            bytes: b"",
            compressed_len: 0,
            uncompressed_len: 0,
            // `None` marks this as a full snapshot, not a delta.
            base_rev_or_base_of_delta_chain: None,
            p1: NULL_REVISION,
            p2: NULL_REVISION,
            flags: NULL_REVLOG_ENTRY_FLAGS,
            hash: NULL_NODE,
        }
    }
464
464
    /// Build the [`RevlogEntry`] for a revision number that has already
    /// been validated against this revlog's index.
    ///
    /// Returns a corruption error if the entry's delta base or parents do
    /// not resolve to valid revisions.
    fn get_entry_for_checked_rev(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(self.make_null_entry());
        }
        let index_entry = self
            .index
            .get_entry(rev)
            .ok_or(RevlogError::InvalidRevision)?;
        // Locate this entry's (possibly compressed) bytes: inline revlogs
        // keep them inside the index file, others in the data file.
        let start = index_entry.offset();
        let end = start + index_entry.compressed_len() as usize;
        let data = if self.index.is_inline() {
            self.index.data(start, end)
        } else {
            &self.data()[start..end]
        };
        let base_rev = self
            .index
            .check_revision(index_entry.base_revision_or_base_of_delta_chain())
            .ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "base revision for rev {} is invalid",
                    rev
                ))
            })?;
        let p1 =
            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "p1 for rev {} is invalid",
                    rev
                ))
            })?;
        let p2 =
            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "p2 for rev {} is invalid",
                    rev
                ))
            })?;
        let entry = RevlogEntry {
            revlog: self,
            rev,
            bytes: data,
            compressed_len: index_entry.compressed_len(),
            uncompressed_len: index_entry.uncompressed_len(),
            // A base equal to the revision itself marks a full snapshot
            // (no delta base).
            base_rev_or_base_of_delta_chain: if base_rev == rev {
                None
            } else {
                Some(base_rev)
            },
            p1,
            p2,
            flags: index_entry.flags(),
            hash: *index_entry.hash(),
        };
        Ok(entry)
    }
524
524
525 /// Get an entry of the revlog.
525 /// Get an entry of the revlog.
526 pub fn get_entry(
526 pub fn get_entry(
527 &self,
527 &self,
528 rev: UncheckedRevision,
528 rev: UncheckedRevision,
529 ) -> Result<RevlogEntry, RevlogError> {
529 ) -> Result<RevlogEntry, RevlogError> {
530 if rev == NULL_REVISION.into() {
530 if rev == NULL_REVISION.into() {
531 return Ok(self.make_null_entry());
531 return Ok(self.make_null_entry());
532 }
532 }
533 let rev = self.index.check_revision(rev).ok_or_else(|| {
533 let rev = self.index.check_revision(rev).ok_or_else(|| {
534 RevlogError::corrupted(format!("rev {} is invalid", rev))
534 RevlogError::corrupted(format!("rev {} is invalid", rev))
535 })?;
535 })?;
536 self.get_entry_for_checked_rev(rev)
536 self.get_entry_for_checked_rev(rev)
537 }
537 }
538 }
538 }
539
539
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    // Back-reference to the owning revlog, used to resolve delta chains.
    revlog: &'revlog Revlog,
    // Revision number of this entry.
    rev: Revision,
    // Raw (possibly compressed) bytes for this entry.
    bytes: &'revlog [u8],
    // Length of `bytes` as recorded in the index.
    compressed_len: u32,
    // Expected length after decompression; signed because the on-disk
    // index encoding can hold negative values (callers clamp with max(0)).
    uncompressed_len: i32,
    // `None` for a full snapshot, `Some(base)` when this entry is a delta.
    base_rev_or_base_of_delta_chain: Option<Revision>,
    // First parent revision (`NULL_REVISION` when absent).
    p1: Revision,
    // Second parent revision (`NULL_REVISION` when absent).
    p2: Revision,
    // Revision flags (censored, ellipsis, ...).
    flags: u16,
    // Node ID (hash) of this revision.
    hash: Node,
}
555
555
556 thread_local! {
556 thread_local! {
557 // seems fine to [unwrap] here: this can only fail due to memory allocation
557 // seems fine to [unwrap] here: this can only fail due to memory allocation
558 // failing, and it's normal for that to cause panic.
558 // failing, and it's normal for that to cause panic.
559 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
559 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
560 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
560 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
561 }
561 }
562
562
/// Decompress zstd-compressed `bytes` into `buf` using the thread-local
/// decompressor, returning the number of bytes written on success.
fn zstd_decompress_to_buffer(
    bytes: &[u8],
    buf: &mut Vec<u8>,
) -> Result<usize, std::io::Error> {
    ZSTD_DECODER
        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
}
570
570
impl<'revlog> RevlogEntry<'revlog> {
    /// Revision number of this entry.
    pub fn revision(&self) -> Revision {
        self.rev
    }

    /// Node ID (hash) of this revision.
    pub fn node(&self) -> &Node {
        &self.hash
    }

    /// Expected length of this entry's data after decompression, or `None`
    /// when the index does not record a usable (non-negative) length.
    pub fn uncompressed_len(&self) -> Option<u32> {
        u32::try_from(self.uncompressed_len).ok()
    }

    /// Whether this revision has a first parent.
    pub fn has_p1(&self) -> bool {
        self.p1 != NULL_REVISION
    }

    /// The entry for this revision's first parent, if any.
    pub fn p1_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p1 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
        }
    }

    /// The entry for this revision's second parent, if any.
    pub fn p2_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p2 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
        }
    }

    /// First parent revision, or `None` when there is none.
    pub fn p1(&self) -> Option<Revision> {
        if self.p1 == NULL_REVISION {
            None
        } else {
            Some(self.p1)
        }
    }

    /// Second parent revision, or `None` when there is none.
    pub fn p2(&self) -> Option<Revision> {
        if self.p2 == NULL_REVISION {
            None
        } else {
            Some(self.p2)
        }
    }

    /// Whether this revision's content was censored.
    pub fn is_censored(&self) -> bool {
        (self.flags & REVISION_FLAG_CENSORED) != 0
    }

    /// Whether a flag is set that may change the length of the data.
    pub fn has_length_affecting_flag_processor(&self) -> bool {
        // Relevant Python code: revlog.size()
        // note: ELLIPSIS is known to not change the content
        (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
    }

    /// The data for this entry, after resolving deltas if any.
    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        let mut entry = self.clone();
        let mut delta_chain = vec![];

        // The meaning of `base_rev_or_base_of_delta_chain` depends on
        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
        // `mercurial/revlogutils/constants.py` and the code in
        // [_chaininfo] and in [index_deltachain].
        let uses_generaldelta = self.revlog.index.uses_generaldelta();
        // Walk back to the snapshot this entry derives from, collecting
        // every intermediate delta along the way.
        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
            entry = if uses_generaldelta {
                delta_chain.push(entry);
                self.revlog.get_entry_for_checked_rev(base_rev)?
            } else {
                // Without generaldelta, each delta's base is simply the
                // previous revision.
                let base_rev = UncheckedRevision(entry.rev.0 - 1);
                delta_chain.push(entry);
                self.revlog.get_entry(base_rev)?
            };
        }

        let data = if delta_chain.is_empty() {
            entry.data_chunk()?
        } else {
            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
        };

        Ok(data)
    }

    /// Verify `data` against the recorded hash, returning it on success.
    fn check_data(
        &self,
        data: Cow<'revlog, [u8]>,
    ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        if self.revlog.check_hash(
            self.p1,
            self.p2,
            self.hash.as_bytes(),
            &data,
        ) {
            Ok(data)
        } else {
            if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
                // Ellipsis revisions legitimately fail the hash check, but
                // rhg does not implement them.
                return Err(HgError::unsupported(
                    "ellipsis revisions are not supported by rhg",
                )
                .into());
            }
            Err(corrupted(format!(
                "hash check failed for revision {}",
                self.rev
            ))
            .into())
        }
    }

    /// This entry's fully checked data: deltas resolved, censorship
    /// rejected, hash verified (except for the null revision).
    pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        let data = self.rawdata()?;
        if self.rev == NULL_REVISION {
            // The null revision has no recorded hash to verify against.
            return Ok(data);
        }
        if self.is_censored() {
            return Err(HgError::CensoredNodeError.into());
        }
        self.check_data(data)
    }

    /// Extract the data contained in the entry.
    /// This may be a delta. (See `is_delta`.)
    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.bytes.is_empty() {
            return Ok(Cow::Borrowed(&[]));
        }
        // The first byte of the entry is a compression marker.
        match self.bytes[0] {
            // Revision data is the entirety of the entry, including this
            // header.
            b'\0' => Ok(Cow::Borrowed(self.bytes)),
            // Raw revision data follows.
            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
            // zlib (RFC 1950) data.
            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
            // zstd data.
            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
            // A proper new format should have had a repo/store requirement.
            format_type => Err(corrupted(format!(
                "unknown compression header '{}'",
                format_type
            ))),
        }
    }

    /// Decompress this entry's zlib-compressed bytes.
    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
        let mut decoder = ZlibDecoder::new(self.bytes);
        if self.is_delta() {
            // For a delta, `uncompressed_len` describes the delta-decoded
            // data rather than the delta itself, so start from the
            // compressed length and read until the stream ends.
            let mut buf = Vec::with_capacity(self.compressed_len as usize);
            decoder
                .read_to_end(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        } else {
            // For a snapshot the decompressed length is known exactly.
            let cap = self.uncompressed_len.max(0) as usize;
            let mut buf = vec![0; cap];
            decoder
                .read_exact(&mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            Ok(buf)
        }
    }

    /// Decompress this entry's zstd-compressed bytes.
    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
        let cap = self.uncompressed_len.max(0) as usize;
        if self.is_delta() {
            // [cap] is usually an over-estimate of the space needed because
            // it's the length of delta-decoded data, but we're interested
            // in the size of the delta.
            // This means we have to [shrink_to_fit] to avoid holding on
            // to a large chunk of memory, but it also means we must have a
            // fallback branch, for the case when the delta is longer than
            // the original data (surprisingly, this does happen in practice)
            let mut buf = Vec::with_capacity(cap);
            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
                Ok(_) => buf.shrink_to_fit(),
                Err(_) => {
                    buf.clear();
                    zstd::stream::copy_decode(self.bytes, &mut buf)
                        .map_err(|e| corrupted(e.to_string()))?;
                }
            };
            Ok(buf)
        } else {
            let mut buf = Vec::with_capacity(cap);
            let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                .map_err(|e| corrupted(e.to_string()))?;
            // A snapshot must decompress to exactly the recorded length.
            if len != self.uncompressed_len as usize {
                Err(corrupted("uncompressed length does not match"))
            } else {
                Ok(buf)
            }
        }
    }

    /// Tell if the entry is a snapshot or a delta
    /// (influences on decompression).
    fn is_delta(&self) -> bool {
        self.base_rev_or_base_of_delta_chain.is_some()
    }
}
781
781
782 /// Calculate the hash of a revision given its data and its parents.
782 /// Calculate the hash of a revision given its data and its parents.
783 fn hash(
783 fn hash(
784 data: &[u8],
784 data: &[u8],
785 p1_hash: &[u8],
785 p1_hash: &[u8],
786 p2_hash: &[u8],
786 p2_hash: &[u8],
787 ) -> [u8; NODE_BYTES_LENGTH] {
787 ) -> [u8; NODE_BYTES_LENGTH] {
788 let mut hasher = Sha1::new();
788 let mut hasher = Sha1::new();
789 let (a, b) = (p1_hash, p2_hash);
789 let (a, b) = (p1_hash, p2_hash);
790 if a > b {
790 if a > b {
791 hasher.update(b);
791 hasher.update(b);
792 hasher.update(a);
792 hasher.update(a);
793 } else {
793 } else {
794 hasher.update(a);
794 hasher.update(a);
795 hasher.update(b);
795 hasher.update(b);
796 }
796 }
797 hasher.update(data);
797 hasher.update(data);
798 *hasher.finalize().as_ref()
798 *hasher.finalize().as_ref()
799 }
799 }
800
800
801 #[cfg(test)]
801 #[cfg(test)]
802 mod tests {
802 mod tests {
803 use super::*;
803 use super::*;
804 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
804 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
805 use itertools::Itertools;
805 use itertools::Itertools;
806
806
807 #[test]
807 #[test]
808 fn test_empty() {
808 fn test_empty() {
809 let temp = tempfile::tempdir().unwrap();
809 let temp = tempfile::tempdir().unwrap();
810 let vfs = Vfs { base: temp.path() };
810 let vfs = Vfs { base: temp.path() };
811 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
811 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
813 assert!(revlog.is_empty());
813 assert!(revlog.is_empty());
814 assert_eq!(revlog.len(), 0);
814 assert_eq!(revlog.len(), 0);
815 assert!(revlog.get_entry(0.into()).is_err());
815 assert!(revlog.get_entry(0.into()).is_err());
816 assert!(!revlog.has_rev(0.into()));
816 assert!(!revlog.has_rev(0.into()));
817 assert_eq!(
817 assert_eq!(
818 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
818 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
819 NULL_REVISION
819 NULL_REVISION
820 );
820 );
821 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
821 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
822 assert_eq!(null_entry.revision(), NULL_REVISION);
822 assert_eq!(null_entry.revision(), NULL_REVISION);
823 assert!(null_entry.data().unwrap().is_empty());
823 assert!(null_entry.data().unwrap().is_empty());
824 }
824 }
825
825
    #[test]
    fn test_inline() {
        // Build an inline (format v1) revlog with three revisions on disk,
        // reopen it, and check that entries, nodes and parent links all
        // round-trip through `Revlog::open`.
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        // Only the first entry carries the header (version + inline flag).
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        // Third revision has both parents set (revisions 0 and 1).
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(Revision(0))
            .with_p2(Revision(1))
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        // Revision 0: no parents; parent lookups yield `Ok(None)`.
        let entry0 = revlog.get_entry(0.into()).ok().unwrap();
        assert_eq!(entry0.revision(), Revision(0));
        assert_eq!(*entry0.node(), node0);
        assert!(!entry0.has_p1());
        assert_eq!(entry0.p1(), None);
        assert_eq!(entry0.p2(), None);
        let p1_entry = entry0.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry0.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 1: also parentless.
        let entry1 = revlog.get_entry(1.into()).ok().unwrap();
        assert_eq!(entry1.revision(), Revision(1));
        assert_eq!(*entry1.node(), node1);
        assert!(!entry1.has_p1());
        assert_eq!(entry1.p1(), None);
        assert_eq!(entry1.p2(), None);
        let p1_entry = entry1.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry1.p2_entry().unwrap();
        assert!(p2_entry.is_none());

        // Revision 2: both parents must resolve to revisions 0 and 1.
        let entry2 = revlog.get_entry(2.into()).ok().unwrap();
        assert_eq!(entry2.revision(), Revision(2));
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(Revision(0)));
        assert_eq!(entry2.p2(), Some(Revision(1)));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), Revision(0));
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), Revision(1));
    }
895
895
    #[test]
    fn test_nodemap() {
        // Exercise node-to-revision resolution (`rev_from_node`) through a
        // persistent nodemap, including prefix and ambiguity handling.
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };

        // building a revlog with a forced Node starting with zeros
        // This is a corruption, but it does not preclude using the nodemap
        // if we don't try and access the data
        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();

        // Build a nodetree index matching the two entries written above.
        let mut idx = nodemap::tests::TestNtIndex::new();
        idx.insert_node(Revision(0), node0).unwrap();
        idx.insert_node(Revision(1), node1).unwrap();

        let revlog =
            Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();

        // accessing the data shows the corruption
        revlog.get_entry(0.into()).unwrap().data().unwrap_err();

        // The null node resolves to the null revision, and full nodes
        // resolve to their respective revisions.
        assert_eq!(
            revlog.rev_from_node(NULL_NODE.into()).unwrap(),
            Revision(-1)
        );
        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
        // Prefix lookups: "000" only matches the null node here, and
        // "b00" only matches node1.
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("000").unwrap())
                .unwrap(),
            Revision(-1)
        );
        assert_eq!(
            revlog
                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
                .unwrap(),
            Revision(1)
        );
        // "00" matches both the null node and node0, so it is ambiguous.
        // RevlogError does not implement PartialEq
        // (ultimately because io::Error does not)
        match revlog
            .rev_from_node(NodePrefix::from_hex("00").unwrap())
            .expect_err("Expected to give AmbiguousPrefix error")
        {
            RevlogError::AmbiguousPrefix => (),
            e => {
                panic!("Got another error than AmbiguousPrefix: {:?}", e);
            }
        };
    }
965 }
965 }
@@ -1,429 +1,429 b''
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use bytes_cast::BytesCast;
12 use bytes_cast::BytesCast;
13 use std::fmt;
13 use std::fmt;
14
14
/// The length in bytes of a `Node`
///
/// This constant is meant to ease refactors of this module, so that
/// calling code does not assume all nodes have the same size, should we
/// support several formats concurrently in the future.
pub const NODE_BYTES_LENGTH: usize = 20;

/// Id of the null node.
///
/// Used to indicate the absence of node.
pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];

/// The length of a `Node` in nybbles (hexadecimal digits), i.e. twice
/// its length in bytes.
///
/// see also `NODE_BYTES_LENGTH` about it being private.
const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;

/// Default number of hex digits used when displaying a shortened node
/// prefix in the UI (see [`Node::short`]).
const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;

/// Private alias for readability and to ease future change
type NodeData = [u8; NODE_BYTES_LENGTH];
38
38
/// Binary revision SHA
///
/// ## Future changes of hash size
///
/// To accommodate future changes of hash size, Rust callers
/// should use the conversion methods at the boundaries (FFI, actual
/// computation of hashes and I/O) only, and only if required.
///
/// All other callers outside of unit tests should just handle `Node` values
/// and never make any assumption on the actual length, using [`nybbles_len`]
/// if they need a loop boundary.
///
/// All methods that create a `Node` either take a type that enforces
/// the size or return an error at runtime.
///
/// [`nybbles_len`]: #method.nybbles_len
#[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
#[repr(transparent)]
pub struct Node {
    // `repr(transparent)` guarantees the same layout as the raw byte
    // array, which `BytesCast` relies on for zero-copy parsing.
    data: NodeData,
}
60
60
61 impl fmt::Debug for Node {
61 impl fmt::Debug for Node {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 let n = format!("{:x?}", self.data);
63 let n = format!("{:x?}", self.data);
64 // We're using debug_tuple because it makes the output a little
64 // We're using debug_tuple because it makes the output a little
65 // more compact without losing data.
65 // more compact without losing data.
66 f.debug_tuple("Node").field(&n).finish()
66 f.debug_tuple("Node").field(&n).finish()
67 }
67 }
68 }
68 }
69
69
/// The node value for NULL_REVISION
///
/// All bytes are zero, matching [`NULL_NODE_ID`].
pub const NULL_NODE: Node = Node {
    data: [0; NODE_BYTES_LENGTH],
};
74
74
75 /// Return an error if the slice has an unexpected length
75 /// Return an error if the slice has an unexpected length
76 impl<'a> TryFrom<&'a [u8]> for &'a Node {
76 impl<'a> TryFrom<&'a [u8]> for &'a Node {
77 type Error = ();
77 type Error = ();
78
78
79 #[inline]
79 #[inline]
80 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
80 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
81 match Node::from_bytes(bytes) {
81 match Node::from_bytes(bytes) {
82 Ok((node, rest)) if rest.is_empty() => Ok(node),
82 Ok((node, rest)) if rest.is_empty() => Ok(node),
83 _ => Err(()),
83 _ => Err(()),
84 }
84 }
85 }
85 }
86 }
86 }
87
87
88 /// Return an error if the slice has an unexpected length
88 /// Return an error if the slice has an unexpected length
89 impl TryFrom<&'_ [u8]> for Node {
89 impl TryFrom<&'_ [u8]> for Node {
90 type Error = std::array::TryFromSliceError;
90 type Error = std::array::TryFromSliceError;
91
91
92 #[inline]
92 #[inline]
93 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
93 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
94 let data = bytes.try_into()?;
94 let data = bytes.try_into()?;
95 Ok(Self { data })
95 Ok(Self { data })
96 }
96 }
97 }
97 }
98
98
99 impl From<&'_ NodeData> for Node {
99 impl From<&'_ NodeData> for Node {
100 #[inline]
100 #[inline]
101 fn from(data: &'_ NodeData) -> Self {
101 fn from(data: &'_ NodeData) -> Self {
102 Self { data: *data }
102 Self { data: *data }
103 }
103 }
104 }
104 }
105
105
106 impl fmt::LowerHex for Node {
106 impl fmt::LowerHex for Node {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 for &byte in &self.data {
108 for &byte in &self.data {
109 write!(f, "{:02x}", byte)?
109 write!(f, "{:02x}", byte)?
110 }
110 }
111 Ok(())
111 Ok(())
112 }
112 }
113 }
113 }
114
114
/// Error returned when parsing a node (or node prefix) from a
/// hexadecimal string fails.
#[derive(Debug)]
pub struct FromHexError;
117
117
/// Low level utility function, also for prefixes
///
/// Returns the `i`th half-byte of `s`, reading each byte
/// most-significant nybble first.
fn get_nybble(s: &[u8], i: usize) -> u8 {
    // Even indices select the upper half of the byte, odd indices the
    // lower half; masking keeps only the 4 relevant bits.
    let shift = if i % 2 == 0 { 4 } else { 0 };
    (s[i / 2] >> shift) & 0x0f
}
126
126
impl Node {
    /// Retrieve the `i`th half-byte of the binary data.
    ///
    /// This is also the `i`th hexadecimal digit in numeric form,
    /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
    pub fn get_nybble(&self, i: usize) -> u8 {
        get_nybble(&self.data, i)
    }

    /// Length of the data, in nybbles
    pub fn nybbles_len(&self) -> usize {
        // public exposure as an instance method only, so that we can
        // easily support several sizes of hashes if needed in the future.
        NODE_NYBBLES_LENGTH
    }

    /// Convert from hexadecimal string representation
    ///
    /// Exact length is required.
    ///
    /// To be used in FFI and I/O only, in order to facilitate future
    /// changes of hash format.
    pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
        // Parse as a prefix first, then insist that the prefix covers
        // the whole node (i.e. the input had the exact full length).
        let prefix = NodePrefix::from_hex(hex)?;
        if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
            Ok(Self { data: prefix.data })
        } else {
            Err(FromHexError)
        }
    }

    /// `from_hex`, but for input from an internal file of the repository such
    /// as a changelog or manifest entry.
    ///
    /// An error is treated as repository corruption.
    pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
        Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
            HgError::CorruptedRepository(format!(
                "Expected a full hexadecimal node ID, found {}",
                String::from_utf8_lossy(hex.as_ref())
            ))
        })
    }

    /// Provide access to binary data
    ///
    /// This is needed by FFI layers, for instance to return expected
    /// binary values to Python.
    pub fn as_bytes(&self) -> &[u8] {
        &self.data
    }

    /// Shortened form of this node for UI display, as a prefix of
    /// `SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH` (12) hex digits.
    pub fn short(&self) -> NodePrefix {
        NodePrefix {
            nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
            data: self.data,
        }
    }

    /// Zero-extend the 20-byte hash to a 32-byte (256-bit) array, with
    /// the node's bytes in the leading positions.
    pub fn pad_to_256_bits(&self) -> [u8; 32] {
        let mut bits = [0; 32];
        bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
        bits
    }
}
192
192
/// The beginning of a binary revision SHA.
///
/// Since it can potentially come from a hexadecimal representation with
/// odd length, it needs to carry around whether the last 4 bits are relevant
/// or not.
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct NodePrefix {
    /// In `1..=NODE_NYBBLES_LENGTH`
    nybbles_len: u8,
    /// The first `4 * length_in_nybbles` bits are used (considering bits
    /// within a byte in big-endian: most significant first), the rest
    /// are zero.
    data: NodeData,
}
207
207
impl NodePrefix {
    /// Convert from hexadecimal string representation
    ///
    /// Similarly to `hex::decode`, can be used with Unicode string types
    /// (`String`, `&str`) as well as bytes.
    ///
    /// To be used in FFI and I/O only, in order to facilitate future
    /// changes of hash format.
    pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
        let hex = hex.as_ref();
        let len = hex.len();
        // A prefix must have between 1 and NODE_NYBBLES_LENGTH digits.
        if len > NODE_NYBBLES_LENGTH || len == 0 {
            return Err(FromHexError);
        }

        let mut data = [0; NODE_BYTES_LENGTH];
        let mut nybbles_len = 0;
        for &ascii_byte in hex {
            // Any non-hex character makes the whole input invalid.
            let nybble = match char::from(ascii_byte).to_digit(16) {
                Some(digit) => digit as u8,
                None => return Err(FromHexError),
            };
            // Fill in the upper half of a byte first, then the lower half.
            let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
            data[nybbles_len as usize / 2] |= nybble << shift;
            nybbles_len += 1;
        }
        Ok(Self { data, nybbles_len })
    }

    /// Number of hexadecimal digits this prefix covers.
    pub fn nybbles_len(&self) -> usize {
        self.nybbles_len as _
    }

    /// Whether this prefix matches the beginning of `node`.
    pub fn is_prefix_of(&self, node: &Node) -> bool {
        // Compare whole bytes first, then the trailing half-byte (if the
        // prefix has odd length).
        let full_bytes = self.nybbles_len() / 2;
        if self.data[..full_bytes] != node.data[..full_bytes] {
            return false;
        }
        if self.nybbles_len() % 2 == 0 {
            return true;
        }
        let last = self.nybbles_len() - 1;
        self.get_nybble(last) == node.get_nybble(last)
    }

    /// Retrieve the `i`th half-byte from the prefix.
    ///
    /// This is also the `i`th hexadecimal digit in numeric form,
    /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
    pub fn get_nybble(&self, i: usize) -> u8 {
        assert!(i < self.nybbles_len());
        get_nybble(&self.data, i)
    }

    /// Iterate over the relevant nybbles, in order.
    fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
        (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
    }

    /// Return the index first nybble that's different from `node`
    ///
    /// If the return value is `None` that means that `self` is
    /// a prefix of `node`, but the current method is a bit slower
    /// than `is_prefix_of`.
    ///
    /// Returned index is as in `get_nybble`, i.e., starting at 0.
    pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
        self.iter_nybbles()
            .zip(NodePrefix::from(*node).iter_nybbles())
            .position(|(a, b)| a != b)
    }
}
280
280
281 impl fmt::LowerHex for NodePrefix {
281 impl fmt::LowerHex for NodePrefix {
282 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
282 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283 let full_bytes = self.nybbles_len() / 2;
283 let full_bytes = self.nybbles_len() / 2;
284 for &byte in &self.data[..full_bytes] {
284 for &byte in &self.data[..full_bytes] {
285 write!(f, "{:02x}", byte)?
285 write!(f, "{:02x}", byte)?
286 }
286 }
287 if self.nybbles_len() % 2 == 1 {
287 if self.nybbles_len() % 2 == 1 {
288 let last = self.nybbles_len() - 1;
288 let last = self.nybbles_len() - 1;
289 write!(f, "{:x}", self.get_nybble(last))?
289 write!(f, "{:x}", self.get_nybble(last))?
290 }
290 }
291 Ok(())
291 Ok(())
292 }
292 }
293 }
293 }
294
294
295 /// A shortcut for full `Node` references
295 /// A shortcut for full `Node` references
296 impl From<&'_ Node> for NodePrefix {
296 impl From<&'_ Node> for NodePrefix {
297 fn from(node: &'_ Node) -> Self {
297 fn from(node: &'_ Node) -> Self {
298 NodePrefix {
298 NodePrefix {
299 nybbles_len: node.nybbles_len() as _,
299 nybbles_len: node.nybbles_len() as _,
300 data: node.data,
300 data: node.data,
301 }
301 }
302 }
302 }
303 }
303 }
304
304
305 /// A shortcut for full `Node` references
305 /// A shortcut for full `Node` references
306 impl From<Node> for NodePrefix {
306 impl From<Node> for NodePrefix {
307 fn from(node: Node) -> Self {
307 fn from(node: Node) -> Self {
308 NodePrefix {
308 NodePrefix {
309 nybbles_len: node.nybbles_len() as _,
309 nybbles_len: node.nybbles_len() as _,
310 data: node.data,
310 data: node.data,
311 }
311 }
312 }
312 }
313 }
313 }
314
314
315 impl PartialEq<Node> for NodePrefix {
315 impl PartialEq<Node> for NodePrefix {
316 fn eq(&self, other: &Node) -> bool {
316 fn eq(&self, other: &Node) -> bool {
317 self.data == other.data && self.nybbles_len() == other.nybbles_len()
317 self.data == other.data && self.nybbles_len() == other.nybbles_len()
318 }
318 }
319 }
319 }
320
320
#[cfg(test)]
mod tests {
    use super::*;

    // A full-length sample node and its hexadecimal spelling, used as the
    // reference value throughout these tests.
    const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
    const SAMPLE_NODE: Node = Node {
        data: [
            0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
            0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
        ],
    };

    /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
    /// The padding is made with zeros.
    pub fn hex_pad_right(hex: &str) -> String {
        let mut res = hex.to_string();
        while res.len() < NODE_NYBBLES_LENGTH {
            res.push('0');
        }
        res
    }

    #[test]
    fn test_node_from_hex() {
        // `Node::from_hex` requires the exact full length and valid hex.
        let not_hex = "012... oops";
        let too_short = "0123";
        let too_long = format!("{}0", SAMPLE_NODE_HEX);
        assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
        assert!(Node::from_hex(not_hex).is_err());
        assert!(Node::from_hex(too_short).is_err());
        assert!(Node::from_hex(too_long).is_err());
    }

    #[test]
    fn test_node_encode_hex() {
        assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
    }

    #[test]
    fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
        // Round-trip through `from_hex` + `LowerHex`, including an
        // odd-length prefix.
        assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
        assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
        assert_eq!(
            format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
            SAMPLE_NODE_HEX
        );
        Ok(())
    }

    #[test]
    fn test_prefix_from_hex_errors() {
        // Non-hex characters and over-long inputs are both rejected.
        assert!(NodePrefix::from_hex("testgr").is_err());
        let mut long = format!("{:x}", NULL_NODE);
        long.push('c');
        assert!(NodePrefix::from_hex(&long).is_err())
    }

    #[test]
    fn test_is_prefix_of() -> Result<(), FromHexError> {
        let mut node_data = [0; NODE_BYTES_LENGTH];
        node_data[0] = 0x12;
        node_data[1] = 0xca;
        let node = Node::from(node_data);
        assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
        assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
        // Odd-length prefixes exercise the trailing half-byte comparison.
        assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
        assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
        Ok(())
    }

    #[test]
    fn test_get_nybble() -> Result<(), FromHexError> {
        let prefix = NodePrefix::from_hex("dead6789cafe")?;
        assert_eq!(prefix.get_nybble(0), 13);
        assert_eq!(prefix.get_nybble(7), 9);
        Ok(())
    }

    #[test]
    fn test_first_different_nybble_even_prefix() {
        let prefix = NodePrefix::from_hex("12ca").unwrap();
        let mut node = Node::from([0; NODE_BYTES_LENGTH]);
        assert_eq!(prefix.first_different_nybble(&node), Some(0));
        node.data[0] = 0x13;
        assert_eq!(prefix.first_different_nybble(&node), Some(1));
        node.data[0] = 0x12;
        assert_eq!(prefix.first_different_nybble(&node), Some(2));
        node.data[1] = 0xca;
        // now it is a prefix
        assert_eq!(prefix.first_different_nybble(&node), None);
    }

    #[test]
    fn test_first_different_nybble_odd_prefix() {
        let prefix = NodePrefix::from_hex("12c").unwrap();
        let mut node = Node::from([0; NODE_BYTES_LENGTH]);
        assert_eq!(prefix.first_different_nybble(&node), Some(0));
        node.data[0] = 0x13;
        assert_eq!(prefix.first_different_nybble(&node), Some(1));
        node.data[0] = 0x12;
        assert_eq!(prefix.first_different_nybble(&node), Some(2));
        node.data[1] = 0xca;
        // now it is a prefix
        assert_eq!(prefix.first_different_nybble(&node), None);
    }
}
427
427
428 #[cfg(test)]
428 #[cfg(test)]
429 pub use tests::hex_pad_right;
429 pub use tests::hex_pad_right;
@@ -1,1102 +1,1108 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use crate::UncheckedRevision;
15 use crate::UncheckedRevision;
16
16
17 use super::{
17 use super::{
18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
19 };
19 };
20
20
21 use bytes_cast::{unaligned, BytesCast};
21 use bytes_cast::{unaligned, BytesCast};
22 use std::cmp::max;
22 use std::cmp::max;
23 use std::fmt;
23 use std::fmt;
24 use std::mem::{self, align_of, size_of};
24 use std::mem::{self, align_of, size_of};
25 use std::ops::Deref;
25 use std::ops::Deref;
26 use std::ops::Index;
26 use std::ops::Index;
27
27
28 #[derive(Debug, PartialEq)]
28 #[derive(Debug, PartialEq)]
29 pub enum NodeMapError {
29 pub enum NodeMapError {
30 /// A `NodePrefix` matches several [`Revision`]s.
30 /// A `NodePrefix` matches several [`Revision`]s.
31 ///
31 ///
32 /// This can be returned by methods meant for (at most) one match.
32 /// This can be returned by methods meant for (at most) one match.
33 MultipleResults,
33 MultipleResults,
34 /// A `Revision` stored in the nodemap could not be found in the index
34 /// A `Revision` stored in the nodemap could not be found in the index
35 RevisionNotInIndex(UncheckedRevision),
35 RevisionNotInIndex(UncheckedRevision),
36 }
36 }
37
37
38 /// Mapping system from Mercurial nodes to revision numbers.
38 /// Mapping system from Mercurial nodes to revision numbers.
39 ///
39 ///
40 /// ## `RevlogIndex` and `NodeMap`
40 /// ## `RevlogIndex` and `NodeMap`
41 ///
41 ///
42 /// One way to think about their relationship is that
42 /// One way to think about their relationship is that
43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
44 /// information carried by a [`RevlogIndex`].
44 /// information carried by a [`RevlogIndex`].
45 ///
45 ///
46 /// Many of the methods in this trait take a `RevlogIndex` argument
46 /// Many of the methods in this trait take a `RevlogIndex` argument
47 /// which is used for validation of their results. This index must naturally
47 /// which is used for validation of their results. This index must naturally
48 /// be the one the `NodeMap` is about, and it must be consistent.
48 /// be the one the `NodeMap` is about, and it must be consistent.
49 ///
49 ///
50 /// Notably, the `NodeMap` must not store
50 /// Notably, the `NodeMap` must not store
51 /// information about more `Revision` values than there are in the index.
51 /// information about more `Revision` values than there are in the index.
52 /// In these methods, an encountered `Revision` is not in the index, a
52 /// In these methods, an encountered `Revision` is not in the index, a
53 /// [RevisionNotInIndex](NodeMapError) error is returned.
53 /// [RevisionNotInIndex](NodeMapError) error is returned.
54 ///
54 ///
55 /// In insert operations, the rule is thus that the `NodeMap` must always
55 /// In insert operations, the rule is thus that the `NodeMap` must always
56 /// be updated after the `RevlogIndex` it is about.
56 /// be updated after the `RevlogIndex` it is about.
57 pub trait NodeMap {
57 pub trait NodeMap {
58 /// Find the unique `Revision` having the given `Node`
58 /// Find the unique `Revision` having the given `Node`
59 ///
59 ///
60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
61 fn find_node(
61 fn find_node(
62 &self,
62 &self,
63 index: &impl RevlogIndex,
63 index: &impl RevlogIndex,
64 node: &Node,
64 node: &Node,
65 ) -> Result<Option<Revision>, NodeMapError> {
65 ) -> Result<Option<Revision>, NodeMapError> {
66 self.find_bin(index, node.into())
66 self.find_bin(index, node.into())
67 }
67 }
68
68
69 /// Find the unique Revision whose `Node` starts with a given binary prefix
69 /// Find the unique Revision whose `Node` starts with a given binary prefix
70 ///
70 ///
71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
72 ///
72 ///
73 /// If several Revisions match the given prefix, a
73 /// If several Revisions match the given prefix, a
74 /// [MultipleResults](NodeMapError) error is returned.
74 /// [MultipleResults](NodeMapError) error is returned.
75 fn find_bin(
75 fn find_bin(
76 &self,
76 &self,
77 idx: &impl RevlogIndex,
77 idx: &impl RevlogIndex,
78 prefix: NodePrefix,
78 prefix: NodePrefix,
79 ) -> Result<Option<Revision>, NodeMapError>;
79 ) -> Result<Option<Revision>, NodeMapError>;
80
80
81 /// Give the size of the shortest node prefix that determines
81 /// Give the size of the shortest node prefix that determines
82 /// the revision uniquely.
82 /// the revision uniquely.
83 ///
83 ///
84 /// From a binary node prefix, if it is matched in the node map, this
84 /// From a binary node prefix, if it is matched in the node map, this
85 /// returns the number of hexadecimal digits that would had sufficed
85 /// returns the number of hexadecimal digits that would had sufficed
86 /// to find the revision uniquely.
86 /// to find the revision uniquely.
87 ///
87 ///
88 /// Returns `None` if no [`Revision`] could be found for the prefix.
88 /// Returns `None` if no [`Revision`] could be found for the prefix.
89 ///
89 ///
90 /// If several Revisions match the given prefix, a
90 /// If several Revisions match the given prefix, a
91 /// [MultipleResults](NodeMapError) error is returned.
91 /// [MultipleResults](NodeMapError) error is returned.
92 fn unique_prefix_len_bin(
92 fn unique_prefix_len_bin(
93 &self,
93 &self,
94 idx: &impl RevlogIndex,
94 idx: &impl RevlogIndex,
95 node_prefix: NodePrefix,
95 node_prefix: NodePrefix,
96 ) -> Result<Option<usize>, NodeMapError>;
96 ) -> Result<Option<usize>, NodeMapError>;
97
97
98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
99 /// a full [`Node`] as input
99 /// a full [`Node`] as input
100 fn unique_prefix_len_node(
100 fn unique_prefix_len_node(
101 &self,
101 &self,
102 idx: &impl RevlogIndex,
102 idx: &impl RevlogIndex,
103 node: &Node,
103 node: &Node,
104 ) -> Result<Option<usize>, NodeMapError> {
104 ) -> Result<Option<usize>, NodeMapError> {
105 self.unique_prefix_len_bin(idx, node.into())
105 self.unique_prefix_len_bin(idx, node.into())
106 }
106 }
107 }
107 }
108
108
109 pub trait MutableNodeMap: NodeMap {
109 pub trait MutableNodeMap: NodeMap {
110 fn insert<I: RevlogIndex>(
110 fn insert<I: RevlogIndex>(
111 &mut self,
111 &mut self,
112 index: &I,
112 index: &I,
113 node: &Node,
113 node: &Node,
114 rev: Revision,
114 rev: Revision,
115 ) -> Result<(), NodeMapError>;
115 ) -> Result<(), NodeMapError>;
116 }
116 }
117
117
118 /// Low level NodeTree [`Block`] elements
118 /// Low level NodeTree [`Block`] elements
119 ///
119 ///
120 /// These are exactly as for instance on persistent storage.
120 /// These are exactly as for instance on persistent storage.
121 type RawElement = unaligned::I32Be;
121 type RawElement = unaligned::I32Be;
122
122
123 /// High level representation of values in NodeTree
123 /// High level representation of values in NodeTree
124 /// [`Blocks`](struct.Block.html)
124 /// [`Blocks`](struct.Block.html)
125 ///
125 ///
126 /// This is the high level representation that most algorithms should
126 /// This is the high level representation that most algorithms should
127 /// use.
127 /// use.
128 #[derive(Clone, Debug, Eq, PartialEq)]
128 #[derive(Clone, Debug, Eq, PartialEq)]
129 enum Element {
129 enum Element {
130 // This is not a Mercurial revision. It's a `i32` because this is the
130 // This is not a Mercurial revision. It's a `i32` because this is the
131 // right type for this structure.
131 // right type for this structure.
132 Rev(i32),
132 Rev(i32),
133 Block(usize),
133 Block(usize),
134 None,
134 None,
135 }
135 }
136
136
137 impl From<RawElement> for Element {
137 impl From<RawElement> for Element {
138 /// Conversion from low level representation, after endianness conversion.
138 /// Conversion from low level representation, after endianness conversion.
139 ///
139 ///
140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
141 fn from(raw: RawElement) -> Element {
141 fn from(raw: RawElement) -> Element {
142 let int = raw.get();
142 let int = raw.get();
143 if int >= 0 {
143 if int >= 0 {
144 Element::Block(int as usize)
144 Element::Block(int as usize)
145 } else if int == -1 {
145 } else if int == -1 {
146 Element::None
146 Element::None
147 } else {
147 } else {
148 Element::Rev(-int - 2)
148 Element::Rev(-int - 2)
149 }
149 }
150 }
150 }
151 }
151 }
152
152
153 impl From<Element> for RawElement {
153 impl From<Element> for RawElement {
154 fn from(element: Element) -> RawElement {
154 fn from(element: Element) -> RawElement {
155 RawElement::from(match element {
155 RawElement::from(match element {
156 Element::None => 0,
156 Element::None => 0,
157 Element::Block(i) => i as i32,
157 Element::Block(i) => i as i32,
158 Element::Rev(rev) => -rev - 2,
158 Element::Rev(rev) => -rev - 2,
159 })
159 })
160 }
160 }
161 }
161 }
162
162
163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
164
164
165 /// A logical block of the [`NodeTree`], packed with a fixed size.
165 /// A logical block of the [`NodeTree`], packed with a fixed size.
166 ///
166 ///
167 /// These are always used in container types implementing `Index<Block>`,
167 /// These are always used in container types implementing `Index<Block>`,
168 /// such as `&Block`
168 /// such as `&Block`
169 ///
169 ///
170 /// As an array of integers, its ith element encodes that the
170 /// As an array of integers, its ith element encodes that the
171 /// ith potential edge from the block, representing the ith hexadecimal digit
171 /// ith potential edge from the block, representing the ith hexadecimal digit
172 /// (nybble) `i` is either:
172 /// (nybble) `i` is either:
173 ///
173 ///
174 /// - absent (value -1)
174 /// - absent (value -1)
175 /// - another `Block` in the same indexable container (value ≥ 0)
175 /// - another `Block` in the same indexable container (value ≥ 0)
176 /// - a [`Revision`] leaf (value ≤ -2)
176 /// - a [`Revision`] leaf (value ≤ -2)
177 ///
177 ///
178 /// Endianness has to be fixed for consistency on shared storage across
178 /// Endianness has to be fixed for consistency on shared storage across
179 /// different architectures.
179 /// different architectures.
180 ///
180 ///
181 /// A key difference with the C `nodetree` is that we need to be
181 /// A key difference with the C `nodetree` is that we need to be
182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
183 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
183 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
184 ///
184 ///
185 /// Another related difference is that `NULL_REVISION` (-1) is not
185 /// Another related difference is that `NULL_REVISION` (-1) is not
186 /// represented at all, because we want an immutable empty nodetree
186 /// represented at all, because we want an immutable empty nodetree
187 /// to be valid.
187 /// to be valid.
188 #[derive(Copy, Clone, BytesCast, PartialEq)]
188 #[derive(Copy, Clone, BytesCast, PartialEq)]
189 #[repr(transparent)]
189 #[repr(transparent)]
190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
191
191
192 impl Block {
192 impl Block {
193 fn new() -> Self {
193 fn new() -> Self {
194 let absent_node = RawElement::from(-1);
194 let absent_node = RawElement::from(-1);
195 Block([absent_node; ELEMENTS_PER_BLOCK])
195 Block([absent_node; ELEMENTS_PER_BLOCK])
196 }
196 }
197
197
198 fn get(&self, nybble: u8) -> Element {
198 fn get(&self, nybble: u8) -> Element {
199 self.0[nybble as usize].into()
199 self.0[nybble as usize].into()
200 }
200 }
201
201
202 fn set(&mut self, nybble: u8, element: Element) {
202 fn set(&mut self, nybble: u8, element: Element) {
203 self.0[nybble as usize] = element.into()
203 self.0[nybble as usize] = element.into()
204 }
204 }
205 }
205 }
206
206
207 impl fmt::Debug for Block {
207 impl fmt::Debug for Block {
208 /// sparse representation for testing and debugging purposes
208 /// sparse representation for testing and debugging purposes
209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210 f.debug_map()
210 f.debug_map()
211 .entries((0..16).filter_map(|i| match self.get(i) {
211 .entries((0..16).filter_map(|i| match self.get(i) {
212 Element::None => None,
212 Element::None => None,
213 element => Some((i, element)),
213 element => Some((i, element)),
214 }))
214 }))
215 .finish()
215 .finish()
216 }
216 }
217 }
217 }
218
218
219 /// A mutable 16-radix tree with the root block logically at the end
219 /// A mutable 16-radix tree with the root block logically at the end
220 ///
220 ///
221 /// Because of the append only nature of our node trees, we need to
221 /// Because of the append only nature of our node trees, we need to
222 /// keep the original untouched and store new blocks separately.
222 /// keep the original untouched and store new blocks separately.
223 ///
223 ///
224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
225 /// it on each insertion.
225 /// it on each insertion.
226 pub struct NodeTree {
226 pub struct NodeTree {
227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
228 growable: Vec<Block>,
228 growable: Vec<Block>,
229 root: Block,
229 root: Block,
230 masked_inner_blocks: usize,
230 masked_inner_blocks: usize,
231 }
231 }
232
232
233 impl Index<usize> for NodeTree {
233 impl Index<usize> for NodeTree {
234 type Output = Block;
234 type Output = Block;
235
235
236 fn index(&self, i: usize) -> &Block {
236 fn index(&self, i: usize) -> &Block {
237 let ro_len = self.readonly.len();
237 let ro_len = self.readonly.len();
238 if i < ro_len {
238 if i < ro_len {
239 &self.readonly[i]
239 &self.readonly[i]
240 } else if i == ro_len + self.growable.len() {
240 } else if i == ro_len + self.growable.len() {
241 &self.root
241 &self.root
242 } else {
242 } else {
243 &self.growable[i - ro_len]
243 &self.growable[i - ro_len]
244 }
244 }
245 }
245 }
246 }
246 }
247
247
248 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
248 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
249 fn has_prefix_or_none(
249 fn has_prefix_or_none(
250 idx: &impl RevlogIndex,
250 idx: &impl RevlogIndex,
251 prefix: NodePrefix,
251 prefix: NodePrefix,
252 rev: UncheckedRevision,
252 rev: UncheckedRevision,
253 ) -> Result<Option<Revision>, NodeMapError> {
253 ) -> Result<Option<Revision>, NodeMapError> {
254 match idx.check_revision(rev) {
254 match idx.check_revision(rev) {
255 Some(checked) => idx
255 Some(checked) => idx
256 .node(checked)
256 .node(checked)
257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
258 .map(|node| {
258 .map(|node| {
259 if prefix.is_prefix_of(node) {
259 if prefix.is_prefix_of(node) {
260 Some(checked)
260 Some(checked)
261 } else {
261 } else {
262 None
262 None
263 }
263 }
264 }),
264 }),
265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
266 }
266 }
267 }
267 }
268
268
269 /// validate that the candidate's node starts indeed with given prefix,
269 /// validate that the candidate's node starts indeed with given prefix,
270 /// and treat ambiguities related to [`NULL_REVISION`].
270 /// and treat ambiguities related to [`NULL_REVISION`].
271 ///
271 ///
272 /// From the data in the NodeTree, one can only conclude that some
272 /// From the data in the NodeTree, one can only conclude that some
273 /// revision is the only one for a *subprefix* of the one being looked up.
273 /// revision is the only one for a *subprefix* of the one being looked up.
274 fn validate_candidate(
274 fn validate_candidate(
275 idx: &impl RevlogIndex,
275 idx: &impl RevlogIndex,
276 prefix: NodePrefix,
276 prefix: NodePrefix,
277 candidate: (Option<UncheckedRevision>, usize),
277 candidate: (Option<UncheckedRevision>, usize),
278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
279 let (rev, steps) = candidate;
279 let (rev, steps) = candidate;
280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
281 rev.map_or(Ok((None, steps)), |r| {
281 rev.map_or(Ok((None, steps)), |r| {
282 has_prefix_or_none(idx, prefix, r)
282 has_prefix_or_none(idx, prefix, r)
283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
284 })
284 })
285 } else {
285 } else {
286 // the prefix is only made of zeros; NULL_REVISION always matches it
286 // the prefix is only made of zeros; NULL_REVISION always matches it
287 // and any other *valid* result is an ambiguity
287 // and any other *valid* result is an ambiguity
288 match rev {
288 match rev {
289 None => Ok((Some(NULL_REVISION), steps + 1)),
289 None => Ok((Some(NULL_REVISION), steps + 1)),
290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
291 None => Ok((Some(NULL_REVISION), steps + 1)),
291 None => Ok((Some(NULL_REVISION), steps + 1)),
292 _ => Err(NodeMapError::MultipleResults),
292 _ => Err(NodeMapError::MultipleResults),
293 },
293 },
294 }
294 }
295 }
295 }
296 }
296 }
297
297
298 impl NodeTree {
298 impl NodeTree {
299 /// Initiate a NodeTree from an immutable slice-like of `Block`
299 /// Initiate a NodeTree from an immutable slice-like of `Block`
300 ///
300 ///
301 /// We keep `readonly` and clone its root block if it isn't empty.
301 /// We keep `readonly` and clone its root block if it isn't empty.
302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
304 NodeTree {
304 NodeTree {
305 readonly,
305 readonly,
306 growable: Vec::new(),
306 growable: Vec::new(),
307 root,
307 root,
308 masked_inner_blocks: 0,
308 masked_inner_blocks: 0,
309 }
309 }
310 }
310 }
311
311
312 /// Create from an opaque bunch of bytes
312 /// Create from an opaque bunch of bytes
313 ///
313 ///
314 /// The created [`NodeTreeBytes`] from `bytes`,
314 /// The created [`NodeTreeBytes`] from `bytes`,
315 /// of which exactly `amount` bytes are used.
315 /// of which exactly `amount` bytes are used.
316 ///
316 ///
317 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
317 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
318 /// - `amount` is expressed in bytes, and is not automatically derived from
318 /// - `amount` is expressed in bytes, and is not automatically derived from
319 /// `bytes`, so that a caller that manages them atomically can perform
319 /// `bytes`, so that a caller that manages them atomically can perform
320 /// temporary disk serializations and still rollback easily if needed.
320 /// temporary disk serializations and still rollback easily if needed.
321 /// First use-case for this would be to support Mercurial shell hooks.
321 /// First use-case for this would be to support Mercurial shell hooks.
322 ///
322 ///
323 /// panics if `buffer` is smaller than `amount`
323 /// panics if `buffer` is smaller than `amount`
324 pub fn load_bytes(
324 pub fn load_bytes(
325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
326 amount: usize,
326 amount: usize,
327 ) -> Self {
327 ) -> Self {
328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
329 }
329 }
330
330
331 /// Retrieve added [`Block`]s and the original immutable data
331 /// Retrieve added [`Block`]s and the original immutable data
332 pub fn into_readonly_and_added(
332 pub fn into_readonly_and_added(
333 self,
333 self,
334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
335 let mut vec = self.growable;
335 let mut vec = self.growable;
336 let readonly = self.readonly;
336 let readonly = self.readonly;
337 if readonly.last() != Some(&self.root) {
337 if readonly.last() != Some(&self.root) {
338 vec.push(self.root);
338 vec.push(self.root);
339 }
339 }
340 (readonly, vec)
340 (readonly, vec)
341 }
341 }
342
342
343 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
343 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
344 /// storage
344 /// storage
345 pub fn into_readonly_and_added_bytes(
345 pub fn into_readonly_and_added_bytes(
346 self,
346 self,
347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
348 let (readonly, vec) = self.into_readonly_and_added();
348 let (readonly, vec) = self.into_readonly_and_added();
349 // Prevent running `v`'s destructor so we are in complete control
349 // Prevent running `v`'s destructor so we are in complete control
350 // of the allocation.
350 // of the allocation.
351 let vec = mem::ManuallyDrop::new(vec);
351 let vec = mem::ManuallyDrop::new(vec);
352
352
353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
354 // bytes, so this is perfectly safe.
354 // bytes, so this is perfectly safe.
355 let bytes = unsafe {
355 let bytes = unsafe {
356 // Check for compatible allocation layout.
356 // Check for compatible allocation layout.
357 // (Optimized away by constant-folding + dead code elimination.)
357 // (Optimized away by constant-folding + dead code elimination.)
358 assert_eq!(size_of::<Block>(), 64);
358 assert_eq!(size_of::<Block>(), 64);
359 assert_eq!(align_of::<Block>(), 1);
359 assert_eq!(align_of::<Block>(), 1);
360
360
361 // /!\ Any use of `vec` after this is use-after-free.
361 // /!\ Any use of `vec` after this is use-after-free.
362 // TODO: use `into_raw_parts` once stabilized
362 // TODO: use `into_raw_parts` once stabilized
363 Vec::from_raw_parts(
363 Vec::from_raw_parts(
364 vec.as_ptr() as *mut u8,
364 vec.as_ptr() as *mut u8,
365 vec.len() * size_of::<Block>(),
365 vec.len() * size_of::<Block>(),
366 vec.capacity() * size_of::<Block>(),
366 vec.capacity() * size_of::<Block>(),
367 )
367 )
368 };
368 };
369 (readonly, bytes)
369 (readonly, bytes)
370 }
370 }
371
371
372 /// Total number of blocks
372 /// Total number of blocks
373 fn len(&self) -> usize {
373 fn len(&self) -> usize {
374 self.readonly.len() + self.growable.len() + 1
374 self.readonly.len() + self.growable.len() + 1
375 }
375 }
376
376
377 /// Implemented for completeness
377 /// Implemented for completeness
378 ///
378 ///
379 /// A `NodeTree` always has at least the mutable root block.
379 /// A `NodeTree` always has at least the mutable root block.
380 #[allow(dead_code)]
380 #[allow(dead_code)]
381 fn is_empty(&self) -> bool {
381 fn is_empty(&self) -> bool {
382 false
382 false
383 }
383 }
384
384
385 /// Main working method for `NodeTree` searches
385 /// Main working method for `NodeTree` searches
386 ///
386 ///
387 /// The first returned value is the result of analysing `NodeTree` data
387 /// The first returned value is the result of analysing `NodeTree` data
388 /// *alone*: whereas `None` guarantees that the given prefix is absent
388 /// *alone*: whereas `None` guarantees that the given prefix is absent
389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
390 /// `Some(rev)`, it is to be understood that `rev` is the unique
390 /// `Some(rev)`, it is to be understood that `rev` is the unique
391 /// [`Revision`] that could match the prefix. Actually, all that can
391 /// [`Revision`] that could match the prefix. Actually, all that can
392 /// be inferred from
392 /// be inferred from
393 /// the `NodeTree` data is that `rev` is the revision with the longest
393 /// the `NodeTree` data is that `rev` is the revision with the longest
394 /// common node prefix with the given prefix.
394 /// common node prefix with the given prefix.
395 /// We return an [`UncheckedRevision`] because we have no guarantee that
395 /// We return an [`UncheckedRevision`] because we have no guarantee that
396 /// the revision we found is valid for the index.
396 /// the revision we found is valid for the index.
397 ///
397 ///
398 /// The second returned value is the size of the smallest subprefix
398 /// The second returned value is the size of the smallest subprefix
399 /// of `prefix` that would give the same result, i.e. not the
399 /// of `prefix` that would give the same result, i.e. not the
400 /// [MultipleResults](NodeMapError) error variant (again, using only the
400 /// [MultipleResults](NodeMapError) error variant (again, using only the
401 /// data of the [`NodeTree`]).
401 /// data of the [`NodeTree`]).
402 fn lookup(
402 fn lookup(
403 &self,
403 &self,
404 prefix: NodePrefix,
404 prefix: NodePrefix,
405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
406 for (i, visit_item) in self.visit(prefix).enumerate() {
406 for (i, visit_item) in self.visit(prefix).enumerate() {
407 if let Some(opt) = visit_item.final_revision() {
407 if let Some(opt) = visit_item.final_revision() {
408 return Ok((opt, i + 1));
408 return Ok((opt, i + 1));
409 }
409 }
410 }
410 }
411 Err(NodeMapError::MultipleResults)
411 Err(NodeMapError::MultipleResults)
412 }
412 }
413
413
414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
415 NodeTreeVisitor {
415 NodeTreeVisitor {
416 nt: self,
416 nt: self,
417 prefix,
417 prefix,
418 visit: self.len() - 1,
418 visit: self.len() - 1,
419 nybble_idx: 0,
419 nybble_idx: 0,
420 done: false,
420 done: false,
421 }
421 }
422 }
422 }
423 /// Return a mutable reference for `Block` at index `idx`.
423 /// Return a mutable reference for `Block` at index `idx`.
424 ///
424 ///
425 /// If `idx` lies in the immutable area, then the reference is to
425 /// If `idx` lies in the immutable area, then the reference is to
426 /// a newly appended copy.
426 /// a newly appended copy.
427 ///
427 ///
428 /// Returns (new_idx, glen, mut_ref) where
428 /// Returns (new_idx, glen, mut_ref) where
429 ///
429 ///
430 /// - `new_idx` is the index of the mutable `Block`
430 /// - `new_idx` is the index of the mutable `Block`
431 /// - `mut_ref` is a mutable reference to the mutable Block.
431 /// - `mut_ref` is a mutable reference to the mutable Block.
432 /// - `glen` is the new length of `self.growable`
432 /// - `glen` is the new length of `self.growable`
433 ///
433 ///
434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
435 /// itself because of the mutable borrow taken with the returned `Block`
435 /// itself because of the mutable borrow taken with the returned `Block`
436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
437 let ro_blocks = &self.readonly;
437 let ro_blocks = &self.readonly;
438 let ro_len = ro_blocks.len();
438 let ro_len = ro_blocks.len();
439 let glen = self.growable.len();
439 let glen = self.growable.len();
440 if idx < ro_len {
440 if idx < ro_len {
441 self.masked_inner_blocks += 1;
441 self.masked_inner_blocks += 1;
442 self.growable.push(ro_blocks[idx]);
442 self.growable.push(ro_blocks[idx]);
443 (glen + ro_len, &mut self.growable[glen], glen + 1)
443 (glen + ro_len, &mut self.growable[glen], glen + 1)
444 } else if glen + ro_len == idx {
444 } else if glen + ro_len == idx {
445 (idx, &mut self.root, glen)
445 (idx, &mut self.root, glen)
446 } else {
446 } else {
447 (idx, &mut self.growable[idx - ro_len], glen)
447 (idx, &mut self.growable[idx - ro_len], glen)
448 }
448 }
449 }
449 }
450
450
451 /// Main insertion method
451 /// Main insertion method
452 ///
452 ///
453 /// This will dive in the node tree to find the deepest `Block` for
453 /// This will dive in the node tree to find the deepest `Block` for
454 /// `node`, split it as much as needed and record `node` in there.
454 /// `node`, split it as much as needed and record `node` in there.
455 /// The method then backtracks, updating references in all the visited
455 /// The method then backtracks, updating references in all the visited
456 /// blocks from the root.
456 /// blocks from the root.
457 ///
457 ///
458 /// All the mutated `Block` are copied first to the growable part if
458 /// All the mutated `Block` are copied first to the growable part if
459 /// needed. That happens for those in the immutable part except the root.
459 /// needed. That happens for those in the immutable part except the root.
460 pub fn insert<I: RevlogIndex>(
460 pub fn insert<I: RevlogIndex>(
461 &mut self,
461 &mut self,
462 index: &I,
462 index: &I,
463 node: &Node,
463 node: &Node,
464 rev: Revision,
464 rev: Revision,
465 ) -> Result<(), NodeMapError> {
465 ) -> Result<(), NodeMapError> {
466 let ro_len = &self.readonly.len();
466 let ro_len = &self.readonly.len();
467
467
468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
469 let read_nybbles = visit_steps.len();
469 let read_nybbles = visit_steps.len();
470 // visit_steps cannot be empty, since we always visit the root block
470 // visit_steps cannot be empty, since we always visit the root block
471 let deepest = visit_steps.pop().unwrap();
471 let deepest = visit_steps.pop().unwrap();
472
472
473 let (mut block_idx, mut block, mut glen) =
473 let (mut block_idx, mut block, mut glen) =
474 self.mutable_block(deepest.block_idx);
474 self.mutable_block(deepest.block_idx);
475
475
476 if let Element::Rev(old_rev) = deepest.element {
476 if let Element::Rev(old_rev) = deepest.element {
477 let old_node = index
477 let old_node = index
478 .check_revision(old_rev.into())
478 .check_revision(old_rev.into())
479 .and_then(|rev| index.node(rev))
479 .and_then(|rev| index.node(rev))
480 .ok_or_else(|| {
480 .ok_or_else(|| {
481 NodeMapError::RevisionNotInIndex(old_rev.into())
481 NodeMapError::RevisionNotInIndex(old_rev.into())
482 })?;
482 })?;
483 if old_node == node {
483 if old_node == node {
484 return Ok(()); // avoid creating lots of useless blocks
484 return Ok(()); // avoid creating lots of useless blocks
485 }
485 }
486
486
487 // Looping over the tail of nybbles in both nodes, creating
487 // Looping over the tail of nybbles in both nodes, creating
488 // new blocks until we find the difference
488 // new blocks until we find the difference
489 let mut new_block_idx = ro_len + glen;
489 let mut new_block_idx = ro_len + glen;
490 let mut nybble = deepest.nybble;
490 let mut nybble = deepest.nybble;
491 for nybble_pos in read_nybbles..node.nybbles_len() {
491 for nybble_pos in read_nybbles..node.nybbles_len() {
492 block.set(nybble, Element::Block(new_block_idx));
492 block.set(nybble, Element::Block(new_block_idx));
493
493
494 let new_nybble = node.get_nybble(nybble_pos);
494 let new_nybble = node.get_nybble(nybble_pos);
495 let old_nybble = old_node.get_nybble(nybble_pos);
495 let old_nybble = old_node.get_nybble(nybble_pos);
496
496
497 if old_nybble == new_nybble {
497 if old_nybble == new_nybble {
498 self.growable.push(Block::new());
498 self.growable.push(Block::new());
499 block = &mut self.growable[glen];
499 block = &mut self.growable[glen];
500 glen += 1;
500 glen += 1;
501 new_block_idx += 1;
501 new_block_idx += 1;
502 nybble = new_nybble;
502 nybble = new_nybble;
503 } else {
503 } else {
504 let mut new_block = Block::new();
504 let mut new_block = Block::new();
505 new_block.set(old_nybble, Element::Rev(old_rev));
505 new_block.set(old_nybble, Element::Rev(old_rev));
506 new_block.set(new_nybble, Element::Rev(rev.0));
506 new_block.set(new_nybble, Element::Rev(rev.0));
507 self.growable.push(new_block);
507 self.growable.push(new_block);
508 break;
508 break;
509 }
509 }
510 }
510 }
511 } else {
511 } else {
512 // Free slot in the deepest block: no splitting has to be done
512 // Free slot in the deepest block: no splitting has to be done
513 block.set(deepest.nybble, Element::Rev(rev.0));
513 block.set(deepest.nybble, Element::Rev(rev.0));
514 }
514 }
515
515
516 // Backtrack over visit steps to update references
516 // Backtrack over visit steps to update references
517 while let Some(visited) = visit_steps.pop() {
517 while let Some(visited) = visit_steps.pop() {
518 let to_write = Element::Block(block_idx);
518 let to_write = Element::Block(block_idx);
519 if visit_steps.is_empty() {
519 if visit_steps.is_empty() {
520 self.root.set(visited.nybble, to_write);
520 self.root.set(visited.nybble, to_write);
521 break;
521 break;
522 }
522 }
523 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
523 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
524 if block.get(visited.nybble) == to_write {
524 if block.get(visited.nybble) == to_write {
525 break;
525 break;
526 }
526 }
527 block.set(visited.nybble, to_write);
527 block.set(visited.nybble, to_write);
528 block_idx = new_idx;
528 block_idx = new_idx;
529 }
529 }
530 Ok(())
530 Ok(())
531 }
531 }
532
532
533 /// Make the whole `NodeTree` logically empty, without touching the
533 /// Make the whole `NodeTree` logically empty, without touching the
534 /// immutable part.
534 /// immutable part.
535 pub fn invalidate_all(&mut self) {
535 pub fn invalidate_all(&mut self) {
536 self.root = Block::new();
536 self.root = Block::new();
537 self.growable = Vec::new();
537 self.growable = Vec::new();
538 self.masked_inner_blocks = self.readonly.len();
538 self.masked_inner_blocks = self.readonly.len();
539 }
539 }
540
540
541 /// Return the number of blocks in the readonly part that are currently
541 /// Return the number of blocks in the readonly part that are currently
542 /// masked in the mutable part.
542 /// masked in the mutable part.
543 ///
543 ///
544 /// The `NodeTree` structure has no efficient way to know how many blocks
544 /// The `NodeTree` structure has no efficient way to know how many blocks
545 /// are already unreachable in the readonly part.
545 /// are already unreachable in the readonly part.
546 ///
546 ///
547 /// After a call to `invalidate_all()`, the returned number can be actually
547 /// After a call to `invalidate_all()`, the returned number can be actually
548 /// bigger than the whole readonly part, a conventional way to mean that
548 /// bigger than the whole readonly part, a conventional way to mean that
549 /// all the readonly blocks have been masked. This is what is really
549 /// all the readonly blocks have been masked. This is what is really
550 /// useful to the caller and does not require to know how many were
550 /// useful to the caller and does not require to know how many were
551 /// actually unreachable to begin with.
551 /// actually unreachable to begin with.
552 pub fn masked_readonly_blocks(&self) -> usize {
552 pub fn masked_readonly_blocks(&self) -> usize {
553 if let Some(readonly_root) = self.readonly.last() {
553 if let Some(readonly_root) = self.readonly.last() {
554 if readonly_root == &self.root {
554 if readonly_root == &self.root {
555 return 0;
555 return 0;
556 }
556 }
557 } else {
557 } else {
558 return 0;
558 return 0;
559 }
559 }
560 self.masked_inner_blocks + 1
560 self.masked_inner_blocks + 1
561 }
561 }
562 }
562 }
563
563
564 pub struct NodeTreeBytes {
564 pub struct NodeTreeBytes {
565 buffer: Box<dyn Deref<Target = [u8]> + Send>,
565 buffer: Box<dyn Deref<Target = [u8]> + Send>,
566 len_in_blocks: usize,
566 len_in_blocks: usize,
567 }
567 }
568
568
569 impl NodeTreeBytes {
569 impl NodeTreeBytes {
570 fn new(
570 fn new(
571 buffer: Box<dyn Deref<Target = [u8]> + Send>,
571 buffer: Box<dyn Deref<Target = [u8]> + Send>,
572 amount: usize,
572 amount: usize,
573 ) -> Self {
573 ) -> Self {
574 assert!(buffer.len() >= amount);
574 assert!(buffer.len() >= amount);
575 let len_in_blocks = amount / size_of::<Block>();
575 let len_in_blocks = amount / size_of::<Block>();
576 NodeTreeBytes {
576 NodeTreeBytes {
577 buffer,
577 buffer,
578 len_in_blocks,
578 len_in_blocks,
579 }
579 }
580 }
580 }
581 }
581 }
582
582
583 impl Deref for NodeTreeBytes {
583 impl Deref for NodeTreeBytes {
584 type Target = [Block];
584 type Target = [Block];
585
585
586 fn deref(&self) -> &[Block] {
586 fn deref(&self) -> &[Block] {
587 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
587 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
588 // `NodeTreeBytes::new` already asserted that `self.buffer` is
588 // `NodeTreeBytes::new` already asserted that `self.buffer` is
589 // large enough.
589 // large enough.
590 .unwrap()
590 .unwrap()
591 .0
591 .0
592 }
592 }
593 }
593 }
594
594
595 struct NodeTreeVisitor<'n> {
595 struct NodeTreeVisitor<'n> {
596 nt: &'n NodeTree,
596 nt: &'n NodeTree,
597 prefix: NodePrefix,
597 prefix: NodePrefix,
598 visit: usize,
598 visit: usize,
599 nybble_idx: usize,
599 nybble_idx: usize,
600 done: bool,
600 done: bool,
601 }
601 }
602
602
603 #[derive(Debug, PartialEq, Clone)]
603 #[derive(Debug, PartialEq, Clone)]
604 struct NodeTreeVisitItem {
604 struct NodeTreeVisitItem {
605 block_idx: usize,
605 block_idx: usize,
606 nybble: u8,
606 nybble: u8,
607 element: Element,
607 element: Element,
608 }
608 }
609
609
610 impl<'n> Iterator for NodeTreeVisitor<'n> {
610 impl<'n> Iterator for NodeTreeVisitor<'n> {
611 type Item = NodeTreeVisitItem;
611 type Item = NodeTreeVisitItem;
612
612
613 fn next(&mut self) -> Option<Self::Item> {
613 fn next(&mut self) -> Option<Self::Item> {
614 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
614 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
615 return None;
615 return None;
616 }
616 }
617
617
618 let nybble = self.prefix.get_nybble(self.nybble_idx);
618 let nybble = self.prefix.get_nybble(self.nybble_idx);
619 self.nybble_idx += 1;
619 self.nybble_idx += 1;
620
620
621 let visit = self.visit;
621 let visit = self.visit;
622 let element = self.nt[visit].get(nybble);
622 let element = self.nt[visit].get(nybble);
623 if let Element::Block(idx) = element {
623 if let Element::Block(idx) = element {
624 self.visit = idx;
624 self.visit = idx;
625 } else {
625 } else {
626 self.done = true;
626 self.done = true;
627 }
627 }
628
628
629 Some(NodeTreeVisitItem {
629 Some(NodeTreeVisitItem {
630 block_idx: visit,
630 block_idx: visit,
631 nybble,
631 nybble,
632 element,
632 element,
633 })
633 })
634 }
634 }
635 }
635 }
636
636
637 impl NodeTreeVisitItem {
637 impl NodeTreeVisitItem {
638 // Return `Some(opt)` if this item is final, with `opt` being the
638 // Return `Some(opt)` if this item is final, with `opt` being the
639 // `UncheckedRevision` that it may represent.
639 // `UncheckedRevision` that it may represent.
640 //
640 //
641 // If the item is not terminal, return `None`
641 // If the item is not terminal, return `None`
642 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
642 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
643 match self.element {
643 match self.element {
644 Element::Block(_) => None,
644 Element::Block(_) => None,
645 Element::Rev(r) => Some(Some(r.into())),
645 Element::Rev(r) => Some(Some(r.into())),
646 Element::None => Some(None),
646 Element::None => Some(None),
647 }
647 }
648 }
648 }
649 }
649 }
650
650
651 impl From<Vec<Block>> for NodeTree {
651 impl From<Vec<Block>> for NodeTree {
652 fn from(vec: Vec<Block>) -> Self {
652 fn from(vec: Vec<Block>) -> Self {
653 Self::new(Box::new(vec))
653 Self::new(Box::new(vec))
654 }
654 }
655 }
655 }
656
656
657 impl fmt::Debug for NodeTree {
657 impl fmt::Debug for NodeTree {
658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
659 let readonly: &[Block] = &*self.readonly;
659 let readonly: &[Block] = &self.readonly;
660 write!(
660 write!(
661 f,
661 f,
662 "readonly: {:?}, growable: {:?}, root: {:?}",
662 "readonly: {:?}, growable: {:?}, root: {:?}",
663 readonly, self.growable, self.root
663 readonly, self.growable, self.root
664 )
664 )
665 }
665 }
666 }
666 }
667
667
668 impl Default for NodeTree {
668 impl Default for NodeTree {
669 /// Create a fully mutable empty NodeTree
669 /// Create a fully mutable empty NodeTree
670 fn default() -> Self {
670 fn default() -> Self {
671 NodeTree::new(Box::new(Vec::new()))
671 NodeTree::new(Box::<Vec<_>>::default())
672 }
672 }
673 }
673 }
674
674
675 impl NodeMap for NodeTree {
675 impl NodeMap for NodeTree {
676 fn find_bin<'a>(
676 fn find_bin<'a>(
677 &self,
677 &self,
678 idx: &impl RevlogIndex,
678 idx: &impl RevlogIndex,
679 prefix: NodePrefix,
679 prefix: NodePrefix,
680 ) -> Result<Option<Revision>, NodeMapError> {
680 ) -> Result<Option<Revision>, NodeMapError> {
681 validate_candidate(idx, prefix, self.lookup(prefix)?)
681 validate_candidate(idx, prefix, self.lookup(prefix)?)
682 .map(|(opt, _shortest)| opt)
682 .map(|(opt, _shortest)| opt)
683 }
683 }
684
684
685 fn unique_prefix_len_bin<'a>(
685 fn unique_prefix_len_bin<'a>(
686 &self,
686 &self,
687 idx: &impl RevlogIndex,
687 idx: &impl RevlogIndex,
688 prefix: NodePrefix,
688 prefix: NodePrefix,
689 ) -> Result<Option<usize>, NodeMapError> {
689 ) -> Result<Option<usize>, NodeMapError> {
690 validate_candidate(idx, prefix, self.lookup(prefix)?)
690 validate_candidate(idx, prefix, self.lookup(prefix)?)
691 .map(|(opt, shortest)| opt.map(|_rev| shortest))
691 .map(|(opt, shortest)| opt.map(|_rev| shortest))
692 }
692 }
693 }
693 }
694
694
695 #[cfg(test)]
695 #[cfg(test)]
696 pub mod tests {
696 pub mod tests {
697 use super::NodeMapError::*;
697 use super::NodeMapError::*;
698 use super::*;
698 use super::*;
699 use crate::revlog::node::{hex_pad_right, Node};
699 use crate::revlog::node::{hex_pad_right, Node};
700 use std::collections::HashMap;
700 use std::collections::HashMap;
701
701
702 /// Creates a `Block` using a syntax close to the `Debug` output
702 /// Creates a `Block` using a syntax close to the `Debug` output
703 macro_rules! block {
703 macro_rules! block {
704 {$($nybble:tt : $variant:ident($val:tt)),*} => (
704 {$($nybble:tt : $variant:ident($val:tt)),*} => (
705 {
705 {
706 let mut block = Block::new();
706 let mut block = Block::new();
707 $(block.set($nybble, Element::$variant($val)));*;
707 $(block.set($nybble, Element::$variant($val)));*;
708 block
708 block
709 }
709 }
710 )
710 )
711 }
711 }
712
712
713 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
713 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
714 macro_rules! R {
714 macro_rules! R {
715 ($revision:literal) => {
715 ($revision:literal) => {
716 Revision($revision)
716 Revision($revision)
717 };
717 };
718 }
718 }
719
719
720 #[test]
720 #[test]
721 fn test_block_debug() {
721 fn test_block_debug() {
722 let mut block = Block::new();
722 let mut block = Block::new();
723 block.set(1, Element::Rev(3));
723 block.set(1, Element::Rev(3));
724 block.set(10, Element::Block(0));
724 block.set(10, Element::Block(0));
725 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
725 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
726 }
726 }
727
727
728 #[test]
728 #[test]
729 fn test_block_macro() {
729 fn test_block_macro() {
730 let block = block! {5: Block(2)};
730 let block = block! {5: Block(2)};
731 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
731 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
732
732
733 let block = block! {13: Rev(15), 5: Block(2)};
733 let block = block! {13: Rev(15), 5: Block(2)};
734 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
734 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
735 }
735 }
736
736
737 #[test]
737 #[test]
738 fn test_raw_block() {
738 fn test_raw_block() {
739 let mut raw = [255u8; 64];
739 let mut raw = [255u8; 64];
740
740
741 let mut counter = 0;
741 let mut counter = 0;
742 for val in [0_i32, 15, -2, -1, -3].iter() {
742 for val in [0_i32, 15, -2, -1, -3].iter() {
743 for byte in val.to_be_bytes().iter() {
743 for byte in val.to_be_bytes().iter() {
744 raw[counter] = *byte;
744 raw[counter] = *byte;
745 counter += 1;
745 counter += 1;
746 }
746 }
747 }
747 }
748 let (block, _) = Block::from_bytes(&raw).unwrap();
748 let (block, _) = Block::from_bytes(&raw).unwrap();
749 assert_eq!(block.get(0), Element::Block(0));
749 assert_eq!(block.get(0), Element::Block(0));
750 assert_eq!(block.get(1), Element::Block(15));
750 assert_eq!(block.get(1), Element::Block(15));
751 assert_eq!(block.get(3), Element::None);
751 assert_eq!(block.get(3), Element::None);
752 assert_eq!(block.get(2), Element::Rev(0));
752 assert_eq!(block.get(2), Element::Rev(0));
753 assert_eq!(block.get(4), Element::Rev(1));
753 assert_eq!(block.get(4), Element::Rev(1));
754 }
754 }
755
755
756 type TestIndex = HashMap<UncheckedRevision, Node>;
756 type TestIndex = HashMap<UncheckedRevision, Node>;
757
757
758 impl RevlogIndex for TestIndex {
758 impl RevlogIndex for TestIndex {
759 fn node(&self, rev: Revision) -> Option<&Node> {
759 fn node(&self, rev: Revision) -> Option<&Node> {
760 self.get(&rev.into())
760 self.get(&rev.into())
761 }
761 }
762
762
763 fn len(&self) -> usize {
763 fn len(&self) -> usize {
764 self.len()
764 self.len()
765 }
765 }
766
766
767 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
767 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
768 self.get(&rev).map(|_| Revision(rev.0))
768 self.get(&rev).map(|_| Revision(rev.0))
769 }
769 }
770 }
770 }
771
771
772 /// Pad hexadecimal Node prefix with zeros on the right
772 /// Pad hexadecimal Node prefix with zeros on the right
773 ///
773 ///
774 /// This avoids having to repeatedly write very long hexadecimal
774 /// This avoids having to repeatedly write very long hexadecimal
775 /// strings for test data, and brings actual hash size independency.
775 /// strings for test data, and brings actual hash size independency.
776 #[cfg(test)]
776 #[cfg(test)]
777 fn pad_node(hex: &str) -> Node {
777 fn pad_node(hex: &str) -> Node {
778 Node::from_hex(&hex_pad_right(hex)).unwrap()
778 Node::from_hex(hex_pad_right(hex)).unwrap()
779 }
779 }
780
780
781 /// Pad hexadecimal Node prefix with zeros on the right, then insert
781 /// Pad hexadecimal Node prefix with zeros on the right, then insert
782 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
782 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
783 idx.insert(rev.into(), pad_node(hex));
783 idx.insert(rev.into(), pad_node(hex));
784 }
784 }
785
785
786 fn sample_nodetree() -> NodeTree {
786 fn sample_nodetree() -> NodeTree {
787 NodeTree::from(vec![
787 NodeTree::from(vec![
788 block![0: Rev(9)],
788 block![0: Rev(9)],
789 block![0: Rev(0), 1: Rev(9)],
789 block![0: Rev(0), 1: Rev(9)],
790 block![0: Block(1), 1:Rev(1)],
790 block![0: Block(1), 1:Rev(1)],
791 ])
791 ])
792 }
792 }
793
793
794 fn hex(s: &str) -> NodePrefix {
794 fn hex(s: &str) -> NodePrefix {
795 NodePrefix::from_hex(s).unwrap()
795 NodePrefix::from_hex(s).unwrap()
796 }
796 }
797
797
798 #[test]
798 #[test]
799 fn test_nt_debug() {
799 fn test_nt_debug() {
800 let nt = sample_nodetree();
800 let nt = sample_nodetree();
801 assert_eq!(
801 assert_eq!(
802 format!("{:?}", nt),
802 format!("{:?}", nt),
803 "readonly: \
803 "readonly: \
804 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
804 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
805 growable: [], \
805 growable: [], \
806 root: {0: Block(1), 1: Rev(1)}",
806 root: {0: Block(1), 1: Rev(1)}",
807 );
807 );
808 }
808 }
809
809
810 #[test]
810 #[test]
811 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
811 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
812 let mut idx: TestIndex = HashMap::new();
812 let mut idx: TestIndex = HashMap::new();
813 pad_insert(&mut idx, R!(1), "1234deadcafe");
813 pad_insert(&mut idx, R!(1), "1234deadcafe");
814
814
815 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
815 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
816 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(R!(1)));
816 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(R!(1)));
817 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(R!(1)));
817 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(R!(1)));
818 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(R!(1)));
818 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(R!(1)));
819 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
819 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
820 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
820 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
821
821
822 // and with full binary Nodes
822 // and with full binary Nodes
823 assert_eq!(
823 assert_eq!(
824 nt.find_node(&idx, idx.get(&1.into()).unwrap())?,
824 nt.find_node(&idx, idx.get(&1.into()).unwrap())?,
825 Some(R!(1))
825 Some(R!(1))
826 );
826 );
827 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
827 let unknown = Node::from_hex(hex_pad_right("3d")).unwrap();
828 assert_eq!(nt.find_node(&idx, &unknown)?, None);
828 assert_eq!(nt.find_node(&idx, &unknown)?, None);
829 Ok(())
829 Ok(())
830 }
830 }
831
831
832 #[test]
832 #[test]
833 fn test_immutable_find_one_jump() {
833 fn test_immutable_find_one_jump() {
834 let mut idx = TestIndex::new();
834 let mut idx = TestIndex::new();
835 pad_insert(&mut idx, R!(9), "012");
835 pad_insert(&mut idx, R!(9), "012");
836 pad_insert(&mut idx, R!(0), "00a");
836 pad_insert(&mut idx, R!(0), "00a");
837
837
838 let nt = sample_nodetree();
838 let nt = sample_nodetree();
839
839
840 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
840 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
841 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(R!(9))));
841 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(R!(9))));
842 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
842 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
843 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(R!(0))));
843 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(R!(0))));
844 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
844 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
845 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
845 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
846 }
846 }
847
847
848 #[test]
848 #[test]
849 fn test_mutated_find() -> Result<(), NodeMapError> {
849 fn test_mutated_find() -> Result<(), NodeMapError> {
850 let mut idx = TestIndex::new();
850 let mut idx = TestIndex::new();
851 pad_insert(&mut idx, R!(9), "012");
851 pad_insert(&mut idx, R!(9), "012");
852 pad_insert(&mut idx, R!(0), "00a");
852 pad_insert(&mut idx, R!(0), "00a");
853 pad_insert(&mut idx, R!(2), "cafe");
853 pad_insert(&mut idx, R!(2), "cafe");
854 pad_insert(&mut idx, R!(3), "15");
854 pad_insert(&mut idx, R!(3), "15");
855 pad_insert(&mut idx, R!(1), "10");
855 pad_insert(&mut idx, R!(1), "10");
856
856
857 let nt = NodeTree {
857 let nt = NodeTree {
858 readonly: sample_nodetree().readonly,
858 readonly: sample_nodetree().readonly,
859 growable: vec![block![0: Rev(1), 5: Rev(3)]],
859 growable: vec![block![0: Rev(1), 5: Rev(3)]],
860 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
860 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
861 masked_inner_blocks: 1,
861 masked_inner_blocks: 1,
862 };
862 };
863 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(R!(1)));
863 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(R!(1)));
864 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(R!(2)));
864 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(R!(2)));
865 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
865 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
866 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
866 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
867 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
867 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
868 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
868 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
869 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(R!(9)));
869 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(R!(9)));
870 assert_eq!(nt.masked_readonly_blocks(), 2);
870 assert_eq!(nt.masked_readonly_blocks(), 2);
871 Ok(())
871 Ok(())
872 }
872 }
873
873
874 pub struct TestNtIndex {
874 pub struct TestNtIndex {
875 pub index: TestIndex,
875 pub index: TestIndex,
876 pub nt: NodeTree,
876 pub nt: NodeTree,
877 }
877 }
878
878
879 impl TestNtIndex {
879 impl TestNtIndex {
880 pub fn new() -> Self {
880 pub fn new() -> Self {
881 TestNtIndex {
881 TestNtIndex {
882 index: HashMap::new(),
882 index: HashMap::new(),
883 nt: NodeTree::default(),
883 nt: NodeTree::default(),
884 }
884 }
885 }
885 }
886
886
887 pub fn insert_node(
887 pub fn insert_node(
888 &mut self,
888 &mut self,
889 rev: Revision,
889 rev: Revision,
890 node: Node,
890 node: Node,
891 ) -> Result<(), NodeMapError> {
891 ) -> Result<(), NodeMapError> {
892 self.index.insert(rev.into(), node);
892 self.index.insert(rev.into(), node);
893 self.nt.insert(&self.index, &node, rev)?;
893 self.nt.insert(&self.index, &node, rev)?;
894 Ok(())
894 Ok(())
895 }
895 }
896
896
897 pub fn insert(
897 pub fn insert(
898 &mut self,
898 &mut self,
899 rev: Revision,
899 rev: Revision,
900 hex: &str,
900 hex: &str,
901 ) -> Result<(), NodeMapError> {
901 ) -> Result<(), NodeMapError> {
902 let node = pad_node(hex);
902 let node = pad_node(hex);
903 return self.insert_node(rev, node);
903 self.insert_node(rev, node)
904 }
904 }
905
905
906 fn find_hex(
906 fn find_hex(
907 &self,
907 &self,
908 prefix: &str,
908 prefix: &str,
909 ) -> Result<Option<Revision>, NodeMapError> {
909 ) -> Result<Option<Revision>, NodeMapError> {
910 self.nt.find_bin(&self.index, hex(prefix))
910 self.nt.find_bin(&self.index, hex(prefix))
911 }
911 }
912
912
913 fn unique_prefix_len_hex(
913 fn unique_prefix_len_hex(
914 &self,
914 &self,
915 prefix: &str,
915 prefix: &str,
916 ) -> Result<Option<usize>, NodeMapError> {
916 ) -> Result<Option<usize>, NodeMapError> {
917 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
917 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
918 }
918 }
919
919
920 /// Drain `added` and restart a new one
920 /// Drain `added` and restart a new one
921 fn commit(self) -> Self {
921 fn commit(self) -> Self {
922 let mut as_vec: Vec<Block> =
922 let mut as_vec: Vec<Block> =
923 self.nt.readonly.iter().copied().collect();
923 self.nt.readonly.iter().copied().collect();
924 as_vec.extend(self.nt.growable);
924 as_vec.extend(self.nt.growable);
925 as_vec.push(self.nt.root);
925 as_vec.push(self.nt.root);
926
926
927 Self {
927 Self {
928 index: self.index,
928 index: self.index,
929 nt: NodeTree::from(as_vec),
929 nt: NodeTree::from(as_vec),
930 }
930 }
931 }
931 }
932 }
932 }
933
933
934 impl Default for TestNtIndex {
935 fn default() -> Self {
936 Self::new()
937 }
938 }
939
934 #[test]
940 #[test]
935 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
941 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
936 let mut idx = TestNtIndex::new();
942 let mut idx = TestNtIndex::new();
937 idx.insert(Revision(0), "1234")?;
943 idx.insert(Revision(0), "1234")?;
938 assert_eq!(idx.find_hex("1")?, Some(R!(0)));
944 assert_eq!(idx.find_hex("1")?, Some(R!(0)));
939 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
945 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
940
946
941 // let's trigger a simple split
947 // let's trigger a simple split
942 idx.insert(Revision(1), "1a34")?;
948 idx.insert(Revision(1), "1a34")?;
943 assert_eq!(idx.nt.growable.len(), 1);
949 assert_eq!(idx.nt.growable.len(), 1);
944 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
950 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
945 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
951 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
946
952
947 // reinserting is a no_op
953 // reinserting is a no_op
948 idx.insert(Revision(1), "1a34")?;
954 idx.insert(Revision(1), "1a34")?;
949 assert_eq!(idx.nt.growable.len(), 1);
955 assert_eq!(idx.nt.growable.len(), 1);
950 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
956 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
951 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
957 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
952
958
953 idx.insert(Revision(2), "1a01")?;
959 idx.insert(Revision(2), "1a01")?;
954 assert_eq!(idx.nt.growable.len(), 2);
960 assert_eq!(idx.nt.growable.len(), 2);
955 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
961 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
956 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
962 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
957 assert_eq!(idx.find_hex("1a3")?, Some(R!(1)));
963 assert_eq!(idx.find_hex("1a3")?, Some(R!(1)));
958 assert_eq!(idx.find_hex("1a0")?, Some(R!(2)));
964 assert_eq!(idx.find_hex("1a0")?, Some(R!(2)));
959 assert_eq!(idx.find_hex("1a12")?, None);
965 assert_eq!(idx.find_hex("1a12")?, None);
960
966
961 // now let's make it split and create more than one additional block
967 // now let's make it split and create more than one additional block
962 idx.insert(Revision(3), "1a345")?;
968 idx.insert(Revision(3), "1a345")?;
963 assert_eq!(idx.nt.growable.len(), 4);
969 assert_eq!(idx.nt.growable.len(), 4);
964 assert_eq!(idx.find_hex("1a340")?, Some(R!(1)));
970 assert_eq!(idx.find_hex("1a340")?, Some(R!(1)));
965 assert_eq!(idx.find_hex("1a345")?, Some(R!(3)));
971 assert_eq!(idx.find_hex("1a345")?, Some(R!(3)));
966 assert_eq!(idx.find_hex("1a341")?, None);
972 assert_eq!(idx.find_hex("1a341")?, None);
967
973
968 // there's no readonly block to mask
974 // there's no readonly block to mask
969 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
975 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
970 Ok(())
976 Ok(())
971 }
977 }
972
978
973 #[test]
979 #[test]
974 fn test_unique_prefix_len_zero_prefix() {
980 fn test_unique_prefix_len_zero_prefix() {
975 let mut idx = TestNtIndex::new();
981 let mut idx = TestNtIndex::new();
976 idx.insert(Revision(0), "00000abcd").unwrap();
982 idx.insert(Revision(0), "00000abcd").unwrap();
977
983
978 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
984 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
979 // in the nodetree proper, this will be found at the first nybble
985 // in the nodetree proper, this will be found at the first nybble
980 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
986 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
981 // but the first difference with `NULL_NODE`
987 // but the first difference with `NULL_NODE`
982 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
988 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
983 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
989 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
984
990
985 // same with odd result
991 // same with odd result
986 idx.insert(Revision(1), "00123").unwrap();
992 idx.insert(Revision(1), "00123").unwrap();
987 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
993 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
988 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
994 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
989
995
990 // these are unchanged of course
996 // these are unchanged of course
991 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
997 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
992 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
998 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
993 }
999 }
994
1000
995 #[test]
1001 #[test]
996 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1002 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
997 // check that the splitting loop is long enough
1003 // check that the splitting loop is long enough
998 let mut nt_idx = TestNtIndex::new();
1004 let mut nt_idx = TestNtIndex::new();
999 let nt = &mut nt_idx.nt;
1005 let nt = &mut nt_idx.nt;
1000 let idx = &mut nt_idx.index;
1006 let idx = &mut nt_idx.index;
1001
1007
1002 let node0_hex = hex_pad_right("444444");
1008 let node0_hex = hex_pad_right("444444");
1003 let mut node1_hex = hex_pad_right("444444");
1009 let mut node1_hex = hex_pad_right("444444");
1004 node1_hex.pop();
1010 node1_hex.pop();
1005 node1_hex.push('5');
1011 node1_hex.push('5');
1006 let node0 = Node::from_hex(&node0_hex).unwrap();
1012 let node0 = Node::from_hex(node0_hex).unwrap();
1007 let node1 = Node::from_hex(&node1_hex).unwrap();
1013 let node1 = Node::from_hex(&node1_hex).unwrap();
1008
1014
1009 idx.insert(0.into(), node0);
1015 idx.insert(0.into(), node0);
1010 nt.insert(idx, &node0, R!(0))?;
1016 nt.insert(idx, &node0, R!(0))?;
1011 idx.insert(1.into(), node1);
1017 idx.insert(1.into(), node1);
1012 nt.insert(idx, &node1, R!(1))?;
1018 nt.insert(idx, &node1, R!(1))?;
1013
1019
1014 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(R!(0)));
1020 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(R!(0)));
1015 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(R!(1)));
1021 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(R!(1)));
1016 Ok(())
1022 Ok(())
1017 }
1023 }
1018
1024
1019 #[test]
1025 #[test]
1020 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1026 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1021 let mut idx = TestNtIndex::new();
1027 let mut idx = TestNtIndex::new();
1022 idx.insert(Revision(0), "1234")?;
1028 idx.insert(Revision(0), "1234")?;
1023 idx.insert(Revision(1), "1235")?;
1029 idx.insert(Revision(1), "1235")?;
1024 idx.insert(Revision(2), "131")?;
1030 idx.insert(Revision(2), "131")?;
1025 idx.insert(Revision(3), "cafe")?;
1031 idx.insert(Revision(3), "cafe")?;
1026 let mut idx = idx.commit();
1032 let mut idx = idx.commit();
1027 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1033 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1028 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1034 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1029 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1035 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1030 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1036 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1031 // we did not add anything since init from readonly
1037 // we did not add anything since init from readonly
1032 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1038 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1033
1039
1034 idx.insert(Revision(4), "123A")?;
1040 idx.insert(Revision(4), "123A")?;
1035 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1041 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1036 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1042 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1037 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1043 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1038 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1044 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1039 assert_eq!(idx.find_hex("123A")?, Some(R!(4)));
1045 assert_eq!(idx.find_hex("123A")?, Some(R!(4)));
1040 // we masked blocks for all prefixes of "123", including the root
1046 // we masked blocks for all prefixes of "123", including the root
1041 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1047 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1042
1048
1043 eprintln!("{:?}", idx.nt);
1049 eprintln!("{:?}", idx.nt);
1044 idx.insert(Revision(5), "c0")?;
1050 idx.insert(Revision(5), "c0")?;
1045 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1051 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1046 assert_eq!(idx.find_hex("c0")?, Some(R!(5)));
1052 assert_eq!(idx.find_hex("c0")?, Some(R!(5)));
1047 assert_eq!(idx.find_hex("c1")?, None);
1053 assert_eq!(idx.find_hex("c1")?, None);
1048 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1054 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1049 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1055 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1050 // it doesn't mask anything
1056 // it doesn't mask anything
1051 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1052
1058
1053 Ok(())
1059 Ok(())
1054 }
1060 }
1055
1061
1056 #[test]
1062 #[test]
1057 fn test_invalidate_all() -> Result<(), NodeMapError> {
1063 fn test_invalidate_all() -> Result<(), NodeMapError> {
1058 let mut idx = TestNtIndex::new();
1064 let mut idx = TestNtIndex::new();
1059 idx.insert(Revision(0), "1234")?;
1065 idx.insert(Revision(0), "1234")?;
1060 idx.insert(Revision(1), "1235")?;
1066 idx.insert(Revision(1), "1235")?;
1061 idx.insert(Revision(2), "131")?;
1067 idx.insert(Revision(2), "131")?;
1062 idx.insert(Revision(3), "cafe")?;
1068 idx.insert(Revision(3), "cafe")?;
1063 let mut idx = idx.commit();
1069 let mut idx = idx.commit();
1064
1070
1065 idx.nt.invalidate_all();
1071 idx.nt.invalidate_all();
1066
1072
1067 assert_eq!(idx.find_hex("1234")?, None);
1073 assert_eq!(idx.find_hex("1234")?, None);
1068 assert_eq!(idx.find_hex("1235")?, None);
1074 assert_eq!(idx.find_hex("1235")?, None);
1069 assert_eq!(idx.find_hex("131")?, None);
1075 assert_eq!(idx.find_hex("131")?, None);
1070 assert_eq!(idx.find_hex("cafe")?, None);
1076 assert_eq!(idx.find_hex("cafe")?, None);
1071 // all the readonly blocks have been masked, this is the
1077 // all the readonly blocks have been masked, this is the
1072 // conventional expected response
1078 // conventional expected response
1073 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1079 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1074 Ok(())
1080 Ok(())
1075 }
1081 }
1076
1082
1077 #[test]
1083 #[test]
1078 fn test_into_added_empty() {
1084 fn test_into_added_empty() {
1079 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1085 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1080 assert!(sample_nodetree()
1086 assert!(sample_nodetree()
1081 .into_readonly_and_added_bytes()
1087 .into_readonly_and_added_bytes()
1082 .1
1088 .1
1083 .is_empty());
1089 .is_empty());
1084 }
1090 }
1085
1091
1086 #[test]
1092 #[test]
1087 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1093 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1088 let mut idx = TestNtIndex::new();
1094 let mut idx = TestNtIndex::new();
1089 idx.insert(Revision(0), "1234")?;
1095 idx.insert(Revision(0), "1234")?;
1090 let mut idx = idx.commit();
1096 let mut idx = idx.commit();
1091 idx.insert(Revision(4), "cafe")?;
1097 idx.insert(Revision(4), "cafe")?;
1092 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1098 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1093
1099
1094 // only the root block has been changed
1100 // only the root block has been changed
1095 assert_eq!(bytes.len(), size_of::<Block>());
1101 assert_eq!(bytes.len(), size_of::<Block>());
1096 // big endian for -2
1102 // big endian for -2
1097 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1103 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1098 // big endian for -6
1104 // big endian for -6
1099 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1105 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1100 Ok(())
1106 Ok(())
1101 }
1107 }
1102 }
1108 }
@@ -1,108 +1,108 b''
1 use crate::errors::{HgError, HgResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 use bytes_cast::{unaligned, BytesCast};
2 use bytes_cast::{unaligned, BytesCast};
3 use memmap2::Mmap;
3 use memmap2::Mmap;
4 use std::path::{Path, PathBuf};
4 use std::path::{Path, PathBuf};
5
5
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7
7
8 const ONDISK_VERSION: u8 = 1;
8 const ONDISK_VERSION: u8 = 1;
9
9
10 pub(super) struct NodeMapDocket {
10 pub(super) struct NodeMapDocket {
11 pub data_length: usize,
11 pub data_length: usize,
12 // TODO: keep here more of the data from `parse()` when we need it
12 // TODO: keep here more of the data from `parse()` when we need it
13 }
13 }
14
14
15 #[derive(BytesCast)]
15 #[derive(BytesCast)]
16 #[repr(C)]
16 #[repr(C)]
17 struct DocketHeader {
17 struct DocketHeader {
18 uid_size: u8,
18 uid_size: u8,
19 _tip_rev: unaligned::U64Be,
19 _tip_rev: unaligned::U64Be,
20 data_length: unaligned::U64Be,
20 data_length: unaligned::U64Be,
21 _data_unused: unaligned::U64Be,
21 _data_unused: unaligned::U64Be,
22 tip_node_size: unaligned::U64Be,
22 tip_node_size: unaligned::U64Be,
23 }
23 }
24
24
25 impl NodeMapDocket {
25 impl NodeMapDocket {
26 /// Return `Ok(None)` when the caller should proceed without a persistent
26 /// Return `Ok(None)` when the caller should proceed without a persistent
27 /// nodemap:
27 /// nodemap:
28 ///
28 ///
29 /// * This revlog does not have a `.n` docket file (it is not generated for
29 /// * This revlog does not have a `.n` docket file (it is not generated for
30 /// small revlogs), or
30 /// small revlogs), or
31 /// * The docket has an unsupported version number (repositories created by
31 /// * The docket has an unsupported version number (repositories created by
32 /// later hg, maybe that should be a requirement instead?), or
32 /// later hg, maybe that should be a requirement instead?), or
33 /// * The docket file points to a missing (likely deleted) data file (this
33 /// * The docket file points to a missing (likely deleted) data file (this
34 /// can happen in a rare race condition).
34 /// can happen in a rare race condition).
35 pub fn read_from_file(
35 pub fn read_from_file(
36 store_vfs: &Vfs,
36 store_vfs: &Vfs,
37 index_path: &Path,
37 index_path: &Path,
38 ) -> Result<Option<(Self, Mmap)>, HgError> {
38 ) -> Result<Option<(Self, Mmap)>, HgError> {
39 let docket_path = index_path.with_extension("n");
39 let docket_path = index_path.with_extension("n");
40 let docket_bytes = if let Some(bytes) =
40 let docket_bytes = if let Some(bytes) =
41 store_vfs.read(&docket_path).io_not_found_as_none()?
41 store_vfs.read(&docket_path).io_not_found_as_none()?
42 {
42 {
43 bytes
43 bytes
44 } else {
44 } else {
45 return Ok(None);
45 return Ok(None);
46 };
46 };
47
47
48 let input = if let Some((&ONDISK_VERSION, rest)) =
48 let input = if let Some((&ONDISK_VERSION, rest)) =
49 docket_bytes.split_first()
49 docket_bytes.split_first()
50 {
50 {
51 rest
51 rest
52 } else {
52 } else {
53 return Ok(None);
53 return Ok(None);
54 };
54 };
55
55
56 /// Treat any error as a parse error
56 /// Treat any error as a parse error
57 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
57 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
58 result
58 result
59 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
59 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
60 }
60 }
61
61
62 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
62 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
63 let uid_size = header.uid_size as usize;
63 let uid_size = header.uid_size as usize;
64 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
64 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
65 // systems?
65 // systems?
66 let tip_node_size = header.tip_node_size.get() as usize;
66 let tip_node_size = header.tip_node_size.get() as usize;
67 let data_length = header.data_length.get() as usize;
67 let data_length = header.data_length.get() as usize;
68 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
68 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
69 let (_tip_node, _rest) =
69 let (_tip_node, _rest) =
70 parse(u8::slice_from_bytes(rest, tip_node_size))?;
70 parse(u8::slice_from_bytes(rest, tip_node_size))?;
71 let uid = parse(std::str::from_utf8(uid))?;
71 let uid = parse(std::str::from_utf8(uid))?;
72 let docket = NodeMapDocket { data_length };
72 let docket = NodeMapDocket { data_length };
73
73
74 let data_path = rawdata_path(&docket_path, uid);
74 let data_path = rawdata_path(&docket_path, uid);
75 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
75 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
76 // config is false?
76 // config is false?
77 if let Some(mmap) =
77 if let Some(mmap) =
78 store_vfs.mmap_open(&data_path).io_not_found_as_none()?
78 store_vfs.mmap_open(data_path).io_not_found_as_none()?
79 {
79 {
80 if mmap.len() >= data_length {
80 if mmap.len() >= data_length {
81 Ok(Some((docket, mmap)))
81 Ok(Some((docket, mmap)))
82 } else {
82 } else {
83 Err(HgError::corrupted("persistent nodemap too short"))
83 Err(HgError::corrupted("persistent nodemap too short"))
84 }
84 }
85 } else {
85 } else {
86 // Even if .hg/requires opted in, some revlogs are deemed small
86 // Even if .hg/requires opted in, some revlogs are deemed small
87 // enough to not need a persistent nodemap.
87 // enough to not need a persistent nodemap.
88 Ok(None)
88 Ok(None)
89 }
89 }
90 }
90 }
91 }
91 }
92
92
93 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
93 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
94 let docket_name = docket_path
94 let docket_name = docket_path
95 .file_name()
95 .file_name()
96 .expect("expected a base name")
96 .expect("expected a base name")
97 .to_str()
97 .to_str()
98 .expect("expected an ASCII file name in the store");
98 .expect("expected an ASCII file name in the store");
99 let prefix = docket_name
99 let prefix = docket_name
100 .strip_suffix(".n.a")
100 .strip_suffix(".n.a")
101 .or_else(|| docket_name.strip_suffix(".n"))
101 .or_else(|| docket_name.strip_suffix(".n"))
102 .expect("expected docket path in .n or .n.a");
102 .expect("expected docket path in .n or .n.a");
103 let name = format!("{}-{}.nd", prefix, uid);
103 let name = format!("{}-{}.nd", prefix, uid);
104 docket_path
104 docket_path
105 .parent()
105 .parent()
106 .expect("expected a non-root path")
106 .expect("expected a non-root path")
107 .join(name)
107 .join(name)
108 }
108 }
@@ -1,436 +1,436 b''
1 // files.rs
1 // files.rs
2 //
2 //
3 // Copyright 2019
3 // Copyright 2019
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 // Yuya Nishihara <yuya@tcha.org>
5 // Yuya Nishihara <yuya@tcha.org>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 //! Functions for fiddling with files.
10 //! Functions for fiddling with files.
11
11
12 use crate::utils::{
12 use crate::utils::{
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 path_auditor::PathAuditor,
14 path_auditor::PathAuditor,
15 replace_slice,
15 replace_slice,
16 };
16 };
17 use lazy_static::lazy_static;
17 use lazy_static::lazy_static;
18 use same_file::is_same_file;
18 use same_file::is_same_file;
19 use std::borrow::{Cow, ToOwned};
19 use std::borrow::{Cow, ToOwned};
20 use std::ffi::{OsStr, OsString};
20 use std::ffi::{OsStr, OsString};
21 use std::iter::FusedIterator;
21 use std::iter::FusedIterator;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24
24
25 pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
25 pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
26 let os_str;
26 let os_str;
27 #[cfg(unix)]
27 #[cfg(unix)]
28 {
28 {
29 use std::os::unix::ffi::OsStrExt;
29 use std::os::unix::ffi::OsStrExt;
30 os_str = std::ffi::OsStr::from_bytes(bytes);
30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 }
31 }
32 // TODO Handle other platforms
32 // TODO Handle other platforms
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 // Perhaps, the return type would have to be Result<PathBuf>.
34 // Perhaps, the return type would have to be Result<PathBuf>.
35 os_str
35 os_str
36 }
36 }
37
37
38 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
38 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
39 Path::new(get_os_str_from_bytes(bytes))
39 Path::new(get_os_str_from_bytes(bytes))
40 }
40 }
41
41
42 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
42 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
43 // that's why Vec<u8> is returned.
43 // that's why Vec<u8> is returned.
44 #[cfg(unix)]
44 #[cfg(unix)]
45 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
45 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
46 get_bytes_from_os_str(path.as_ref())
46 get_bytes_from_os_str(path.as_ref())
47 }
47 }
48
48
49 #[cfg(unix)]
49 #[cfg(unix)]
50 pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
50 pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
51 use std::os::unix::ffi::OsStrExt;
51 use std::os::unix::ffi::OsStrExt;
52 str.as_ref().as_bytes().to_vec()
52 str.as_ref().as_bytes().to_vec()
53 }
53 }
54
54
55 #[cfg(unix)]
55 #[cfg(unix)]
56 pub fn get_bytes_from_os_string(str: OsString) -> Vec<u8> {
56 pub fn get_bytes_from_os_string(str: OsString) -> Vec<u8> {
57 use std::os::unix::ffi::OsStringExt;
57 use std::os::unix::ffi::OsStringExt;
58 str.into_vec()
58 str.into_vec()
59 }
59 }
60
60
61 /// An iterator over repository path yielding itself and its ancestors.
61 /// An iterator over repository path yielding itself and its ancestors.
62 #[derive(Copy, Clone, Debug)]
62 #[derive(Copy, Clone, Debug)]
63 pub struct Ancestors<'a> {
63 pub struct Ancestors<'a> {
64 next: Option<&'a HgPath>,
64 next: Option<&'a HgPath>,
65 }
65 }
66
66
67 impl<'a> Iterator for Ancestors<'a> {
67 impl<'a> Iterator for Ancestors<'a> {
68 type Item = &'a HgPath;
68 type Item = &'a HgPath;
69
69
70 fn next(&mut self) -> Option<Self::Item> {
70 fn next(&mut self) -> Option<Self::Item> {
71 let next = self.next;
71 let next = self.next;
72 self.next = match self.next {
72 self.next = match self.next {
73 Some(s) if s.is_empty() => None,
73 Some(s) if s.is_empty() => None,
74 Some(s) => {
74 Some(s) => {
75 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
75 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
76 Some(HgPath::new(&s.as_bytes()[..p]))
76 Some(HgPath::new(&s.as_bytes()[..p]))
77 }
77 }
78 None => None,
78 None => None,
79 };
79 };
80 next
80 next
81 }
81 }
82 }
82 }
83
83
84 impl<'a> FusedIterator for Ancestors<'a> {}
84 impl<'a> FusedIterator for Ancestors<'a> {}
85
85
86 /// An iterator over repository path yielding itself and its ancestors.
86 /// An iterator over repository path yielding itself and its ancestors.
87 #[derive(Copy, Clone, Debug)]
87 #[derive(Copy, Clone, Debug)]
88 pub(crate) struct AncestorsWithBase<'a> {
88 pub(crate) struct AncestorsWithBase<'a> {
89 next: Option<(&'a HgPath, &'a HgPath)>,
89 next: Option<(&'a HgPath, &'a HgPath)>,
90 }
90 }
91
91
92 impl<'a> Iterator for AncestorsWithBase<'a> {
92 impl<'a> Iterator for AncestorsWithBase<'a> {
93 type Item = (&'a HgPath, &'a HgPath);
93 type Item = (&'a HgPath, &'a HgPath);
94
94
95 fn next(&mut self) -> Option<Self::Item> {
95 fn next(&mut self) -> Option<Self::Item> {
96 let next = self.next;
96 let next = self.next;
97 self.next = match self.next {
97 self.next = match self.next {
98 Some((s, _)) if s.is_empty() => None,
98 Some((s, _)) if s.is_empty() => None,
99 Some((s, _)) => Some(s.split_filename()),
99 Some((s, _)) => Some(s.split_filename()),
100 None => None,
100 None => None,
101 };
101 };
102 next
102 next
103 }
103 }
104 }
104 }
105
105
106 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
106 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
107
107
108 /// Returns an iterator yielding ancestor directories of the given repository
108 /// Returns an iterator yielding ancestor directories of the given repository
109 /// path.
109 /// path.
110 ///
110 ///
111 /// The path is separated by '/', and must not start with '/'.
111 /// The path is separated by '/', and must not start with '/'.
112 ///
112 ///
113 /// The path itself isn't included unless it is b"" (meaning the root
113 /// The path itself isn't included unless it is b"" (meaning the root
114 /// directory.)
114 /// directory.)
115 pub fn find_dirs(path: &HgPath) -> Ancestors {
115 pub fn find_dirs(path: &HgPath) -> Ancestors {
116 let mut dirs = Ancestors { next: Some(path) };
116 let mut dirs = Ancestors { next: Some(path) };
117 if !path.is_empty() {
117 if !path.is_empty() {
118 dirs.next(); // skip itself
118 dirs.next(); // skip itself
119 }
119 }
120 dirs
120 dirs
121 }
121 }
122
122
123 /// Returns an iterator yielding ancestor directories of the given repository
123 /// Returns an iterator yielding ancestor directories of the given repository
124 /// path.
124 /// path.
125 ///
125 ///
126 /// The path is separated by '/', and must not start with '/'.
126 /// The path is separated by '/', and must not start with '/'.
127 ///
127 ///
128 /// The path itself isn't included unless it is b"" (meaning the root
128 /// The path itself isn't included unless it is b"" (meaning the root
129 /// directory.)
129 /// directory.)
130 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
130 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
131 let mut dirs = AncestorsWithBase {
131 let mut dirs = AncestorsWithBase {
132 next: Some((path, HgPath::new(b""))),
132 next: Some((path, HgPath::new(b""))),
133 };
133 };
134 if !path.is_empty() {
134 if !path.is_empty() {
135 dirs.next(); // skip itself
135 dirs.next(); // skip itself
136 }
136 }
137 dirs
137 dirs
138 }
138 }
139
139
140 /// TODO more than ASCII?
140 /// TODO more than ASCII?
141 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
141 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
142 #[cfg(windows)] // NTFS compares via upper()
142 #[cfg(windows)] // NTFS compares via upper()
143 return path.to_ascii_uppercase();
143 return path.to_ascii_uppercase();
144 #[cfg(unix)]
144 #[cfg(unix)]
145 path.to_ascii_lowercase()
145 path.to_ascii_lowercase()
146 }
146 }
147
147
148 lazy_static! {
148 lazy_static! {
149 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
149 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
150 [
150 [
151 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
151 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
152 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
152 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
153 ]
153 ]
154 .iter()
154 .iter()
155 .map(|code| {
155 .map(|code| {
156 std::char::from_u32(*code)
156 std::char::from_u32(*code)
157 .unwrap()
157 .unwrap()
158 .encode_utf8(&mut [0; 3])
158 .encode_utf8(&mut [0; 3])
159 .bytes()
159 .bytes()
160 .collect()
160 .collect()
161 })
161 })
162 .collect()
162 .collect()
163 };
163 };
164 }
164 }
165
165
166 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
166 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
167 let mut buf = bytes.to_owned();
167 let mut buf = bytes.to_owned();
168 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
168 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
169 if needs_escaping {
169 if needs_escaping {
170 for forbidden in IGNORED_CHARS.iter() {
170 for forbidden in IGNORED_CHARS.iter() {
171 replace_slice(&mut buf, forbidden, &[])
171 replace_slice(&mut buf, forbidden, &[])
172 }
172 }
173 buf
173 buf
174 } else {
174 } else {
175 buf
175 buf
176 }
176 }
177 }
177 }
178
178
179 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
179 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
180 hfs_ignore_clean(&bytes.to_ascii_lowercase())
180 hfs_ignore_clean(&bytes.to_ascii_lowercase())
181 }
181 }
182
182
183 /// Returns the canonical path of `name`, given `cwd` and `root`
183 /// Returns the canonical path of `name`, given `cwd` and `root`
184 pub fn canonical_path(
184 pub fn canonical_path(
185 root: impl AsRef<Path>,
185 root: impl AsRef<Path>,
186 cwd: impl AsRef<Path>,
186 cwd: impl AsRef<Path>,
187 name: impl AsRef<Path>,
187 name: impl AsRef<Path>,
188 ) -> Result<PathBuf, HgPathError> {
188 ) -> Result<PathBuf, HgPathError> {
189 // TODO add missing normalization for other platforms
189 // TODO add missing normalization for other platforms
190 let root = root.as_ref();
190 let root = root.as_ref();
191 let cwd = cwd.as_ref();
191 let cwd = cwd.as_ref();
192 let name = name.as_ref();
192 let name = name.as_ref();
193
193
194 let name = if !name.is_absolute() {
194 let name = if !name.is_absolute() {
195 root.join(&cwd).join(&name)
195 root.join(cwd).join(name)
196 } else {
196 } else {
197 name.to_owned()
197 name.to_owned()
198 };
198 };
199 let auditor = PathAuditor::new(&root);
199 let auditor = PathAuditor::new(root);
200 if name != root && name.starts_with(&root) {
200 if name != root && name.starts_with(root) {
201 let name = name.strip_prefix(&root).unwrap();
201 let name = name.strip_prefix(root).unwrap();
202 auditor.audit_path(path_to_hg_path_buf(name)?)?;
202 auditor.audit_path(path_to_hg_path_buf(name)?)?;
203 Ok(name.to_owned())
203 Ok(name.to_owned())
204 } else if name == root {
204 } else if name == root {
205 Ok("".into())
205 Ok("".into())
206 } else {
206 } else {
207 // Determine whether `name' is in the hierarchy at or beneath `root',
207 // Determine whether `name' is in the hierarchy at or beneath `root',
208 // by iterating name=name.parent() until it returns `None` (can't
208 // by iterating name=name.parent() until it returns `None` (can't
209 // check name == '/', because that doesn't work on windows).
209 // check name == '/', because that doesn't work on windows).
210 let mut name = name.deref();
210 let mut name = name.deref();
211 let original_name = name.to_owned();
211 let original_name = name.to_owned();
212 loop {
212 loop {
213 let same = is_same_file(&name, &root).unwrap_or(false);
213 let same = is_same_file(name, root).unwrap_or(false);
214 if same {
214 if same {
215 if name == original_name {
215 if name == original_name {
216 // `name` was actually the same as root (maybe a symlink)
216 // `name` was actually the same as root (maybe a symlink)
217 return Ok("".into());
217 return Ok("".into());
218 }
218 }
219 // `name` is a symlink to root, so `original_name` is under
219 // `name` is a symlink to root, so `original_name` is under
220 // root
220 // root
221 let rel_path = original_name.strip_prefix(&name).unwrap();
221 let rel_path = original_name.strip_prefix(name).unwrap();
222 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
222 auditor.audit_path(path_to_hg_path_buf(rel_path)?)?;
223 return Ok(rel_path.to_owned());
223 return Ok(rel_path.to_owned());
224 }
224 }
225 name = match name.parent() {
225 name = match name.parent() {
226 None => break,
226 None => break,
227 Some(p) => p,
227 Some(p) => p,
228 };
228 };
229 }
229 }
230 // TODO hint to the user about using --cwd
230 // TODO hint to the user about using --cwd
231 // Bubble up the responsibility to Python for now
231 // Bubble up the responsibility to Python for now
232 Err(HgPathError::NotUnderRoot {
232 Err(HgPathError::NotUnderRoot {
233 path: original_name,
233 path: original_name,
234 root: root.to_owned(),
234 root: root.to_owned(),
235 })
235 })
236 }
236 }
237 }
237 }
238
238
239 /// Returns the representation of the path relative to the current working
239 /// Returns the representation of the path relative to the current working
240 /// directory for display purposes.
240 /// directory for display purposes.
241 ///
241 ///
242 /// `cwd` is a `HgPath`, so it is considered relative to the root directory
242 /// `cwd` is a `HgPath`, so it is considered relative to the root directory
243 /// of the repository.
243 /// of the repository.
244 ///
244 ///
245 /// # Examples
245 /// # Examples
246 ///
246 ///
247 /// ```
247 /// ```
248 /// use hg::utils::hg_path::HgPath;
248 /// use hg::utils::hg_path::HgPath;
249 /// use hg::utils::files::relativize_path;
249 /// use hg::utils::files::relativize_path;
250 /// use std::borrow::Cow;
250 /// use std::borrow::Cow;
251 ///
251 ///
252 /// let file = HgPath::new(b"nested/file");
252 /// let file = HgPath::new(b"nested/file");
253 /// let cwd = HgPath::new(b"");
253 /// let cwd = HgPath::new(b"");
254 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
254 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
255 ///
255 ///
256 /// let cwd = HgPath::new(b"nested");
256 /// let cwd = HgPath::new(b"nested");
257 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
257 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
258 ///
258 ///
259 /// let cwd = HgPath::new(b"other");
259 /// let cwd = HgPath::new(b"other");
260 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
260 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
261 /// ```
261 /// ```
262 pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
262 pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
263 if cwd.as_ref().is_empty() {
263 if cwd.as_ref().is_empty() {
264 Cow::Borrowed(path.as_bytes())
264 Cow::Borrowed(path.as_bytes())
265 } else {
265 } else {
266 // This is not all accurate as to how large `res` will actually be, but
266 // This is not all accurate as to how large `res` will actually be, but
267 // profiling `rhg files` on a large-ish repo shows it’s better than
267 // profiling `rhg files` on a large-ish repo shows it’s better than
268 // starting from a zero-capacity `Vec` and letting `extend` reallocate
268 // starting from a zero-capacity `Vec` and letting `extend` reallocate
269 // repeatedly.
269 // repeatedly.
270 let guesstimate = path.as_bytes().len();
270 let guesstimate = path.as_bytes().len();
271
271
272 let mut res: Vec<u8> = Vec::with_capacity(guesstimate);
272 let mut res: Vec<u8> = Vec::with_capacity(guesstimate);
273 let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable();
273 let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable();
274 let mut cwd_iter =
274 let mut cwd_iter =
275 cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable();
275 cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable();
276 loop {
276 loop {
277 match (path_iter.peek(), cwd_iter.peek()) {
277 match (path_iter.peek(), cwd_iter.peek()) {
278 (Some(a), Some(b)) if a == b => (),
278 (Some(a), Some(b)) if a == b => (),
279 _ => break,
279 _ => break,
280 }
280 }
281 path_iter.next();
281 path_iter.next();
282 cwd_iter.next();
282 cwd_iter.next();
283 }
283 }
284 let mut need_sep = false;
284 let mut need_sep = false;
285 for _ in cwd_iter {
285 for _ in cwd_iter {
286 if need_sep {
286 if need_sep {
287 res.extend(b"/")
287 res.extend(b"/")
288 } else {
288 } else {
289 need_sep = true
289 need_sep = true
290 };
290 };
291 res.extend(b"..");
291 res.extend(b"..");
292 }
292 }
293 for c in path_iter {
293 for c in path_iter {
294 if need_sep {
294 if need_sep {
295 res.extend(b"/")
295 res.extend(b"/")
296 } else {
296 } else {
297 need_sep = true
297 need_sep = true
298 };
298 };
299 res.extend(c);
299 res.extend(c);
300 }
300 }
301 Cow::Owned(res)
301 Cow::Owned(res)
302 }
302 }
303 }
303 }
304
304
305 #[cfg(test)]
305 #[cfg(test)]
306 mod tests {
306 mod tests {
307 use super::*;
307 use super::*;
308 use pretty_assertions::assert_eq;
308 use pretty_assertions::assert_eq;
309
309
310 #[test]
310 #[test]
311 fn find_dirs_some() {
311 fn find_dirs_some() {
312 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
312 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
313 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
313 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
314 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
314 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
315 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
315 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
316 assert_eq!(dirs.next(), None);
316 assert_eq!(dirs.next(), None);
317 assert_eq!(dirs.next(), None);
317 assert_eq!(dirs.next(), None);
318 }
318 }
319
319
320 #[test]
320 #[test]
321 fn find_dirs_empty() {
321 fn find_dirs_empty() {
322 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
322 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
323 let mut dirs = super::find_dirs(HgPath::new(b""));
323 let mut dirs = super::find_dirs(HgPath::new(b""));
324 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
324 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
325 assert_eq!(dirs.next(), None);
325 assert_eq!(dirs.next(), None);
326 assert_eq!(dirs.next(), None);
326 assert_eq!(dirs.next(), None);
327 }
327 }
328
328
329 #[test]
329 #[test]
330 fn test_find_dirs_with_base_some() {
330 fn test_find_dirs_with_base_some() {
331 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
331 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
332 assert_eq!(
332 assert_eq!(
333 dirs.next(),
333 dirs.next(),
334 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
334 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
335 );
335 );
336 assert_eq!(
336 assert_eq!(
337 dirs.next(),
337 dirs.next(),
338 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
338 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
339 );
339 );
340 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
340 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
341 assert_eq!(dirs.next(), None);
341 assert_eq!(dirs.next(), None);
342 assert_eq!(dirs.next(), None);
342 assert_eq!(dirs.next(), None);
343 }
343 }
344
344
345 #[test]
345 #[test]
346 fn test_find_dirs_with_base_empty() {
346 fn test_find_dirs_with_base_empty() {
347 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
347 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
348 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
348 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
349 assert_eq!(dirs.next(), None);
349 assert_eq!(dirs.next(), None);
350 assert_eq!(dirs.next(), None);
350 assert_eq!(dirs.next(), None);
351 }
351 }
352
352
353 #[test]
353 #[test]
354 fn test_canonical_path() {
354 fn test_canonical_path() {
355 let root = Path::new("/repo");
355 let root = Path::new("/repo");
356 let cwd = Path::new("/dir");
356 let cwd = Path::new("/dir");
357 let name = Path::new("filename");
357 let name = Path::new("filename");
358 assert_eq!(
358 assert_eq!(
359 canonical_path(root, cwd, name),
359 canonical_path(root, cwd, name),
360 Err(HgPathError::NotUnderRoot {
360 Err(HgPathError::NotUnderRoot {
361 path: PathBuf::from("/dir/filename"),
361 path: PathBuf::from("/dir/filename"),
362 root: root.to_path_buf()
362 root: root.to_path_buf()
363 })
363 })
364 );
364 );
365
365
366 let root = Path::new("/repo");
366 let root = Path::new("/repo");
367 let cwd = Path::new("/");
367 let cwd = Path::new("/");
368 let name = Path::new("filename");
368 let name = Path::new("filename");
369 assert_eq!(
369 assert_eq!(
370 canonical_path(root, cwd, name),
370 canonical_path(root, cwd, name),
371 Err(HgPathError::NotUnderRoot {
371 Err(HgPathError::NotUnderRoot {
372 path: PathBuf::from("/filename"),
372 path: PathBuf::from("/filename"),
373 root: root.to_path_buf()
373 root: root.to_path_buf()
374 })
374 })
375 );
375 );
376
376
377 let root = Path::new("/repo");
377 let root = Path::new("/repo");
378 let cwd = Path::new("/");
378 let cwd = Path::new("/");
379 let name = Path::new("repo/filename");
379 let name = Path::new("repo/filename");
380 assert_eq!(
380 assert_eq!(
381 canonical_path(root, cwd, name),
381 canonical_path(root, cwd, name),
382 Ok(PathBuf::from("filename"))
382 Ok(PathBuf::from("filename"))
383 );
383 );
384
384
385 let root = Path::new("/repo");
385 let root = Path::new("/repo");
386 let cwd = Path::new("/repo");
386 let cwd = Path::new("/repo");
387 let name = Path::new("filename");
387 let name = Path::new("filename");
388 assert_eq!(
388 assert_eq!(
389 canonical_path(root, cwd, name),
389 canonical_path(root, cwd, name),
390 Ok(PathBuf::from("filename"))
390 Ok(PathBuf::from("filename"))
391 );
391 );
392
392
393 let root = Path::new("/repo");
393 let root = Path::new("/repo");
394 let cwd = Path::new("/repo/subdir");
394 let cwd = Path::new("/repo/subdir");
395 let name = Path::new("filename");
395 let name = Path::new("filename");
396 assert_eq!(
396 assert_eq!(
397 canonical_path(root, cwd, name),
397 canonical_path(root, cwd, name),
398 Ok(PathBuf::from("subdir/filename"))
398 Ok(PathBuf::from("subdir/filename"))
399 );
399 );
400 }
400 }
401
401
402 #[test]
402 #[test]
403 fn test_canonical_path_not_rooted() {
403 fn test_canonical_path_not_rooted() {
404 use std::fs::create_dir;
404 use std::fs::create_dir;
405 use tempfile::tempdir;
405 use tempfile::tempdir;
406
406
407 let base_dir = tempdir().unwrap();
407 let base_dir = tempdir().unwrap();
408 let base_dir_path = base_dir.path();
408 let base_dir_path = base_dir.path();
409 let beneath_repo = base_dir_path.join("a");
409 let beneath_repo = base_dir_path.join("a");
410 let root = base_dir_path.join("a/b");
410 let root = base_dir_path.join("a/b");
411 let out_of_repo = base_dir_path.join("c");
411 let out_of_repo = base_dir_path.join("c");
412 let under_repo_symlink = out_of_repo.join("d");
412 let under_repo_symlink = out_of_repo.join("d");
413
413
414 create_dir(&beneath_repo).unwrap();
414 create_dir(&beneath_repo).unwrap();
415 create_dir(&root).unwrap();
415 create_dir(&root).unwrap();
416
416
417 // TODO make portable
417 // TODO make portable
418 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
418 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
419
419
420 assert_eq!(
420 assert_eq!(
421 canonical_path(&root, Path::new(""), out_of_repo),
421 canonical_path(&root, Path::new(""), out_of_repo),
422 Ok(PathBuf::from(""))
422 Ok(PathBuf::from(""))
423 );
423 );
424 assert_eq!(
424 assert_eq!(
425 canonical_path(&root, Path::new(""), &beneath_repo),
425 canonical_path(&root, Path::new(""), &beneath_repo),
426 Err(HgPathError::NotUnderRoot {
426 Err(HgPathError::NotUnderRoot {
427 path: beneath_repo,
427 path: beneath_repo,
428 root: root.to_owned()
428 root: root.to_owned()
429 })
429 })
430 );
430 );
431 assert_eq!(
431 assert_eq!(
432 canonical_path(&root, Path::new(""), &under_repo_symlink),
432 canonical_path(&root, Path::new(""), under_repo_symlink),
433 Ok(PathBuf::from("d"))
433 Ok(PathBuf::from("d"))
434 );
434 );
435 }
435 }
436 }
436 }
@@ -1,223 +1,223 b''
1 // path_auditor.rs
1 // path_auditor.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 //
5 //
6 // This software may be used and distributed according to the terms of the
6 // This software may be used and distributed according to the terms of the
7 // GNU General Public License version 2 or any later version.
7 // GNU General Public License version 2 or any later version.
8
8
9 use crate::utils::{
9 use crate::utils::{
10 files::lower_clean,
10 files::lower_clean,
11 find_slice_in_slice,
11 find_slice_in_slice,
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 };
13 };
14 use std::collections::HashSet;
14 use std::collections::HashSet;
15 use std::path::{Path, PathBuf};
15 use std::path::{Path, PathBuf};
16 use std::sync::{Mutex, RwLock};
16 use std::sync::{Mutex, RwLock};
17
17
18 /// Ensures that a path is valid for use in the repository i.e. does not use
18 /// Ensures that a path is valid for use in the repository i.e. does not use
19 /// any banned components, does not traverse a symlink, etc.
19 /// any banned components, does not traverse a symlink, etc.
20 #[derive(Debug, Default)]
20 #[derive(Debug, Default)]
21 pub struct PathAuditor {
21 pub struct PathAuditor {
22 audited: Mutex<HashSet<HgPathBuf>>,
22 audited: Mutex<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
24 root: PathBuf,
24 root: PathBuf,
25 }
25 }
26
26
27 impl PathAuditor {
27 impl PathAuditor {
28 pub fn new(root: impl AsRef<Path>) -> Self {
28 pub fn new(root: impl AsRef<Path>) -> Self {
29 Self {
29 Self {
30 root: root.as_ref().to_owned(),
30 root: root.as_ref().to_owned(),
31 ..Default::default()
31 ..Default::default()
32 }
32 }
33 }
33 }
34 pub fn audit_path(
34 pub fn audit_path(
35 &self,
35 &self,
36 path: impl AsRef<HgPath>,
36 path: impl AsRef<HgPath>,
37 ) -> Result<(), HgPathError> {
37 ) -> Result<(), HgPathError> {
38 // TODO windows "localpath" normalization
38 // TODO windows "localpath" normalization
39 let path = path.as_ref();
39 let path = path.as_ref();
40 if path.is_empty() {
40 if path.is_empty() {
41 return Ok(());
41 return Ok(());
42 }
42 }
43 // TODO case normalization
43 // TODO case normalization
44 if self.audited.lock().unwrap().contains(path) {
44 if self.audited.lock().unwrap().contains(path) {
45 return Ok(());
45 return Ok(());
46 }
46 }
47 // AIX ignores "/" at end of path, others raise EISDIR.
47 // AIX ignores "/" at end of path, others raise EISDIR.
48 let last_byte = path.as_bytes()[path.len() - 1];
48 let last_byte = path.as_bytes()[path.len() - 1];
49 if last_byte == b'/' || last_byte == b'\\' {
49 if last_byte == b'/' || last_byte == b'\\' {
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
51 }
51 }
52 let parts: Vec<_> = path
52 let parts: Vec<_> = path
53 .as_bytes()
53 .as_bytes()
54 .split(|b| std::path::is_separator(*b as char))
54 .split(|b| std::path::is_separator(*b as char))
55 .collect();
55 .collect();
56
56
57 let first_component = lower_clean(parts[0]);
57 let first_component = lower_clean(parts[0]);
58 let first_component = first_component.as_slice();
58 let first_component = first_component.as_slice();
59 if !path.split_drive().0.is_empty()
59 if !path.split_drive().0.is_empty()
60 || (first_component == b".hg"
60 || (first_component == b".hg"
61 || first_component == b".hg."
61 || first_component == b".hg."
62 || first_component == b"")
62 || first_component == b"")
63 || parts.iter().any(|c| c == b"..")
63 || parts.iter().any(|c| c == b"..")
64 {
64 {
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
66 }
66 }
67
67
68 // Windows shortname aliases
68 // Windows shortname aliases
69 for part in parts.iter() {
69 for part in parts.iter() {
70 if part.contains(&b'~') {
70 if part.contains(&b'~') {
71 let mut split = part.splitn(2, |b| *b == b'~');
71 let mut split = part.splitn(2, |b| *b == b'~');
72 let first =
72 let first =
73 split.next().unwrap().to_owned().to_ascii_uppercase();
73 split.next().unwrap().to_owned().to_ascii_uppercase();
74 let last = split.next().unwrap();
74 let last = split.next().unwrap();
75 if last.iter().all(u8::is_ascii_digit)
75 if last.iter().all(u8::is_ascii_digit)
76 && (first == b"HG" || first == b"HG8B6C")
76 && (first == b"HG" || first == b"HG8B6C")
77 {
77 {
78 return Err(HgPathError::ContainsIllegalComponent(
78 return Err(HgPathError::ContainsIllegalComponent(
79 path.to_owned(),
79 path.to_owned(),
80 ));
80 ));
81 }
81 }
82 }
82 }
83 }
83 }
84 let lower_path = lower_clean(path.as_bytes());
84 let lower_path = lower_clean(path.as_bytes());
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
86 let lower_parts: Vec<_> = path
86 let lower_parts: Vec<_> = path
87 .as_bytes()
87 .as_bytes()
88 .split(|b| std::path::is_separator(*b as char))
88 .split(|b| std::path::is_separator(*b as char))
89 .collect();
89 .collect();
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
91 if let Some(pos) = lower_parts[1..]
91 if let Some(pos) = lower_parts[1..]
92 .iter()
92 .iter()
93 .position(|part| part == &pattern.as_slice())
93 .position(|part| part == &pattern.as_slice())
94 {
94 {
95 let base = lower_parts[..=pos]
95 let base = lower_parts[..=pos]
96 .iter()
96 .iter()
97 .fold(HgPathBuf::new(), |acc, p| {
97 .fold(HgPathBuf::new(), |acc, p| {
98 acc.join(HgPath::new(p))
98 acc.join(HgPath::new(p))
99 });
99 });
100 return Err(HgPathError::IsInsideNestedRepo {
100 return Err(HgPathError::IsInsideNestedRepo {
101 path: path.to_owned(),
101 path: path.to_owned(),
102 nested_repo: base,
102 nested_repo: base,
103 });
103 });
104 }
104 }
105 }
105 }
106 }
106 }
107
107
108 let parts = &parts[..parts.len().saturating_sub(1)];
108 let parts = &parts[..parts.len().saturating_sub(1)];
109
109
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
111 // if there's a "foo/.hg" directory. This also means we won't
111 // if there's a "foo/.hg" directory. This also means we won't
112 // accidentally traverse a symlink into some other filesystem (which
112 // accidentally traverse a symlink into some other filesystem (which
113 // is potentially expensive to access).
113 // is potentially expensive to access).
114 for index in 0..parts.len() {
114 for index in 0..parts.len() {
115 let prefix = &parts[..=index].join(&b'/');
115 let prefix = &parts[..=index].join(&b'/');
116 let prefix = HgPath::new(prefix);
116 let prefix = HgPath::new(prefix);
117 if self.audited_dirs.read().unwrap().contains(prefix) {
117 if self.audited_dirs.read().unwrap().contains(prefix) {
118 continue;
118 continue;
119 }
119 }
120 self.check_filesystem(&prefix, &path)?;
120 self.check_filesystem(prefix, path)?;
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
122 }
122 }
123
123
124 self.audited.lock().unwrap().insert(path.to_owned());
124 self.audited.lock().unwrap().insert(path.to_owned());
125
125
126 Ok(())
126 Ok(())
127 }
127 }
128
128
129 pub fn check_filesystem(
129 pub fn check_filesystem(
130 &self,
130 &self,
131 prefix: impl AsRef<HgPath>,
131 prefix: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
133 ) -> Result<(), HgPathError> {
133 ) -> Result<(), HgPathError> {
134 let prefix = prefix.as_ref();
134 let prefix = prefix.as_ref();
135 let path = path.as_ref();
135 let path = path.as_ref();
136 let current_path = self.root.join(
136 let current_path = self.root.join(
137 hg_path_to_path_buf(prefix)
137 hg_path_to_path_buf(prefix)
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
139 );
139 );
140 match std::fs::symlink_metadata(&current_path) {
140 match std::fs::symlink_metadata(&current_path) {
141 Err(e) => {
141 Err(e) => {
142 // EINVAL can be raised as invalid path syntax under win32.
142 // EINVAL can be raised as invalid path syntax under win32.
143 if e.kind() != std::io::ErrorKind::NotFound
143 if e.kind() != std::io::ErrorKind::NotFound
144 && e.kind() != std::io::ErrorKind::InvalidInput
144 && e.kind() != std::io::ErrorKind::InvalidInput
145 && e.raw_os_error() != Some(20)
145 && e.raw_os_error() != Some(20)
146 {
146 {
147 // Rust does not yet have an `ErrorKind` for
147 // Rust does not yet have an `ErrorKind` for
148 // `NotADirectory` (errno 20)
148 // `NotADirectory` (errno 20)
149 // It happens if the dirstate contains `foo/bar` and
149 // It happens if the dirstate contains `foo/bar` and
150 // foo is not a directory
150 // foo is not a directory
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
152 }
152 }
153 }
153 }
154 Ok(meta) => {
154 Ok(meta) => {
155 if meta.file_type().is_symlink() {
155 if meta.file_type().is_symlink() {
156 return Err(HgPathError::TraversesSymbolicLink {
156 return Err(HgPathError::TraversesSymbolicLink {
157 path: path.to_owned(),
157 path: path.to_owned(),
158 symlink: prefix.to_owned(),
158 symlink: prefix.to_owned(),
159 });
159 });
160 }
160 }
161 if meta.file_type().is_dir()
161 if meta.file_type().is_dir()
162 && current_path.join(".hg").is_dir()
162 && current_path.join(".hg").is_dir()
163 {
163 {
164 return Err(HgPathError::IsInsideNestedRepo {
164 return Err(HgPathError::IsInsideNestedRepo {
165 path: path.to_owned(),
165 path: path.to_owned(),
166 nested_repo: prefix.to_owned(),
166 nested_repo: prefix.to_owned(),
167 });
167 });
168 }
168 }
169 }
169 }
170 };
170 };
171
171
172 Ok(())
172 Ok(())
173 }
173 }
174
174
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
176 self.audit_path(path).is_ok()
176 self.audit_path(path).is_ok()
177 }
177 }
178 }
178 }
179
179
180 #[cfg(test)]
180 #[cfg(test)]
181 mod tests {
181 mod tests {
182 use super::*;
182 use super::*;
183 use std::fs::{create_dir, File};
183 use std::fs::{create_dir, File};
184 use tempfile::tempdir;
184 use tempfile::tempdir;
185
185
186 #[test]
186 #[test]
187 fn test_path_auditor() {
187 fn test_path_auditor() {
188 let base_dir = tempdir().unwrap();
188 let base_dir = tempdir().unwrap();
189 let base_dir_path = base_dir.path();
189 let base_dir_path = base_dir.path();
190 let auditor = PathAuditor::new(base_dir_path);
190 let auditor = PathAuditor::new(base_dir_path);
191
191
192 let path = HgPath::new(b".hg/00changelog.i");
192 let path = HgPath::new(b".hg/00changelog.i");
193 assert_eq!(
193 assert_eq!(
194 auditor.audit_path(path),
194 auditor.audit_path(path),
195 Err(HgPathError::InsideDotHg(path.to_owned()))
195 Err(HgPathError::InsideDotHg(path.to_owned()))
196 );
196 );
197 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
197 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
198 assert_eq!(
198 assert_eq!(
199 auditor.audit_path(path),
199 auditor.audit_path(path),
200 Err(HgPathError::IsInsideNestedRepo {
200 Err(HgPathError::IsInsideNestedRepo {
201 path: path.to_owned(),
201 path: path.to_owned(),
202 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
202 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
203 })
203 })
204 );
204 );
205
205
206 create_dir(&base_dir_path.join("realdir")).unwrap();
206 create_dir(base_dir_path.join("realdir")).unwrap();
207 File::create(&base_dir_path.join("realdir/realfile")).unwrap();
207 File::create(base_dir_path.join("realdir/realfile")).unwrap();
208 // TODO make portable
208 // TODO make portable
209 std::os::unix::fs::symlink(
209 std::os::unix::fs::symlink(
210 &base_dir_path.join("realdir"),
210 base_dir_path.join("realdir"),
211 &base_dir_path.join("symlink"),
211 base_dir_path.join("symlink"),
212 )
212 )
213 .unwrap();
213 .unwrap();
214 let path = HgPath::new(b"symlink/realfile");
214 let path = HgPath::new(b"symlink/realfile");
215 assert_eq!(
215 assert_eq!(
216 auditor.audit_path(path),
216 auditor.audit_path(path),
217 Err(HgPathError::TraversesSymbolicLink {
217 Err(HgPathError::TraversesSymbolicLink {
218 path: path.to_owned(),
218 path: path.to_owned(),
219 symlink: HgPathBuf::from_bytes(b"symlink"),
219 symlink: HgPathBuf::from_bytes(b"symlink"),
220 })
220 })
221 );
221 );
222 }
222 }
223 }
223 }
@@ -1,307 +1,307 b''
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::status` module provided by the
8 //! Bindings for the `hg::status` module provided by the
9 //! `hg-core` crate. From Python, this will be seen as
9 //! `hg-core` crate. From Python, this will be seen as
10 //! `rustext.dirstate.status`.
10 //! `rustext.dirstate.status`.
11
11
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 use cpython::{
13 use cpython::{
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 };
16 };
17 use hg::dirstate::status::StatusPath;
17 use hg::dirstate::status::StatusPath;
18 use hg::matchers::{
18 use hg::matchers::{
19 DifferenceMatcher, IntersectionMatcher, Matcher, NeverMatcher,
19 DifferenceMatcher, IntersectionMatcher, Matcher, NeverMatcher,
20 UnionMatcher,
20 UnionMatcher,
21 };
21 };
22 use hg::{
22 use hg::{
23 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
23 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
24 parse_pattern_syntax,
24 parse_pattern_syntax,
25 utils::{
25 utils::{
26 files::{get_bytes_from_path, get_path_from_bytes},
26 files::{get_bytes_from_path, get_path_from_bytes},
27 hg_path::{HgPath, HgPathBuf},
27 hg_path::{HgPath, HgPathBuf},
28 },
28 },
29 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
29 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
30 StatusOptions,
30 StatusOptions,
31 };
31 };
32 use std::borrow::Borrow;
32 use std::borrow::Borrow;
33
33
34 fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
34 fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
35 collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
35 collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
36 }
36 }
37
37
38 /// This will be useless once trait impls for collection are added to `PyBytes`
38 /// This will be useless once trait impls for collection are added to `PyBytes`
39 /// upstream.
39 /// upstream.
40 fn collect_pybytes_list(
40 fn collect_pybytes_list(
41 py: Python,
41 py: Python,
42 iter: impl Iterator<Item = impl AsRef<HgPath>>,
42 iter: impl Iterator<Item = impl AsRef<HgPath>>,
43 ) -> PyList {
43 ) -> PyList {
44 let list = PyList::new(py, &[]);
44 let list = PyList::new(py, &[]);
45
45
46 for path in iter {
46 for path in iter {
47 list.append(
47 list.append(
48 py,
48 py,
49 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
49 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
50 )
50 )
51 }
51 }
52
52
53 list
53 list
54 }
54 }
55
55
56 fn collect_bad_matches(
56 fn collect_bad_matches(
57 py: Python,
57 py: Python,
58 collection: &[(impl AsRef<HgPath>, BadMatch)],
58 collection: &[(impl AsRef<HgPath>, BadMatch)],
59 ) -> PyResult<PyList> {
59 ) -> PyResult<PyList> {
60 let list = PyList::new(py, &[]);
60 let list = PyList::new(py, &[]);
61
61
62 let os = py.import("os")?;
62 let os = py.import("os")?;
63 let get_error_message = |code: i32| -> PyResult<_> {
63 let get_error_message = |code: i32| -> PyResult<_> {
64 os.call(
64 os.call(
65 py,
65 py,
66 "strerror",
66 "strerror",
67 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
67 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
68 None,
68 None,
69 )
69 )
70 };
70 };
71
71
72 for (path, bad_match) in collection.iter() {
72 for (path, bad_match) in collection.iter() {
73 let message = match bad_match {
73 let message = match bad_match {
74 BadMatch::OsError(code) => get_error_message(*code)?,
74 BadMatch::OsError(code) => get_error_message(*code)?,
75 BadMatch::BadType(bad_type) => {
75 BadMatch::BadType(bad_type) => {
76 format!("unsupported file type (type is {})", bad_type)
76 format!("unsupported file type (type is {})", bad_type)
77 .to_py_object(py)
77 .to_py_object(py)
78 .into_object()
78 .into_object()
79 }
79 }
80 };
80 };
81 list.append(
81 list.append(
82 py,
82 py,
83 (PyBytes::new(py, path.as_ref().as_bytes()), message)
83 (PyBytes::new(py, path.as_ref().as_bytes()), message)
84 .to_py_object(py)
84 .to_py_object(py)
85 .into_object(),
85 .into_object(),
86 )
86 )
87 }
87 }
88
88
89 Ok(list)
89 Ok(list)
90 }
90 }
91
91
92 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
92 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
93 match err {
93 match err {
94 StatusError::Pattern(e) => {
94 StatusError::Pattern(e) => {
95 let as_string = e.to_string();
95 let as_string = e.to_string();
96 log::trace!("Rust status fallback: `{}`", &as_string);
96 log::trace!("Rust status fallback: `{}`", &as_string);
97
97
98 PyErr::new::<FallbackError, _>(py, &as_string)
98 PyErr::new::<FallbackError, _>(py, &as_string)
99 }
99 }
100 e => PyErr::new::<ValueError, _>(py, e.to_string()),
100 e => PyErr::new::<ValueError, _>(py, e.to_string()),
101 }
101 }
102 }
102 }
103
103
104 pub fn status_wrapper(
104 pub fn status_wrapper(
105 py: Python,
105 py: Python,
106 dmap: DirstateMap,
106 dmap: DirstateMap,
107 matcher: PyObject,
107 matcher: PyObject,
108 root_dir: PyObject,
108 root_dir: PyObject,
109 ignore_files: PyList,
109 ignore_files: PyList,
110 check_exec: bool,
110 check_exec: bool,
111 list_clean: bool,
111 list_clean: bool,
112 list_ignored: bool,
112 list_ignored: bool,
113 list_unknown: bool,
113 list_unknown: bool,
114 collect_traversed_dirs: bool,
114 collect_traversed_dirs: bool,
115 ) -> PyResult<PyTuple> {
115 ) -> PyResult<PyTuple> {
116 let bytes = root_dir.extract::<PyBytes>(py)?;
116 let bytes = root_dir.extract::<PyBytes>(py)?;
117 let root_dir = get_path_from_bytes(bytes.data(py));
117 let root_dir = get_path_from_bytes(bytes.data(py));
118
118
119 let dmap: DirstateMap = dmap.to_py_object(py);
119 let dmap: DirstateMap = dmap.to_py_object(py);
120 let mut dmap = dmap.get_inner_mut(py);
120 let mut dmap = dmap.get_inner_mut(py);
121
121
122 let ignore_files: PyResult<Vec<_>> = ignore_files
122 let ignore_files: PyResult<Vec<_>> = ignore_files
123 .iter(py)
123 .iter(py)
124 .map(|b| {
124 .map(|b| {
125 let file = b.extract::<PyBytes>(py)?;
125 let file = b.extract::<PyBytes>(py)?;
126 Ok(get_path_from_bytes(file.data(py)).to_owned())
126 Ok(get_path_from_bytes(file.data(py)).to_owned())
127 })
127 })
128 .collect();
128 .collect();
129 let ignore_files = ignore_files?;
129 let ignore_files = ignore_files?;
130 // The caller may call `copymap.items()` separately
130 // The caller may call `copymap.items()` separately
131 let list_copies = false;
131 let list_copies = false;
132
132
133 let after_status = |res: Result<(DirstateStatus<'_>, _), StatusError>| {
133 let after_status = |res: Result<(DirstateStatus<'_>, _), StatusError>| {
134 let (status_res, warnings) =
134 let (status_res, warnings) =
135 res.map_err(|e| handle_fallback(py, e))?;
135 res.map_err(|e| handle_fallback(py, e))?;
136 build_response(py, status_res, warnings)
136 build_response(py, status_res, warnings)
137 };
137 };
138
138
139 let matcher = extract_matcher(py, matcher)?;
139 let matcher = extract_matcher(py, matcher)?;
140 dmap.with_status(
140 dmap.with_status(
141 &*matcher,
141 &*matcher,
142 root_dir.to_path_buf(),
142 root_dir.to_path_buf(),
143 ignore_files,
143 ignore_files,
144 StatusOptions {
144 StatusOptions {
145 check_exec,
145 check_exec,
146 list_clean,
146 list_clean,
147 list_ignored,
147 list_ignored,
148 list_unknown,
148 list_unknown,
149 list_copies,
149 list_copies,
150 collect_traversed_dirs,
150 collect_traversed_dirs,
151 },
151 },
152 after_status,
152 after_status,
153 )
153 )
154 }
154 }
155
155
156 /// Transform a Python matcher into a Rust matcher.
156 /// Transform a Python matcher into a Rust matcher.
157 fn extract_matcher(
157 fn extract_matcher(
158 py: Python,
158 py: Python,
159 matcher: PyObject,
159 matcher: PyObject,
160 ) -> PyResult<Box<dyn Matcher + Sync>> {
160 ) -> PyResult<Box<dyn Matcher + Sync>> {
161 match matcher.get_type(py).name(py).borrow() {
161 match matcher.get_type(py).name(py).borrow() {
162 "alwaysmatcher" => Ok(Box::new(AlwaysMatcher)),
162 "alwaysmatcher" => Ok(Box::new(AlwaysMatcher)),
163 "nevermatcher" => Ok(Box::new(NeverMatcher)),
163 "nevermatcher" => Ok(Box::new(NeverMatcher)),
164 "exactmatcher" => {
164 "exactmatcher" => {
165 let files = matcher.call_method(
165 let files = matcher.call_method(
166 py,
166 py,
167 "files",
167 "files",
168 PyTuple::new(py, &[]),
168 PyTuple::new(py, &[]),
169 None,
169 None,
170 )?;
170 )?;
171 let files: PyList = files.cast_into(py)?;
171 let files: PyList = files.cast_into(py)?;
172 let files: PyResult<Vec<HgPathBuf>> = files
172 let files: PyResult<Vec<HgPathBuf>> = files
173 .iter(py)
173 .iter(py)
174 .map(|f| {
174 .map(|f| {
175 Ok(HgPathBuf::from_bytes(
175 Ok(HgPathBuf::from_bytes(
176 f.extract::<PyBytes>(py)?.data(py),
176 f.extract::<PyBytes>(py)?.data(py),
177 ))
177 ))
178 })
178 })
179 .collect();
179 .collect();
180
180
181 let files = files?;
181 let files = files?;
182 let file_matcher = FileMatcher::new(files)
182 let file_matcher = FileMatcher::new(files)
183 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
183 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
184 Ok(Box::new(file_matcher))
184 Ok(Box::new(file_matcher))
185 }
185 }
186 "includematcher" => {
186 "includematcher" => {
187 // Get the patterns from Python even though most of them are
187 // Get the patterns from Python even though most of them are
188 // redundant with those we will parse later on, as they include
188 // redundant with those we will parse later on, as they include
189 // those passed from the command line.
189 // those passed from the command line.
190 let ignore_patterns: PyResult<Vec<_>> = matcher
190 let ignore_patterns: PyResult<Vec<_>> = matcher
191 .getattr(py, "_kindpats")?
191 .getattr(py, "_kindpats")?
192 .iter(py)?
192 .iter(py)?
193 .map(|k| {
193 .map(|k| {
194 let k = k?;
194 let k = k?;
195 let syntax = parse_pattern_syntax(
195 let syntax = parse_pattern_syntax(
196 &[
196 &[
197 k.get_item(py, 0)?
197 k.get_item(py, 0)?
198 .extract::<PyBytes>(py)?
198 .extract::<PyBytes>(py)?
199 .data(py),
199 .data(py),
200 &b":"[..],
200 &b":"[..],
201 ]
201 ]
202 .concat(),
202 .concat(),
203 )
203 )
204 .map_err(|e| {
204 .map_err(|e| {
205 handle_fallback(py, StatusError::Pattern(e))
205 handle_fallback(py, StatusError::Pattern(e))
206 })?;
206 })?;
207 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
207 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
208 let pattern = pattern.data(py);
208 let pattern = pattern.data(py);
209 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
209 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
210 let source = get_path_from_bytes(source.data(py));
210 let source = get_path_from_bytes(source.data(py));
211 let new = IgnorePattern::new(syntax, pattern, source);
211 let new = IgnorePattern::new(syntax, pattern, source);
212 Ok(new)
212 Ok(new)
213 })
213 })
214 .collect();
214 .collect();
215
215
216 let ignore_patterns = ignore_patterns?;
216 let ignore_patterns = ignore_patterns?;
217
217
218 let matcher = IncludeMatcher::new(ignore_patterns)
218 let matcher = IncludeMatcher::new(ignore_patterns)
219 .map_err(|e| handle_fallback(py, e.into()))?;
219 .map_err(|e| handle_fallback(py, e.into()))?;
220
220
221 Ok(Box::new(matcher))
221 Ok(Box::new(matcher))
222 }
222 }
223 "unionmatcher" => {
223 "unionmatcher" => {
224 let matchers: PyResult<Vec<_>> = matcher
224 let matchers: PyResult<Vec<_>> = matcher
225 .getattr(py, "_matchers")?
225 .getattr(py, "_matchers")?
226 .iter(py)?
226 .iter(py)?
227 .map(|py_matcher| extract_matcher(py, py_matcher?))
227 .map(|py_matcher| extract_matcher(py, py_matcher?))
228 .collect();
228 .collect();
229
229
230 Ok(Box::new(UnionMatcher::new(matchers?)))
230 Ok(Box::new(UnionMatcher::new(matchers?)))
231 }
231 }
232 "intersectionmatcher" => {
232 "intersectionmatcher" => {
233 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
233 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
234 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
234 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
235
235
236 Ok(Box::new(IntersectionMatcher::new(m1, m2)))
236 Ok(Box::new(IntersectionMatcher::new(m1, m2)))
237 }
237 }
238 "differencematcher" => {
238 "differencematcher" => {
239 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
239 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
240 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
240 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
241
241
242 Ok(Box::new(DifferenceMatcher::new(m1, m2)))
242 Ok(Box::new(DifferenceMatcher::new(m1, m2)))
243 }
243 }
244 e => Err(PyErr::new::<FallbackError, _>(
244 e => Err(PyErr::new::<FallbackError, _>(
245 py,
245 py,
246 format!("Unsupported matcher {}", e),
246 format!("Unsupported matcher {}", e),
247 )),
247 )),
248 }
248 }
249 }
249 }
250
250
251 fn build_response(
251 fn build_response(
252 py: Python,
252 py: Python,
253 status_res: DirstateStatus,
253 status_res: DirstateStatus,
254 warnings: Vec<PatternFileWarning>,
254 warnings: Vec<PatternFileWarning>,
255 ) -> PyResult<PyTuple> {
255 ) -> PyResult<PyTuple> {
256 let modified = collect_status_path_list(py, &status_res.modified);
256 let modified = collect_status_path_list(py, &status_res.modified);
257 let added = collect_status_path_list(py, &status_res.added);
257 let added = collect_status_path_list(py, &status_res.added);
258 let removed = collect_status_path_list(py, &status_res.removed);
258 let removed = collect_status_path_list(py, &status_res.removed);
259 let deleted = collect_status_path_list(py, &status_res.deleted);
259 let deleted = collect_status_path_list(py, &status_res.deleted);
260 let clean = collect_status_path_list(py, &status_res.clean);
260 let clean = collect_status_path_list(py, &status_res.clean);
261 let ignored = collect_status_path_list(py, &status_res.ignored);
261 let ignored = collect_status_path_list(py, &status_res.ignored);
262 let unknown = collect_status_path_list(py, &status_res.unknown);
262 let unknown = collect_status_path_list(py, &status_res.unknown);
263 let unsure = collect_status_path_list(py, &status_res.unsure);
263 let unsure = collect_status_path_list(py, &status_res.unsure);
264 let bad = collect_bad_matches(py, &status_res.bad)?;
264 let bad = collect_bad_matches(py, &status_res.bad)?;
265 let traversed = collect_pybytes_list(py, status_res.traversed.iter());
265 let traversed = collect_pybytes_list(py, status_res.traversed.iter());
266 let dirty = status_res.dirty.to_py_object(py);
266 let dirty = status_res.dirty.to_py_object(py);
267 let py_warnings = PyList::new(py, &[]);
267 let py_warnings = PyList::new(py, &[]);
268 for warning in warnings.iter() {
268 for warning in warnings.iter() {
269 // We use duck-typing on the Python side for dispatch, good enough for
269 // We use duck-typing on the Python side for dispatch, good enough for
270 // now.
270 // now.
271 match warning {
271 match warning {
272 PatternFileWarning::InvalidSyntax(file, syn) => {
272 PatternFileWarning::InvalidSyntax(file, syn) => {
273 py_warnings.append(
273 py_warnings.append(
274 py,
274 py,
275 (
275 (
276 PyBytes::new(py, &get_bytes_from_path(&file)),
276 PyBytes::new(py, &get_bytes_from_path(file)),
277 PyBytes::new(py, syn),
277 PyBytes::new(py, syn),
278 )
278 )
279 .to_py_object(py)
279 .to_py_object(py)
280 .into_object(),
280 .into_object(),
281 );
281 );
282 }
282 }
283 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
283 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
284 py,
284 py,
285 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
285 PyBytes::new(py, &get_bytes_from_path(file)).into_object(),
286 ),
286 ),
287 }
287 }
288 }
288 }
289
289
290 Ok(PyTuple::new(
290 Ok(PyTuple::new(
291 py,
291 py,
292 &[
292 &[
293 unsure.into_object(),
293 unsure.into_object(),
294 modified.into_object(),
294 modified.into_object(),
295 added.into_object(),
295 added.into_object(),
296 removed.into_object(),
296 removed.into_object(),
297 deleted.into_object(),
297 deleted.into_object(),
298 clean.into_object(),
298 clean.into_object(),
299 ignored.into_object(),
299 ignored.into_object(),
300 unknown.into_object(),
300 unknown.into_object(),
301 py_warnings.into_object(),
301 py_warnings.into_object(),
302 bad.into_object(),
302 bad.into_object(),
303 traversed.into_object(),
303 traversed.into_object(),
304 dirty.into_object(),
304 dirty.into_object(),
305 ][..],
305 ][..],
306 ))
306 ))
307 }
307 }
@@ -1,524 +1,524 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 cindex,
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 PyRevision,
11 PyRevision,
12 };
12 };
13 use cpython::{
13 use cpython::{
14 buffer::{Element, PyBuffer},
14 buffer::{Element, PyBuffer},
15 exc::{IndexError, ValueError},
15 exc::{IndexError, ValueError},
16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 };
18 };
19 use hg::{
19 use hg::{
20 nodemap::{Block, NodeMapError, NodeTree},
20 nodemap::{Block, NodeMapError, NodeTree},
21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 BaseRevision, Revision, UncheckedRevision,
22 BaseRevision, Revision, UncheckedRevision,
23 };
23 };
24 use std::cell::RefCell;
24 use std::cell::RefCell;
25
25
26 /// Return a Struct implementing the Graph trait
26 /// Return a Struct implementing the Graph trait
27 pub(crate) fn pyindex_to_graph(
27 pub(crate) fn pyindex_to_graph(
28 py: Python,
28 py: Python,
29 index: PyObject,
29 index: PyObject,
30 ) -> PyResult<cindex::Index> {
30 ) -> PyResult<cindex::Index> {
31 match index.extract::<MixedIndex>(py) {
31 match index.extract::<MixedIndex>(py) {
32 Ok(midx) => Ok(midx.clone_cindex(py)),
32 Ok(midx) => Ok(midx.clone_cindex(py)),
33 Err(_) => cindex::Index::new(py, index),
33 Err(_) => cindex::Index::new(py, index),
34 }
34 }
35 }
35 }
36
36
37 py_class!(pub class MixedIndex |py| {
37 py_class!(pub class MixedIndex |py| {
38 data cindex: RefCell<cindex::Index>;
38 data cindex: RefCell<cindex::Index>;
39 data nt: RefCell<Option<NodeTree>>;
39 data nt: RefCell<Option<NodeTree>>;
40 data docket: RefCell<Option<PyObject>>;
40 data docket: RefCell<Option<PyObject>>;
41 // Holds a reference to the mmap'ed persistent nodemap data
41 // Holds a reference to the mmap'ed persistent nodemap data
42 data mmap: RefCell<Option<PyBuffer>>;
42 data mmap: RefCell<Option<PyBuffer>>;
43
43
44 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
45 Self::new(py, cindex)
45 Self::new(py, cindex)
46 }
46 }
47
47
48 /// Compatibility layer used for Python consumers needing access to the C index
48 /// Compatibility layer used for Python consumers needing access to the C index
49 ///
49 ///
50 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
51 /// that may need to build a custom `nodetree`, based on a specified revset.
51 /// that may need to build a custom `nodetree`, based on a specified revset.
52 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 /// With a Rust implementation of the nodemap, we will be able to get rid of
53 /// this, by exposing our own standalone nodemap class,
53 /// this, by exposing our own standalone nodemap class,
54 /// ready to accept `MixedIndex`.
54 /// ready to accept `MixedIndex`.
55 def get_cindex(&self) -> PyResult<PyObject> {
55 def get_cindex(&self) -> PyResult<PyObject> {
56 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 Ok(self.cindex(py).borrow().inner().clone_ref(py))
57 }
57 }
58
58
59 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
60
60
61 /// Return Revision if found, raises a bare `error.RevlogError`
61 /// Return Revision if found, raises a bare `error.RevlogError`
62 /// in case of ambiguity, same as C version does
62 /// in case of ambiguity, same as C version does
63 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
63 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
64 let opt = self.get_nodetree(py)?.borrow();
64 let opt = self.get_nodetree(py)?.borrow();
65 let nt = opt.as_ref().unwrap();
65 let nt = opt.as_ref().unwrap();
66 let idx = &*self.cindex(py).borrow();
66 let idx = &*self.cindex(py).borrow();
67 let node = node_from_py_bytes(py, &node)?;
67 let node = node_from_py_bytes(py, &node)?;
68 let res = nt.find_bin(idx, node.into());
68 let res = nt.find_bin(idx, node.into());
69 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
69 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
70 }
70 }
71
71
72 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
72 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
73 /// is not found.
73 /// is not found.
74 ///
74 ///
75 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
75 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
76 /// will catch and rewrap with it
76 /// will catch and rewrap with it
77 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
77 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
78 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
78 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
79 }
79 }
80
80
81 /// return True if the node exist in the index
81 /// return True if the node exist in the index
82 def has_node(&self, node: PyBytes) -> PyResult<bool> {
82 def has_node(&self, node: PyBytes) -> PyResult<bool> {
83 self.get_rev(py, node).map(|opt| opt.is_some())
83 self.get_rev(py, node).map(|opt| opt.is_some())
84 }
84 }
85
85
86 /// find length of shortest hex nodeid of a binary ID
86 /// find length of shortest hex nodeid of a binary ID
87 def shortest(&self, node: PyBytes) -> PyResult<usize> {
87 def shortest(&self, node: PyBytes) -> PyResult<usize> {
88 let opt = self.get_nodetree(py)?.borrow();
88 let opt = self.get_nodetree(py)?.borrow();
89 let nt = opt.as_ref().unwrap();
89 let nt = opt.as_ref().unwrap();
90 let idx = &*self.cindex(py).borrow();
90 let idx = &*self.cindex(py).borrow();
91 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
91 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
92 {
92 {
93 Ok(Some(l)) => Ok(l),
93 Ok(Some(l)) => Ok(l),
94 Ok(None) => Err(revlog_error(py)),
94 Ok(None) => Err(revlog_error(py)),
95 Err(e) => Err(nodemap_error(py, e)),
95 Err(e) => Err(nodemap_error(py, e)),
96 }
96 }
97 }
97 }
98
98
99 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
99 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
100 let opt = self.get_nodetree(py)?.borrow();
100 let opt = self.get_nodetree(py)?.borrow();
101 let nt = opt.as_ref().unwrap();
101 let nt = opt.as_ref().unwrap();
102 let idx = &*self.cindex(py).borrow();
102 let idx = &*self.cindex(py).borrow();
103
103
104 let node_as_string = if cfg!(feature = "python3-sys") {
104 let node_as_string = if cfg!(feature = "python3-sys") {
105 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
105 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
106 }
106 }
107 else {
107 else {
108 let node = node.extract::<PyBytes>(py)?;
108 let node = node.extract::<PyBytes>(py)?;
109 String::from_utf8_lossy(node.data(py)).to_string()
109 String::from_utf8_lossy(node.data(py)).to_string()
110 };
110 };
111
111
112 let prefix = NodePrefix::from_hex(&node_as_string)
112 let prefix = NodePrefix::from_hex(&node_as_string)
113 .map_err(|_| PyErr::new::<ValueError, _>(
113 .map_err(|_| PyErr::new::<ValueError, _>(
114 py, format!("Invalid node or prefix '{}'", node_as_string))
114 py, format!("Invalid node or prefix '{}'", node_as_string))
115 )?;
115 )?;
116
116
117 nt.find_bin(idx, prefix)
117 nt.find_bin(idx, prefix)
118 // TODO make an inner API returning the node directly
118 // TODO make an inner API returning the node directly
119 .map(|opt| opt.map(
119 .map(|opt| opt.map(
120 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
120 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
121 .map_err(|e| nodemap_error(py, e))
121 .map_err(|e| nodemap_error(py, e))
122
122
123 }
123 }
124
124
125 /// append an index entry
125 /// append an index entry
126 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
126 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
127 if tup.len(py) < 8 {
127 if tup.len(py) < 8 {
128 // this is better than the panic promised by tup.get_item()
128 // this is better than the panic promised by tup.get_item()
129 return Err(
129 return Err(
130 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
130 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
131 }
131 }
132 let node_bytes = tup.get_item(py, 7).extract(py)?;
132 let node_bytes = tup.get_item(py, 7).extract(py)?;
133 let node = node_from_py_object(py, &node_bytes)?;
133 let node = node_from_py_object(py, &node_bytes)?;
134
134
135 let mut idx = self.cindex(py).borrow_mut();
135 let mut idx = self.cindex(py).borrow_mut();
136
136
137 // This is ok since we will just add the revision to the index
137 // This is ok since we will just add the revision to the index
138 let rev = Revision(idx.len() as BaseRevision);
138 let rev = Revision(idx.len() as BaseRevision);
139 idx.append(py, tup)?;
139 idx.append(py, tup)?;
140
140
141 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
141 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
142 .insert(&*idx, &node, rev)
142 .insert(&*idx, &node, rev)
143 .map_err(|e| nodemap_error(py, e))?;
143 .map_err(|e| nodemap_error(py, e))?;
144 Ok(py.None())
144 Ok(py.None())
145 }
145 }
146
146
147 def __delitem__(&self, key: PyObject) -> PyResult<()> {
147 def __delitem__(&self, key: PyObject) -> PyResult<()> {
148 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
148 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
149 self.cindex(py).borrow().inner().del_item(py, key)?;
149 self.cindex(py).borrow().inner().del_item(py, key)?;
150 let mut opt = self.get_nodetree(py)?.borrow_mut();
150 let mut opt = self.get_nodetree(py)?.borrow_mut();
151 let nt = opt.as_mut().unwrap();
151 let nt = opt.as_mut().unwrap();
152 nt.invalidate_all();
152 nt.invalidate_all();
153 self.fill_nodemap(py, nt)?;
153 self.fill_nodemap(py, nt)?;
154 Ok(())
154 Ok(())
155 }
155 }
156
156
157 //
157 //
158 // Reforwarded C index API
158 // Reforwarded C index API
159 //
159 //
160
160
161 // index_methods (tp_methods). Same ordering as in revlog.c
161 // index_methods (tp_methods). Same ordering as in revlog.c
162
162
163 /// return the gca set of the given revs
163 /// return the gca set of the given revs
164 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
164 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
165 self.call_cindex(py, "ancestors", args, kw)
165 self.call_cindex(py, "ancestors", args, kw)
166 }
166 }
167
167
168 /// return the heads of the common ancestors of the given revs
168 /// return the heads of the common ancestors of the given revs
169 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
169 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
170 self.call_cindex(py, "commonancestorsheads", args, kw)
170 self.call_cindex(py, "commonancestorsheads", args, kw)
171 }
171 }
172
172
173 /// Clear the index caches and inner py_class data.
173 /// Clear the index caches and inner py_class data.
174 /// It is Python's responsibility to call `update_nodemap_data` again.
174 /// It is Python's responsibility to call `update_nodemap_data` again.
175 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
175 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
176 self.nt(py).borrow_mut().take();
176 self.nt(py).borrow_mut().take();
177 self.docket(py).borrow_mut().take();
177 self.docket(py).borrow_mut().take();
178 self.mmap(py).borrow_mut().take();
178 self.mmap(py).borrow_mut().take();
179 self.call_cindex(py, "clearcaches", args, kw)
179 self.call_cindex(py, "clearcaches", args, kw)
180 }
180 }
181
181
182 /// return the raw binary string representing a revision
182 /// return the raw binary string representing a revision
183 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
183 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
184 self.call_cindex(py, "entry_binary", args, kw)
184 self.call_cindex(py, "entry_binary", args, kw)
185 }
185 }
186
186
187 /// return a binary packed version of the header
187 /// return a binary packed version of the header
188 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
188 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
189 self.call_cindex(py, "pack_header", args, kw)
189 self.call_cindex(py, "pack_header", args, kw)
190 }
190 }
191
191
192 /// get an index entry
192 /// get an index entry
193 def get(&self, *args, **kw) -> PyResult<PyObject> {
193 def get(&self, *args, **kw) -> PyResult<PyObject> {
194 self.call_cindex(py, "get", args, kw)
194 self.call_cindex(py, "get", args, kw)
195 }
195 }
196
196
197 /// compute phases
197 /// compute phases
198 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
198 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
199 self.call_cindex(py, "computephasesmapsets", args, kw)
199 self.call_cindex(py, "computephasesmapsets", args, kw)
200 }
200 }
201
201
202 /// reachableroots
202 /// reachableroots
203 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
203 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
204 self.call_cindex(py, "reachableroots2", args, kw)
204 self.call_cindex(py, "reachableroots2", args, kw)
205 }
205 }
206
206
207 /// get head revisions
207 /// get head revisions
208 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
208 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
209 self.call_cindex(py, "headrevs", args, kw)
209 self.call_cindex(py, "headrevs", args, kw)
210 }
210 }
211
211
212 /// get filtered head revisions
212 /// get filtered head revisions
213 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
213 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
214 self.call_cindex(py, "headrevsfiltered", args, kw)
214 self.call_cindex(py, "headrevsfiltered", args, kw)
215 }
215 }
216
216
217 /// True if the object is a snapshot
217 /// True if the object is a snapshot
218 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
218 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
219 self.call_cindex(py, "issnapshot", args, kw)
219 self.call_cindex(py, "issnapshot", args, kw)
220 }
220 }
221
221
222 /// Gather snapshot data in a cache dict
222 /// Gather snapshot data in a cache dict
223 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
223 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
224 self.call_cindex(py, "findsnapshots", args, kw)
224 self.call_cindex(py, "findsnapshots", args, kw)
225 }
225 }
226
226
227 /// determine revisions with deltas to reconstruct fulltext
227 /// determine revisions with deltas to reconstruct fulltext
228 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
228 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
229 self.call_cindex(py, "deltachain", args, kw)
229 self.call_cindex(py, "deltachain", args, kw)
230 }
230 }
231
231
232 /// slice planned chunk read to reach a density threshold
232 /// slice planned chunk read to reach a density threshold
233 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
233 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
234 self.call_cindex(py, "slicechunktodensity", args, kw)
234 self.call_cindex(py, "slicechunktodensity", args, kw)
235 }
235 }
236
236
237 /// stats for the index
237 /// stats for the index
238 def stats(&self, *args, **kw) -> PyResult<PyObject> {
238 def stats(&self, *args, **kw) -> PyResult<PyObject> {
239 self.call_cindex(py, "stats", args, kw)
239 self.call_cindex(py, "stats", args, kw)
240 }
240 }
241
241
242 // index_sequence_methods and index_mapping_methods.
242 // index_sequence_methods and index_mapping_methods.
243 //
243 //
244 // Since we call back through the high level Python API,
244 // Since we call back through the high level Python API,
245 // there's no point making a distinction between index_get
245 // there's no point making a distinction between index_get
246 // and index_getitem.
246 // and index_getitem.
247
247
248 def __len__(&self) -> PyResult<usize> {
248 def __len__(&self) -> PyResult<usize> {
249 self.cindex(py).borrow().inner().len(py)
249 self.cindex(py).borrow().inner().len(py)
250 }
250 }
251
251
252 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
252 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
253 // this conversion seems needless, but that's actually because
253 // this conversion seems needless, but that's actually because
254 // `index_getitem` does not handle conversion from PyLong,
254 // `index_getitem` does not handle conversion from PyLong,
255 // which expressions such as [e for e in index] internally use.
255 // which expressions such as [e for e in index] internally use.
256 // Note that we don't seem to have a direct way to call
256 // Note that we don't seem to have a direct way to call
257 // PySequence_GetItem (does the job), which would possibly be better
257 // PySequence_GetItem (does the job), which would possibly be better
258 // for performance
258 // for performance
259 let key = match key.extract::<i32>(py) {
259 let key = match key.extract::<i32>(py) {
260 Ok(rev) => rev.to_py_object(py).into_object(),
260 Ok(rev) => rev.to_py_object(py).into_object(),
261 Err(_) => key,
261 Err(_) => key,
262 };
262 };
263 self.cindex(py).borrow().inner().get_item(py, key)
263 self.cindex(py).borrow().inner().get_item(py, key)
264 }
264 }
265
265
266 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
266 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
267 self.cindex(py).borrow().inner().set_item(py, key, value)
267 self.cindex(py).borrow().inner().set_item(py, key, value)
268 }
268 }
269
269
270 def __contains__(&self, item: PyObject) -> PyResult<bool> {
270 def __contains__(&self, item: PyObject) -> PyResult<bool> {
271 // ObjectProtocol does not seem to provide contains(), so
271 // ObjectProtocol does not seem to provide contains(), so
272 // this is an equivalent implementation of the index_contains()
272 // this is an equivalent implementation of the index_contains()
273 // defined in revlog.c
273 // defined in revlog.c
274 let cindex = self.cindex(py).borrow();
274 let cindex = self.cindex(py).borrow();
275 match item.extract::<i32>(py) {
275 match item.extract::<i32>(py) {
276 Ok(rev) => {
276 Ok(rev) => {
277 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
277 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
278 }
278 }
279 Err(_) => {
279 Err(_) => {
280 cindex.inner().call_method(
280 cindex.inner().call_method(
281 py,
281 py,
282 "has_node",
282 "has_node",
283 PyTuple::new(py, &[item]),
283 PyTuple::new(py, &[item]),
284 None)?
284 None)?
285 .extract(py)
285 .extract(py)
286 }
286 }
287 }
287 }
288 }
288 }
289
289
290 def nodemap_data_all(&self) -> PyResult<PyBytes> {
290 def nodemap_data_all(&self) -> PyResult<PyBytes> {
291 self.inner_nodemap_data_all(py)
291 self.inner_nodemap_data_all(py)
292 }
292 }
293
293
294 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
294 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
295 self.inner_nodemap_data_incremental(py)
295 self.inner_nodemap_data_incremental(py)
296 }
296 }
297 def update_nodemap_data(
297 def update_nodemap_data(
298 &self,
298 &self,
299 docket: PyObject,
299 docket: PyObject,
300 nm_data: PyObject
300 nm_data: PyObject
301 ) -> PyResult<PyObject> {
301 ) -> PyResult<PyObject> {
302 self.inner_update_nodemap_data(py, docket, nm_data)
302 self.inner_update_nodemap_data(py, docket, nm_data)
303 }
303 }
304
304
305 @property
305 @property
306 def entry_size(&self) -> PyResult<PyInt> {
306 def entry_size(&self) -> PyResult<PyInt> {
307 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
307 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
308 }
308 }
309
309
310 @property
310 @property
311 def rust_ext_compat(&self) -> PyResult<PyInt> {
311 def rust_ext_compat(&self) -> PyResult<PyInt> {
312 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
312 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
313 }
313 }
314
314
315 });
315 });
316
316
317 impl MixedIndex {
317 impl MixedIndex {
318 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
318 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
319 Self::create_instance(
319 Self::create_instance(
320 py,
320 py,
321 RefCell::new(cindex::Index::new(py, cindex)?),
321 RefCell::new(cindex::Index::new(py, cindex)?),
322 RefCell::new(None),
322 RefCell::new(None),
323 RefCell::new(None),
323 RefCell::new(None),
324 RefCell::new(None),
324 RefCell::new(None),
325 )
325 )
326 }
326 }
327
327
328 /// This is scaffolding at this point, but it could also become
328 /// This is scaffolding at this point, but it could also become
329 /// a way to start a persistent nodemap or perform a
329 /// a way to start a persistent nodemap or perform a
330 /// vacuum / repack operation
330 /// vacuum / repack operation
331 fn fill_nodemap(
331 fn fill_nodemap(
332 &self,
332 &self,
333 py: Python,
333 py: Python,
334 nt: &mut NodeTree,
334 nt: &mut NodeTree,
335 ) -> PyResult<PyObject> {
335 ) -> PyResult<PyObject> {
336 let index = self.cindex(py).borrow();
336 let index = self.cindex(py).borrow();
337 for r in 0..index.len() {
337 for r in 0..index.len() {
338 let rev = Revision(r as BaseRevision);
338 let rev = Revision(r as BaseRevision);
339 // in this case node() won't ever return None
339 // in this case node() won't ever return None
340 nt.insert(&*index, index.node(rev).unwrap(), rev)
340 nt.insert(&*index, index.node(rev).unwrap(), rev)
341 .map_err(|e| nodemap_error(py, e))?
341 .map_err(|e| nodemap_error(py, e))?
342 }
342 }
343 Ok(py.None())
343 Ok(py.None())
344 }
344 }
345
345
346 fn get_nodetree<'a>(
346 fn get_nodetree<'a>(
347 &'a self,
347 &'a self,
348 py: Python<'a>,
348 py: Python<'a>,
349 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
349 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
350 if self.nt(py).borrow().is_none() {
350 if self.nt(py).borrow().is_none() {
351 let readonly = Box::new(Vec::new());
351 let readonly = Box::<Vec<_>>::default();
352 let mut nt = NodeTree::load_bytes(readonly, 0);
352 let mut nt = NodeTree::load_bytes(readonly, 0);
353 self.fill_nodemap(py, &mut nt)?;
353 self.fill_nodemap(py, &mut nt)?;
354 self.nt(py).borrow_mut().replace(nt);
354 self.nt(py).borrow_mut().replace(nt);
355 }
355 }
356 Ok(self.nt(py))
356 Ok(self.nt(py))
357 }
357 }
358
358
359 /// forward a method call to the underlying C index
359 /// forward a method call to the underlying C index
360 fn call_cindex(
360 fn call_cindex(
361 &self,
361 &self,
362 py: Python,
362 py: Python,
363 name: &str,
363 name: &str,
364 args: &PyTuple,
364 args: &PyTuple,
365 kwargs: Option<&PyDict>,
365 kwargs: Option<&PyDict>,
366 ) -> PyResult<PyObject> {
366 ) -> PyResult<PyObject> {
367 self.cindex(py)
367 self.cindex(py)
368 .borrow()
368 .borrow()
369 .inner()
369 .inner()
370 .call_method(py, name, args, kwargs)
370 .call_method(py, name, args, kwargs)
371 }
371 }
372
372
373 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
373 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
374 self.cindex(py).borrow().clone_ref(py)
374 self.cindex(py).borrow().clone_ref(py)
375 }
375 }
376
376
377 /// Returns the full nodemap bytes to be written as-is to disk
377 /// Returns the full nodemap bytes to be written as-is to disk
378 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
378 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
379 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
379 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
380 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
380 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
381
381
382 // If there's anything readonly, we need to build the data again from
382 // If there's anything readonly, we need to build the data again from
383 // scratch
383 // scratch
384 let bytes = if readonly.len() > 0 {
384 let bytes = if readonly.len() > 0 {
385 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
385 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
386 self.fill_nodemap(py, &mut nt)?;
386 self.fill_nodemap(py, &mut nt)?;
387
387
388 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
388 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
389 assert_eq!(readonly.len(), 0);
389 assert_eq!(readonly.len(), 0);
390
390
391 bytes
391 bytes
392 } else {
392 } else {
393 bytes
393 bytes
394 };
394 };
395
395
396 let bytes = PyBytes::new(py, &bytes);
396 let bytes = PyBytes::new(py, &bytes);
397 Ok(bytes)
397 Ok(bytes)
398 }
398 }
399
399
400 /// Returns the last saved docket along with the size of any changed data
400 /// Returns the last saved docket along with the size of any changed data
401 /// (in number of blocks), and said data as bytes.
401 /// (in number of blocks), and said data as bytes.
402 fn inner_nodemap_data_incremental(
402 fn inner_nodemap_data_incremental(
403 &self,
403 &self,
404 py: Python,
404 py: Python,
405 ) -> PyResult<PyObject> {
405 ) -> PyResult<PyObject> {
406 let docket = self.docket(py).borrow();
406 let docket = self.docket(py).borrow();
407 let docket = match docket.as_ref() {
407 let docket = match docket.as_ref() {
408 Some(d) => d,
408 Some(d) => d,
409 None => return Ok(py.None()),
409 None => return Ok(py.None()),
410 };
410 };
411
411
412 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
412 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
413 let masked_blocks = node_tree.masked_readonly_blocks();
413 let masked_blocks = node_tree.masked_readonly_blocks();
414 let (_, data) = node_tree.into_readonly_and_added_bytes();
414 let (_, data) = node_tree.into_readonly_and_added_bytes();
415 let changed = masked_blocks * std::mem::size_of::<Block>();
415 let changed = masked_blocks * std::mem::size_of::<Block>();
416
416
417 Ok((docket, changed, PyBytes::new(py, &data))
417 Ok((docket, changed, PyBytes::new(py, &data))
418 .to_py_object(py)
418 .to_py_object(py)
419 .into_object())
419 .into_object())
420 }
420 }
421
421
422 /// Update the nodemap from the new (mmaped) data.
422 /// Update the nodemap from the new (mmaped) data.
423 /// The docket is kept as a reference for later incremental calls.
423 /// The docket is kept as a reference for later incremental calls.
424 fn inner_update_nodemap_data(
424 fn inner_update_nodemap_data(
425 &self,
425 &self,
426 py: Python,
426 py: Python,
427 docket: PyObject,
427 docket: PyObject,
428 nm_data: PyObject,
428 nm_data: PyObject,
429 ) -> PyResult<PyObject> {
429 ) -> PyResult<PyObject> {
430 let buf = PyBuffer::get(py, &nm_data)?;
430 let buf = PyBuffer::get(py, &nm_data)?;
431 let len = buf.item_count();
431 let len = buf.item_count();
432
432
433 // Build a slice from the mmap'ed buffer data
433 // Build a slice from the mmap'ed buffer data
434 let cbuf = buf.buf_ptr();
434 let cbuf = buf.buf_ptr();
435 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
435 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
436 && buf.is_c_contiguous()
436 && buf.is_c_contiguous()
437 && u8::is_compatible_format(buf.format())
437 && u8::is_compatible_format(buf.format())
438 {
438 {
439 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
439 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
440 } else {
440 } else {
441 return Err(PyErr::new::<ValueError, _>(
441 return Err(PyErr::new::<ValueError, _>(
442 py,
442 py,
443 "Nodemap data buffer has an invalid memory representation"
443 "Nodemap data buffer has an invalid memory representation"
444 .to_string(),
444 .to_string(),
445 ));
445 ));
446 };
446 };
447
447
448 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
448 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
449 // pointer.
449 // pointer.
450 self.mmap(py).borrow_mut().replace(buf);
450 self.mmap(py).borrow_mut().replace(buf);
451
451
452 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
452 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
453
453
454 let data_tip = docket
454 let data_tip = docket
455 .getattr(py, "tip_rev")?
455 .getattr(py, "tip_rev")?
456 .extract::<BaseRevision>(py)?
456 .extract::<BaseRevision>(py)?
457 .into();
457 .into();
458 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
458 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
459 let idx = self.cindex(py).borrow();
459 let idx = self.cindex(py).borrow();
460 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
460 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
461 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
461 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
462 })?;
462 })?;
463 let current_tip = idx.len();
463 let current_tip = idx.len();
464
464
465 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
465 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
466 let rev = Revision(r);
466 let rev = Revision(r);
467 // in this case node() won't ever return None
467 // in this case node() won't ever return None
468 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
468 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
469 .map_err(|e| nodemap_error(py, e))?
469 .map_err(|e| nodemap_error(py, e))?
470 }
470 }
471
471
472 *self.nt(py).borrow_mut() = Some(nt);
472 *self.nt(py).borrow_mut() = Some(nt);
473
473
474 Ok(py.None())
474 Ok(py.None())
475 }
475 }
476 }
476 }
477
477
478 fn revlog_error(py: Python) -> PyErr {
478 fn revlog_error(py: Python) -> PyErr {
479 match py
479 match py
480 .import("mercurial.error")
480 .import("mercurial.error")
481 .and_then(|m| m.get(py, "RevlogError"))
481 .and_then(|m| m.get(py, "RevlogError"))
482 {
482 {
483 Err(e) => e,
483 Err(e) => e,
484 Ok(cls) => PyErr::from_instance(
484 Ok(cls) => PyErr::from_instance(
485 py,
485 py,
486 cls.call(py, (py.None(),), None).ok().into_py_object(py),
486 cls.call(py, (py.None(),), None).ok().into_py_object(py),
487 ),
487 ),
488 }
488 }
489 }
489 }
490
490
491 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
491 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
492 PyErr::new::<ValueError, _>(
492 PyErr::new::<ValueError, _>(
493 py,
493 py,
494 format!(
494 format!(
495 "Inconsistency: Revision {} found in nodemap \
495 "Inconsistency: Revision {} found in nodemap \
496 is not in revlog index",
496 is not in revlog index",
497 rev
497 rev
498 ),
498 ),
499 )
499 )
500 }
500 }
501
501
502 /// Standard treatment of NodeMapError
502 /// Standard treatment of NodeMapError
503 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
503 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
504 match err {
504 match err {
505 NodeMapError::MultipleResults => revlog_error(py),
505 NodeMapError::MultipleResults => revlog_error(py),
506 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
506 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
507 }
507 }
508 }
508 }
509
509
510 /// Create the module, with __package__ given from parent
510 /// Create the module, with __package__ given from parent
511 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
511 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
512 let dotted_name = &format!("{}.revlog", package);
512 let dotted_name = &format!("{}.revlog", package);
513 let m = PyModule::new(py, dotted_name)?;
513 let m = PyModule::new(py, dotted_name)?;
514 m.add(py, "__package__", package)?;
514 m.add(py, "__package__", package)?;
515 m.add(py, "__doc__", "RevLog - Rust implementations")?;
515 m.add(py, "__doc__", "RevLog - Rust implementations")?;
516
516
517 m.add_class::<MixedIndex>(py)?;
517 m.add_class::<MixedIndex>(py)?;
518
518
519 let sys = PyModule::import(py, "sys")?;
519 let sys = PyModule::import(py, "sys")?;
520 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
520 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
521 sys_modules.set_item(py, dotted_name, &m)?;
521 sys_modules.set_item(py, dotted_name, &m)?;
522
522
523 Ok(m)
523 Ok(m)
524 }
524 }
@@ -1,116 +1,116 b''
1 use crate::error::CommandError;
1 use crate::error::CommandError;
2 use clap::Arg;
2 use clap::Arg;
3 use format_bytes::format_bytes;
3 use format_bytes::format_bytes;
4 use hg::operations::cat;
4 use hg::operations::cat;
5 use hg::utils::hg_path::HgPathBuf;
5 use hg::utils::hg_path::HgPathBuf;
6 use std::ffi::OsString;
6 use std::ffi::OsString;
7 use std::os::unix::prelude::OsStrExt;
7 use std::os::unix::prelude::OsStrExt;
8
8
9 pub const HELP_TEXT: &str = "
9 pub const HELP_TEXT: &str = "
10 Output the current or given revision of files
10 Output the current or given revision of files
11 ";
11 ";
12
12
13 pub fn args() -> clap::Command {
13 pub fn args() -> clap::Command {
14 clap::command!("cat")
14 clap::command!("cat")
15 .arg(
15 .arg(
16 Arg::new("rev")
16 Arg::new("rev")
17 .help("search the repository as it is in REV")
17 .help("search the repository as it is in REV")
18 .short('r')
18 .short('r')
19 .long("rev")
19 .long("rev")
20 .value_name("REV"),
20 .value_name("REV"),
21 )
21 )
22 .arg(
22 .arg(
23 clap::Arg::new("files")
23 clap::Arg::new("files")
24 .required(true)
24 .required(true)
25 .num_args(1..)
25 .num_args(1..)
26 .value_name("FILE")
26 .value_name("FILE")
27 .value_parser(clap::value_parser!(std::ffi::OsString))
27 .value_parser(clap::value_parser!(std::ffi::OsString))
28 .help("Files to output"),
28 .help("Files to output"),
29 )
29 )
30 .about(HELP_TEXT)
30 .about(HELP_TEXT)
31 }
31 }
32
32
33 #[logging_timer::time("trace")]
33 #[logging_timer::time("trace")]
34 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
34 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
35 let cat_enabled = invocation.config.get_bool(b"rhg", b"cat")?;
35 let cat_enabled = invocation.config.get_bool(b"rhg", b"cat")?;
36 if !cat_enabled {
36 if !cat_enabled {
37 return Err(CommandError::unsupported(
37 return Err(CommandError::unsupported(
38 "cat is disabled in rhg (enable it with 'rhg.cat = true' \
38 "cat is disabled in rhg (enable it with 'rhg.cat = true' \
39 or enable fallback with 'rhg.on-unsupported = fallback')",
39 or enable fallback with 'rhg.on-unsupported = fallback')",
40 ));
40 ));
41 }
41 }
42
42
43 let rev = invocation.subcommand_args.get_one::<String>("rev");
43 let rev = invocation.subcommand_args.get_one::<String>("rev");
44 let file_args =
44 let file_args =
45 match invocation.subcommand_args.get_many::<OsString>("files") {
45 match invocation.subcommand_args.get_many::<OsString>("files") {
46 Some(files) => files
46 Some(files) => files
47 .filter(|s| !s.is_empty())
47 .filter(|s| !s.is_empty())
48 .map(|s| s.as_os_str())
48 .map(|s| s.as_os_str())
49 .collect(),
49 .collect(),
50 None => vec![],
50 None => vec![],
51 };
51 };
52
52
53 let repo = invocation.repo?;
53 let repo = invocation.repo?;
54 let cwd = hg::utils::current_dir()?;
54 let cwd = hg::utils::current_dir()?;
55 let working_directory = repo.working_directory_path();
55 let working_directory = repo.working_directory_path();
56 let working_directory = cwd.join(working_directory); // Make it absolute
56 let working_directory = cwd.join(working_directory); // Make it absolute
57
57
58 let mut files = vec![];
58 let mut files = vec![];
59 for file in file_args {
59 for file in file_args {
60 if file.as_bytes().starts_with(b"set:") {
60 if file.as_bytes().starts_with(b"set:") {
61 let message = "fileset";
61 let message = "fileset";
62 return Err(CommandError::unsupported(message));
62 return Err(CommandError::unsupported(message));
63 }
63 }
64
64
65 let normalized = cwd.join(&file);
65 let normalized = cwd.join(file);
66 // TODO: actually normalize `..` path segments etc?
66 // TODO: actually normalize `..` path segments etc?
67 let dotted = normalized.components().any(|c| c.as_os_str() == "..");
67 let dotted = normalized.components().any(|c| c.as_os_str() == "..");
68 if file.as_bytes() == b"." || dotted {
68 if file.as_bytes() == b"." || dotted {
69 let message = "`..` or `.` path segment";
69 let message = "`..` or `.` path segment";
70 return Err(CommandError::unsupported(message));
70 return Err(CommandError::unsupported(message));
71 }
71 }
72 let relative_path = working_directory
72 let relative_path = working_directory
73 .strip_prefix(&cwd)
73 .strip_prefix(&cwd)
74 .unwrap_or(&working_directory);
74 .unwrap_or(&working_directory);
75 let stripped = normalized
75 let stripped = normalized
76 .strip_prefix(&working_directory)
76 .strip_prefix(&working_directory)
77 .map_err(|_| {
77 .map_err(|_| {
78 CommandError::abort(format!(
78 CommandError::abort(format!(
79 "abort: {} not under root '{}'\n(consider using '--cwd {}')",
79 "abort: {} not under root '{}'\n(consider using '--cwd {}')",
80 String::from_utf8_lossy(file.as_bytes()),
80 String::from_utf8_lossy(file.as_bytes()),
81 working_directory.display(),
81 working_directory.display(),
82 relative_path.display(),
82 relative_path.display(),
83 ))
83 ))
84 })?;
84 })?;
85 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
85 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
86 .map_err(|e| CommandError::abort(e.to_string()))?;
86 .map_err(|e| CommandError::abort(e.to_string()))?;
87 files.push(hg_file);
87 files.push(hg_file);
88 }
88 }
89 let files = files.iter().map(|file| file.as_ref()).collect();
89 let files = files.iter().map(|file| file.as_ref()).collect();
90 // TODO probably move this to a util function like `repo.default_rev` or
90 // TODO probably move this to a util function like `repo.default_rev` or
91 // something when it's used somewhere else
91 // something when it's used somewhere else
92 let rev = match rev {
92 let rev = match rev {
93 Some(r) => r.to_string(),
93 Some(r) => r.to_string(),
94 None => format!("{:x}", repo.dirstate_parents()?.p1),
94 None => format!("{:x}", repo.dirstate_parents()?.p1),
95 };
95 };
96
96
97 let output = cat(repo, &rev, files).map_err(|e| (e, rev.as_str()))?;
97 let output = cat(repo, &rev, files).map_err(|e| (e, rev.as_str()))?;
98 for (_file, contents) in output.results {
98 for (_file, contents) in output.results {
99 invocation.ui.write_stdout(&contents)?;
99 invocation.ui.write_stdout(&contents)?;
100 }
100 }
101 if !output.missing.is_empty() {
101 if !output.missing.is_empty() {
102 let short = format!("{:x}", output.node.short()).into_bytes();
102 let short = format!("{:x}", output.node.short()).into_bytes();
103 for path in &output.missing {
103 for path in &output.missing {
104 invocation.ui.write_stderr(&format_bytes!(
104 invocation.ui.write_stderr(&format_bytes!(
105 b"{}: no such file in rev {}\n",
105 b"{}: no such file in rev {}\n",
106 path.as_bytes(),
106 path.as_bytes(),
107 short
107 short
108 ))?;
108 ))?;
109 }
109 }
110 }
110 }
111 if output.found_any {
111 if output.found_any {
112 Ok(())
112 Ok(())
113 } else {
113 } else {
114 Err(CommandError::Unsuccessful)
114 Err(CommandError::Unsuccessful)
115 }
115 }
116 }
116 }
@@ -1,71 +1,71 b''
1 use crate::error::CommandError;
1 use crate::error::CommandError;
2 use clap::Arg;
2 use clap::Arg;
3 use clap::ArgGroup;
3 use clap::ArgGroup;
4 use hg::operations::{debug_data, DebugDataKind};
4 use hg::operations::{debug_data, DebugDataKind};
5
5
6 pub const HELP_TEXT: &str = "
6 pub const HELP_TEXT: &str = "
7 Dump the contents of a data file revision
7 Dump the contents of a data file revision
8 ";
8 ";
9
9
10 pub fn args() -> clap::Command {
10 pub fn args() -> clap::Command {
11 clap::command!("debugdata")
11 clap::command!("debugdata")
12 .arg(
12 .arg(
13 Arg::new("changelog")
13 Arg::new("changelog")
14 .help("open changelog")
14 .help("open changelog")
15 .short('c')
15 .short('c')
16 .action(clap::ArgAction::SetTrue),
16 .action(clap::ArgAction::SetTrue),
17 )
17 )
18 .arg(
18 .arg(
19 Arg::new("manifest")
19 Arg::new("manifest")
20 .help("open manifest")
20 .help("open manifest")
21 .short('m')
21 .short('m')
22 .action(clap::ArgAction::SetTrue),
22 .action(clap::ArgAction::SetTrue),
23 )
23 )
24 .group(
24 .group(
25 ArgGroup::new("revlog")
25 ArgGroup::new("revlog")
26 .args(&["changelog", "manifest"])
26 .args(["changelog", "manifest"])
27 .required(true),
27 .required(true),
28 )
28 )
29 .arg(
29 .arg(
30 Arg::new("rev")
30 Arg::new("rev")
31 .help("revision")
31 .help("revision")
32 .required(true)
32 .required(true)
33 .value_name("REV"),
33 .value_name("REV"),
34 )
34 )
35 .about(HELP_TEXT)
35 .about(HELP_TEXT)
36 }
36 }
37
37
38 #[logging_timer::time("trace")]
38 #[logging_timer::time("trace")]
39 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
39 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
40 let args = invocation.subcommand_args;
40 let args = invocation.subcommand_args;
41 let rev = args
41 let rev = args
42 .get_one::<String>("rev")
42 .get_one::<String>("rev")
43 .expect("rev should be a required argument");
43 .expect("rev should be a required argument");
44 let kind = match (
44 let kind = match (
45 args.get_one::<bool>("changelog").unwrap(),
45 args.get_one::<bool>("changelog").unwrap(),
46 args.get_one::<bool>("manifest").unwrap(),
46 args.get_one::<bool>("manifest").unwrap(),
47 ) {
47 ) {
48 (true, false) => DebugDataKind::Changelog,
48 (true, false) => DebugDataKind::Changelog,
49 (false, true) => DebugDataKind::Manifest,
49 (false, true) => DebugDataKind::Manifest,
50 (true, true) => {
50 (true, true) => {
51 unreachable!("Should not happen since options are exclusive")
51 unreachable!("Should not happen since options are exclusive")
52 }
52 }
53 (false, false) => {
53 (false, false) => {
54 unreachable!("Should not happen since options are required")
54 unreachable!("Should not happen since options are required")
55 }
55 }
56 };
56 };
57
57
58 let repo = invocation.repo?;
58 let repo = invocation.repo?;
59 if repo.has_narrow() {
59 if repo.has_narrow() {
60 return Err(CommandError::unsupported(
60 return Err(CommandError::unsupported(
61 "support for ellipsis nodes is missing and repo has narrow enabled",
61 "support for ellipsis nodes is missing and repo has narrow enabled",
62 ));
62 ));
63 }
63 }
64 let data = debug_data(repo, rev, kind).map_err(|e| (e, rev.as_ref()))?;
64 let data = debug_data(repo, rev, kind).map_err(|e| (e, rev.as_ref()))?;
65
65
66 let mut stdout = invocation.ui.stdout_buffer();
66 let mut stdout = invocation.ui.stdout_buffer();
67 stdout.write_all(&data)?;
67 stdout.write_all(&data)?;
68 stdout.flush()?;
68 stdout.flush()?;
69
69
70 Ok(())
70 Ok(())
71 }
71 }
@@ -1,28 +1,28 b''
1 use crate::error::CommandError;
1 use crate::error::CommandError;
2 use format_bytes::format_bytes;
2 use format_bytes::format_bytes;
3 use hg::errors::{IoErrorContext, IoResultExt};
3 use hg::errors::{IoErrorContext, IoResultExt};
4 use hg::utils::files::get_bytes_from_path;
4 use hg::utils::files::get_bytes_from_path;
5
5
6 pub const HELP_TEXT: &str = "
6 pub const HELP_TEXT: &str = "
7 Print the root directory of the current repository.
7 Print the root directory of the current repository.
8
8
9 Returns 0 on success.
9 Returns 0 on success.
10 ";
10 ";
11
11
12 pub fn args() -> clap::Command {
12 pub fn args() -> clap::Command {
13 clap::command!("root").about(HELP_TEXT)
13 clap::command!("root").about(HELP_TEXT)
14 }
14 }
15
15
16 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
16 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
17 let repo = invocation.repo?;
17 let repo = invocation.repo?;
18 let working_directory = repo.working_directory_path();
18 let working_directory = repo.working_directory_path();
19 let working_directory = std::fs::canonicalize(working_directory)
19 let working_directory = std::fs::canonicalize(working_directory)
20 .with_context(|| {
20 .with_context(|| {
21 IoErrorContext::CanonicalizingPath(working_directory.to_owned())
21 IoErrorContext::CanonicalizingPath(working_directory.to_owned())
22 })?;
22 })?;
23 let bytes = get_bytes_from_path(&working_directory);
23 let bytes = get_bytes_from_path(working_directory);
24 invocation
24 invocation
25 .ui
25 .ui
26 .write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
26 .write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
27 Ok(())
27 Ok(())
28 }
28 }
@@ -1,842 +1,842 b''
1 extern crate log;
1 extern crate log;
2 use crate::error::CommandError;
2 use crate::error::CommandError;
3 use crate::ui::{local_to_utf8, Ui};
3 use crate::ui::{local_to_utf8, Ui};
4 use clap::{command, Arg, ArgMatches};
4 use clap::{command, Arg, ArgMatches};
5 use format_bytes::{format_bytes, join};
5 use format_bytes::{format_bytes, join};
6 use hg::config::{Config, ConfigSource, PlainInfo};
6 use hg::config::{Config, ConfigSource, PlainInfo};
7 use hg::repo::{Repo, RepoError};
7 use hg::repo::{Repo, RepoError};
8 use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes};
8 use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes};
9 use hg::utils::SliceExt;
9 use hg::utils::SliceExt;
10 use hg::{exit_codes, requirements};
10 use hg::{exit_codes, requirements};
11 use std::borrow::Cow;
11 use std::borrow::Cow;
12 use std::collections::HashSet;
12 use std::collections::HashSet;
13 use std::ffi::OsString;
13 use std::ffi::OsString;
14 use std::os::unix::prelude::CommandExt;
14 use std::os::unix::prelude::CommandExt;
15 use std::path::PathBuf;
15 use std::path::PathBuf;
16 use std::process::Command;
16 use std::process::Command;
17
17
18 mod blackbox;
18 mod blackbox;
19 mod color;
19 mod color;
20 mod error;
20 mod error;
21 mod ui;
21 mod ui;
22 pub mod utils {
22 pub mod utils {
23 pub mod path_utils;
23 pub mod path_utils;
24 }
24 }
25
25
26 fn main_with_result(
26 fn main_with_result(
27 argv: Vec<OsString>,
27 argv: Vec<OsString>,
28 process_start_time: &blackbox::ProcessStartTime,
28 process_start_time: &blackbox::ProcessStartTime,
29 ui: &ui::Ui,
29 ui: &ui::Ui,
30 repo: Result<&Repo, &NoRepoInCwdError>,
30 repo: Result<&Repo, &NoRepoInCwdError>,
31 config: &Config,
31 config: &Config,
32 ) -> Result<(), CommandError> {
32 ) -> Result<(), CommandError> {
33 check_unsupported(config, repo)?;
33 check_unsupported(config, repo)?;
34
34
35 let app = command!()
35 let app = command!()
36 .subcommand_required(true)
36 .subcommand_required(true)
37 .arg(
37 .arg(
38 Arg::new("repository")
38 Arg::new("repository")
39 .help("repository root directory")
39 .help("repository root directory")
40 .short('R')
40 .short('R')
41 .value_name("REPO")
41 .value_name("REPO")
42 // Both ok: `hg -R ./foo log` or `hg log -R ./foo`
42 // Both ok: `hg -R ./foo log` or `hg log -R ./foo`
43 .global(true),
43 .global(true),
44 )
44 )
45 .arg(
45 .arg(
46 Arg::new("config")
46 Arg::new("config")
47 .help("set/override config option (use 'section.name=value')")
47 .help("set/override config option (use 'section.name=value')")
48 .value_name("CONFIG")
48 .value_name("CONFIG")
49 .global(true)
49 .global(true)
50 .long("config")
50 .long("config")
51 // Ok: `--config section.key1=val --config section.key2=val2`
51 // Ok: `--config section.key1=val --config section.key2=val2`
52 // Not ok: `--config section.key1=val section.key2=val2`
52 // Not ok: `--config section.key1=val section.key2=val2`
53 .action(clap::ArgAction::Append),
53 .action(clap::ArgAction::Append),
54 )
54 )
55 .arg(
55 .arg(
56 Arg::new("cwd")
56 Arg::new("cwd")
57 .help("change working directory")
57 .help("change working directory")
58 .value_name("DIR")
58 .value_name("DIR")
59 .long("cwd")
59 .long("cwd")
60 .global(true),
60 .global(true),
61 )
61 )
62 .arg(
62 .arg(
63 Arg::new("color")
63 Arg::new("color")
64 .help("when to colorize (boolean, always, auto, never, or debug)")
64 .help("when to colorize (boolean, always, auto, never, or debug)")
65 .value_name("TYPE")
65 .value_name("TYPE")
66 .long("color")
66 .long("color")
67 .global(true),
67 .global(true),
68 )
68 )
69 .version("0.0.1");
69 .version("0.0.1");
70 let app = add_subcommand_args(app);
70 let app = add_subcommand_args(app);
71
71
72 let matches = app.try_get_matches_from(argv.iter())?;
72 let matches = app.try_get_matches_from(argv.iter())?;
73
73
74 let (subcommand_name, subcommand_args) =
74 let (subcommand_name, subcommand_args) =
75 matches.subcommand().expect("subcommand required");
75 matches.subcommand().expect("subcommand required");
76
76
77 // Mercurial allows users to define "defaults" for commands, fallback
77 // Mercurial allows users to define "defaults" for commands, fallback
78 // if a default is detected for the current command
78 // if a default is detected for the current command
79 let defaults = config.get_str(b"defaults", subcommand_name.as_bytes())?;
79 let defaults = config.get_str(b"defaults", subcommand_name.as_bytes())?;
80 match defaults {
80 match defaults {
81 // Programmatic usage might set defaults to an empty string to unset
81 // Programmatic usage might set defaults to an empty string to unset
82 // it; allow that
82 // it; allow that
83 None | Some("") => {}
83 None | Some("") => {}
84 Some(_) => {
84 Some(_) => {
85 let msg = "`defaults` config set";
85 let msg = "`defaults` config set";
86 return Err(CommandError::unsupported(msg));
86 return Err(CommandError::unsupported(msg));
87 }
87 }
88 }
88 }
89
89
90 for prefix in ["pre", "post", "fail"].iter() {
90 for prefix in ["pre", "post", "fail"].iter() {
91 // Mercurial allows users to define generic hooks for commands,
91 // Mercurial allows users to define generic hooks for commands,
92 // fallback if any are detected
92 // fallback if any are detected
93 let item = format!("{}-{}", prefix, subcommand_name);
93 let item = format!("{}-{}", prefix, subcommand_name);
94 let hook_for_command =
94 let hook_for_command =
95 config.get_str_no_default(b"hooks", item.as_bytes())?;
95 config.get_str_no_default(b"hooks", item.as_bytes())?;
96 if hook_for_command.is_some() {
96 if hook_for_command.is_some() {
97 let msg = format!("{}-{} hook defined", prefix, subcommand_name);
97 let msg = format!("{}-{} hook defined", prefix, subcommand_name);
98 return Err(CommandError::unsupported(msg));
98 return Err(CommandError::unsupported(msg));
99 }
99 }
100 }
100 }
101 let run = subcommand_run_fn(subcommand_name)
101 let run = subcommand_run_fn(subcommand_name)
102 .expect("unknown subcommand name from clap despite Command::subcommand_required");
102 .expect("unknown subcommand name from clap despite Command::subcommand_required");
103
103
104 let invocation = CliInvocation {
104 let invocation = CliInvocation {
105 ui,
105 ui,
106 subcommand_args,
106 subcommand_args,
107 config,
107 config,
108 repo,
108 repo,
109 };
109 };
110
110
111 if let Ok(repo) = repo {
111 if let Ok(repo) = repo {
112 // We don't support subrepos, fallback if the subrepos file is present
112 // We don't support subrepos, fallback if the subrepos file is present
113 if repo.working_directory_vfs().join(".hgsub").exists() {
113 if repo.working_directory_vfs().join(".hgsub").exists() {
114 let msg = "subrepos (.hgsub is present)";
114 let msg = "subrepos (.hgsub is present)";
115 return Err(CommandError::unsupported(msg));
115 return Err(CommandError::unsupported(msg));
116 }
116 }
117 }
117 }
118
118
119 if config.is_extension_enabled(b"blackbox") {
119 if config.is_extension_enabled(b"blackbox") {
120 let blackbox =
120 let blackbox =
121 blackbox::Blackbox::new(&invocation, process_start_time)?;
121 blackbox::Blackbox::new(&invocation, process_start_time)?;
122 blackbox.log_command_start(argv.iter());
122 blackbox.log_command_start(argv.iter());
123 let result = run(&invocation);
123 let result = run(&invocation);
124 blackbox.log_command_end(
124 blackbox.log_command_end(
125 argv.iter(),
125 argv.iter(),
126 exit_code(
126 exit_code(
127 &result,
127 &result,
128 // TODO: show a warning or combine with original error if
128 // TODO: show a warning or combine with original error if
129 // `get_bool` returns an error
129 // `get_bool` returns an error
130 config
130 config
131 .get_bool(b"ui", b"detailed-exit-code")
131 .get_bool(b"ui", b"detailed-exit-code")
132 .unwrap_or(false),
132 .unwrap_or(false),
133 ),
133 ),
134 );
134 );
135 result
135 result
136 } else {
136 } else {
137 run(&invocation)
137 run(&invocation)
138 }
138 }
139 }
139 }
140
140
141 fn rhg_main(argv: Vec<OsString>) -> ! {
141 fn rhg_main(argv: Vec<OsString>) -> ! {
142 // Run this first, before we find out if the blackbox extension is even
142 // Run this first, before we find out if the blackbox extension is even
143 // enabled, in order to include everything in-between in the duration
143 // enabled, in order to include everything in-between in the duration
144 // measurements. Reading config files can be slow if they’re on NFS.
144 // measurements. Reading config files can be slow if they’re on NFS.
145 let process_start_time = blackbox::ProcessStartTime::now();
145 let process_start_time = blackbox::ProcessStartTime::now();
146
146
147 env_logger::init();
147 env_logger::init();
148
148
149 // Make sure nothing in a future version of `rhg` sets the global
149 // Make sure nothing in a future version of `rhg` sets the global
150 // threadpool before we can cap default threads. (This is also called
150 // threadpool before we can cap default threads. (This is also called
151 // in core because Python uses the same code path, we're adding a
151 // in core because Python uses the same code path, we're adding a
152 // redundant check.)
152 // redundant check.)
153 hg::utils::cap_default_rayon_threads()
153 hg::utils::cap_default_rayon_threads()
154 .expect("Rayon threadpool already initialized");
154 .expect("Rayon threadpool already initialized");
155
155
156 let early_args = EarlyArgs::parse(&argv);
156 let early_args = EarlyArgs::parse(&argv);
157
157
158 let initial_current_dir = early_args.cwd.map(|cwd| {
158 let initial_current_dir = early_args.cwd.map(|cwd| {
159 let cwd = get_path_from_bytes(&cwd);
159 let cwd = get_path_from_bytes(&cwd);
160 std::env::current_dir()
160 std::env::current_dir()
161 .and_then(|initial| {
161 .and_then(|initial| {
162 std::env::set_current_dir(cwd)?;
162 std::env::set_current_dir(cwd)?;
163 Ok(initial)
163 Ok(initial)
164 })
164 })
165 .unwrap_or_else(|error| {
165 .unwrap_or_else(|error| {
166 exit(
166 exit(
167 &argv,
167 &argv,
168 &None,
168 &None,
169 &Ui::new_infallible(&Config::empty()),
169 &Ui::new_infallible(&Config::empty()),
170 OnUnsupported::Abort,
170 OnUnsupported::Abort,
171 Err(CommandError::abort(format!(
171 Err(CommandError::abort(format!(
172 "abort: {}: '{}'",
172 "abort: {}: '{}'",
173 error,
173 error,
174 cwd.display()
174 cwd.display()
175 ))),
175 ))),
176 false,
176 false,
177 )
177 )
178 })
178 })
179 });
179 });
180
180
181 let mut non_repo_config =
181 let mut non_repo_config =
182 Config::load_non_repo().unwrap_or_else(|error| {
182 Config::load_non_repo().unwrap_or_else(|error| {
183 // Normally this is decided based on config, but we don’t have that
183 // Normally this is decided based on config, but we don’t have that
184 // available. As of this writing config loading never returns an
184 // available. As of this writing config loading never returns an
185 // "unsupported" error but that is not enforced by the type system.
185 // "unsupported" error but that is not enforced by the type system.
186 let on_unsupported = OnUnsupported::Abort;
186 let on_unsupported = OnUnsupported::Abort;
187
187
188 exit(
188 exit(
189 &argv,
189 &argv,
190 &initial_current_dir,
190 &initial_current_dir,
191 &Ui::new_infallible(&Config::empty()),
191 &Ui::new_infallible(&Config::empty()),
192 on_unsupported,
192 on_unsupported,
193 Err(error.into()),
193 Err(error.into()),
194 false,
194 false,
195 )
195 )
196 });
196 });
197
197
198 non_repo_config
198 non_repo_config
199 .load_cli_args(early_args.config, early_args.color)
199 .load_cli_args(early_args.config, early_args.color)
200 .unwrap_or_else(|error| {
200 .unwrap_or_else(|error| {
201 exit(
201 exit(
202 &argv,
202 &argv,
203 &initial_current_dir,
203 &initial_current_dir,
204 &Ui::new_infallible(&non_repo_config),
204 &Ui::new_infallible(&non_repo_config),
205 OnUnsupported::from_config(&non_repo_config),
205 OnUnsupported::from_config(&non_repo_config),
206 Err(error.into()),
206 Err(error.into()),
207 non_repo_config
207 non_repo_config
208 .get_bool(b"ui", b"detailed-exit-code")
208 .get_bool(b"ui", b"detailed-exit-code")
209 .unwrap_or(false),
209 .unwrap_or(false),
210 )
210 )
211 });
211 });
212
212
213 if let Some(repo_path_bytes) = &early_args.repo {
213 if let Some(repo_path_bytes) = &early_args.repo {
214 lazy_static::lazy_static! {
214 lazy_static::lazy_static! {
215 static ref SCHEME_RE: regex::bytes::Regex =
215 static ref SCHEME_RE: regex::bytes::Regex =
216 // Same as `_matchscheme` in `mercurial/util.py`
216 // Same as `_matchscheme` in `mercurial/util.py`
217 regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap();
217 regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap();
218 }
218 }
219 if SCHEME_RE.is_match(repo_path_bytes) {
219 if SCHEME_RE.is_match(repo_path_bytes) {
220 exit(
220 exit(
221 &argv,
221 &argv,
222 &initial_current_dir,
222 &initial_current_dir,
223 &Ui::new_infallible(&non_repo_config),
223 &Ui::new_infallible(&non_repo_config),
224 OnUnsupported::from_config(&non_repo_config),
224 OnUnsupported::from_config(&non_repo_config),
225 Err(CommandError::UnsupportedFeature {
225 Err(CommandError::UnsupportedFeature {
226 message: format_bytes!(
226 message: format_bytes!(
227 b"URL-like --repository {}",
227 b"URL-like --repository {}",
228 repo_path_bytes
228 repo_path_bytes
229 ),
229 ),
230 }),
230 }),
231 // TODO: show a warning or combine with original error if
231 // TODO: show a warning or combine with original error if
232 // `get_bool` returns an error
232 // `get_bool` returns an error
233 non_repo_config
233 non_repo_config
234 .get_bool(b"ui", b"detailed-exit-code")
234 .get_bool(b"ui", b"detailed-exit-code")
235 .unwrap_or(false),
235 .unwrap_or(false),
236 )
236 )
237 }
237 }
238 }
238 }
239 let repo_arg = early_args.repo.unwrap_or_default();
239 let repo_arg = early_args.repo.unwrap_or_default();
240 let repo_path: Option<PathBuf> = {
240 let repo_path: Option<PathBuf> = {
241 if repo_arg.is_empty() {
241 if repo_arg.is_empty() {
242 None
242 None
243 } else {
243 } else {
244 let local_config = {
244 let local_config = {
245 if std::env::var_os("HGRCSKIPREPO").is_none() {
245 if std::env::var_os("HGRCSKIPREPO").is_none() {
246 // TODO: handle errors from find_repo_root
246 // TODO: handle errors from find_repo_root
247 if let Ok(current_dir_path) = Repo::find_repo_root() {
247 if let Ok(current_dir_path) = Repo::find_repo_root() {
248 let config_files = vec![
248 let config_files = vec![
249 ConfigSource::AbsPath(
249 ConfigSource::AbsPath(
250 current_dir_path.join(".hg/hgrc"),
250 current_dir_path.join(".hg/hgrc"),
251 ),
251 ),
252 ConfigSource::AbsPath(
252 ConfigSource::AbsPath(
253 current_dir_path.join(".hg/hgrc-not-shared"),
253 current_dir_path.join(".hg/hgrc-not-shared"),
254 ),
254 ),
255 ];
255 ];
256 // TODO: handle errors from
256 // TODO: handle errors from
257 // `load_from_explicit_sources`
257 // `load_from_explicit_sources`
258 Config::load_from_explicit_sources(config_files).ok()
258 Config::load_from_explicit_sources(config_files).ok()
259 } else {
259 } else {
260 None
260 None
261 }
261 }
262 } else {
262 } else {
263 None
263 None
264 }
264 }
265 };
265 };
266
266
267 let non_repo_config_val = {
267 let non_repo_config_val = {
268 let non_repo_val = non_repo_config.get(b"paths", &repo_arg);
268 let non_repo_val = non_repo_config.get(b"paths", &repo_arg);
269 match &non_repo_val {
269 match &non_repo_val {
270 Some(val) if !val.is_empty() => home::home_dir()
270 Some(val) if !val.is_empty() => home::home_dir()
271 .unwrap_or_else(|| PathBuf::from("~"))
271 .unwrap_or_else(|| PathBuf::from("~"))
272 .join(get_path_from_bytes(val))
272 .join(get_path_from_bytes(val))
273 .canonicalize()
273 .canonicalize()
274 // TODO: handle error and make it similar to python
274 // TODO: handle error and make it similar to python
275 // implementation maybe?
275 // implementation maybe?
276 .ok(),
276 .ok(),
277 _ => None,
277 _ => None,
278 }
278 }
279 };
279 };
280
280
281 let config_val = match &local_config {
281 let config_val = match &local_config {
282 None => non_repo_config_val,
282 None => non_repo_config_val,
283 Some(val) => {
283 Some(val) => {
284 let local_config_val = val.get(b"paths", &repo_arg);
284 let local_config_val = val.get(b"paths", &repo_arg);
285 match &local_config_val {
285 match &local_config_val {
286 Some(val) if !val.is_empty() => {
286 Some(val) if !val.is_empty() => {
287 // presence of a local_config assures that
287 // presence of a local_config assures that
288 // current_dir
288 // current_dir
289 // wont result in an Error
289 // wont result in an Error
290 let canpath = hg::utils::current_dir()
290 let canpath = hg::utils::current_dir()
291 .unwrap()
291 .unwrap()
292 .join(get_path_from_bytes(val))
292 .join(get_path_from_bytes(val))
293 .canonicalize();
293 .canonicalize();
294 canpath.ok().or(non_repo_config_val)
294 canpath.ok().or(non_repo_config_val)
295 }
295 }
296 _ => non_repo_config_val,
296 _ => non_repo_config_val,
297 }
297 }
298 }
298 }
299 };
299 };
300 config_val
300 config_val
301 .or_else(|| Some(get_path_from_bytes(&repo_arg).to_path_buf()))
301 .or_else(|| Some(get_path_from_bytes(&repo_arg).to_path_buf()))
302 }
302 }
303 };
303 };
304
304
305 let simple_exit =
305 let simple_exit =
306 |ui: &Ui, config: &Config, result: Result<(), CommandError>| -> ! {
306 |ui: &Ui, config: &Config, result: Result<(), CommandError>| -> ! {
307 exit(
307 exit(
308 &argv,
308 &argv,
309 &initial_current_dir,
309 &initial_current_dir,
310 ui,
310 ui,
311 OnUnsupported::from_config(config),
311 OnUnsupported::from_config(config),
312 result,
312 result,
313 // TODO: show a warning or combine with original error if
313 // TODO: show a warning or combine with original error if
314 // `get_bool` returns an error
314 // `get_bool` returns an error
315 non_repo_config
315 non_repo_config
316 .get_bool(b"ui", b"detailed-exit-code")
316 .get_bool(b"ui", b"detailed-exit-code")
317 .unwrap_or(false),
317 .unwrap_or(false),
318 )
318 )
319 };
319 };
320 let early_exit = |config: &Config, error: CommandError| -> ! {
320 let early_exit = |config: &Config, error: CommandError| -> ! {
321 simple_exit(&Ui::new_infallible(config), config, Err(error))
321 simple_exit(&Ui::new_infallible(config), config, Err(error))
322 };
322 };
323 let repo_result = match Repo::find(&non_repo_config, repo_path.to_owned())
323 let repo_result = match Repo::find(&non_repo_config, repo_path.to_owned())
324 {
324 {
325 Ok(repo) => Ok(repo),
325 Ok(repo) => Ok(repo),
326 Err(RepoError::NotFound { at }) if repo_path.is_none() => {
326 Err(RepoError::NotFound { at }) if repo_path.is_none() => {
327 // Not finding a repo is not fatal yet, if `-R` was not given
327 // Not finding a repo is not fatal yet, if `-R` was not given
328 Err(NoRepoInCwdError { cwd: at })
328 Err(NoRepoInCwdError { cwd: at })
329 }
329 }
330 Err(error) => early_exit(&non_repo_config, error.into()),
330 Err(error) => early_exit(&non_repo_config, error.into()),
331 };
331 };
332
332
333 let config = if let Ok(repo) = &repo_result {
333 let config = if let Ok(repo) = &repo_result {
334 repo.config()
334 repo.config()
335 } else {
335 } else {
336 &non_repo_config
336 &non_repo_config
337 };
337 };
338
338
339 let mut config_cow = Cow::Borrowed(config);
339 let mut config_cow = Cow::Borrowed(config);
340 config_cow.to_mut().apply_plain(PlainInfo::from_env());
340 config_cow.to_mut().apply_plain(PlainInfo::from_env());
341 if !ui::plain(Some("tweakdefaults"))
341 if !ui::plain(Some("tweakdefaults"))
342 && config_cow
342 && config_cow
343 .as_ref()
343 .as_ref()
344 .get_bool(b"ui", b"tweakdefaults")
344 .get_bool(b"ui", b"tweakdefaults")
345 .unwrap_or_else(|error| early_exit(config, error.into()))
345 .unwrap_or_else(|error| early_exit(config, error.into()))
346 {
346 {
347 config_cow.to_mut().tweakdefaults()
347 config_cow.to_mut().tweakdefaults()
348 };
348 };
349 let config = config_cow.as_ref();
349 let config = config_cow.as_ref();
350 let ui = Ui::new(config)
350 let ui = Ui::new(config)
351 .unwrap_or_else(|error| early_exit(config, error.into()));
351 .unwrap_or_else(|error| early_exit(config, error.into()));
352
352
353 if let Ok(true) = config.get_bool(b"rhg", b"fallback-immediately") {
353 if let Ok(true) = config.get_bool(b"rhg", b"fallback-immediately") {
354 exit(
354 exit(
355 &argv,
355 &argv,
356 &initial_current_dir,
356 &initial_current_dir,
357 &ui,
357 &ui,
358 OnUnsupported::fallback(config),
358 OnUnsupported::fallback(config),
359 Err(CommandError::unsupported(
359 Err(CommandError::unsupported(
360 "`rhg.fallback-immediately is true`",
360 "`rhg.fallback-immediately is true`",
361 )),
361 )),
362 false,
362 false,
363 )
363 )
364 }
364 }
365
365
366 let result = main_with_result(
366 let result = main_with_result(
367 argv.iter().map(|s| s.to_owned()).collect(),
367 argv.iter().map(|s| s.to_owned()).collect(),
368 &process_start_time,
368 &process_start_time,
369 &ui,
369 &ui,
370 repo_result.as_ref(),
370 repo_result.as_ref(),
371 config,
371 config,
372 );
372 );
373 simple_exit(&ui, config, result)
373 simple_exit(&ui, config, result)
374 }
374 }
375
375
376 fn main() -> ! {
376 fn main() -> ! {
377 rhg_main(std::env::args_os().collect())
377 rhg_main(std::env::args_os().collect())
378 }
378 }
379
379
380 fn exit_code(
380 fn exit_code(
381 result: &Result<(), CommandError>,
381 result: &Result<(), CommandError>,
382 use_detailed_exit_code: bool,
382 use_detailed_exit_code: bool,
383 ) -> i32 {
383 ) -> i32 {
384 match result {
384 match result {
385 Ok(()) => exit_codes::OK,
385 Ok(()) => exit_codes::OK,
386 Err(CommandError::Abort {
386 Err(CommandError::Abort {
387 detailed_exit_code, ..
387 detailed_exit_code, ..
388 }) => {
388 }) => {
389 if use_detailed_exit_code {
389 if use_detailed_exit_code {
390 *detailed_exit_code
390 *detailed_exit_code
391 } else {
391 } else {
392 exit_codes::ABORT
392 exit_codes::ABORT
393 }
393 }
394 }
394 }
395 Err(CommandError::Unsuccessful) => exit_codes::UNSUCCESSFUL,
395 Err(CommandError::Unsuccessful) => exit_codes::UNSUCCESSFUL,
396 // Exit with a specific code and no error message to let a potential
396 // Exit with a specific code and no error message to let a potential
397 // wrapper script fallback to Python-based Mercurial.
397 // wrapper script fallback to Python-based Mercurial.
398 Err(CommandError::UnsupportedFeature { .. }) => {
398 Err(CommandError::UnsupportedFeature { .. }) => {
399 exit_codes::UNIMPLEMENTED
399 exit_codes::UNIMPLEMENTED
400 }
400 }
401 Err(CommandError::InvalidFallback { .. }) => {
401 Err(CommandError::InvalidFallback { .. }) => {
402 exit_codes::INVALID_FALLBACK
402 exit_codes::INVALID_FALLBACK
403 }
403 }
404 }
404 }
405 }
405 }
406
406
407 fn exit<'a>(
407 fn exit(
408 original_args: &'a [OsString],
408 original_args: &[OsString],
409 initial_current_dir: &Option<PathBuf>,
409 initial_current_dir: &Option<PathBuf>,
410 ui: &Ui,
410 ui: &Ui,
411 mut on_unsupported: OnUnsupported,
411 mut on_unsupported: OnUnsupported,
412 result: Result<(), CommandError>,
412 result: Result<(), CommandError>,
413 use_detailed_exit_code: bool,
413 use_detailed_exit_code: bool,
414 ) -> ! {
414 ) -> ! {
415 if let (
415 if let (
416 OnUnsupported::Fallback { executable },
416 OnUnsupported::Fallback { executable },
417 Err(CommandError::UnsupportedFeature { message }),
417 Err(CommandError::UnsupportedFeature { message }),
418 ) = (&on_unsupported, &result)
418 ) = (&on_unsupported, &result)
419 {
419 {
420 let mut args = original_args.iter();
420 let mut args = original_args.iter();
421 let executable = match executable {
421 let executable = match executable {
422 None => {
422 None => {
423 exit_no_fallback(
423 exit_no_fallback(
424 ui,
424 ui,
425 OnUnsupported::Abort,
425 OnUnsupported::Abort,
426 Err(CommandError::abort(
426 Err(CommandError::abort(
427 "abort: 'rhg.on-unsupported=fallback' without \
427 "abort: 'rhg.on-unsupported=fallback' without \
428 'rhg.fallback-executable' set.",
428 'rhg.fallback-executable' set.",
429 )),
429 )),
430 false,
430 false,
431 );
431 );
432 }
432 }
433 Some(executable) => executable,
433 Some(executable) => executable,
434 };
434 };
435 let executable_path = get_path_from_bytes(executable);
435 let executable_path = get_path_from_bytes(executable);
436 let this_executable = args.next().expect("exepcted argv[0] to exist");
436 let this_executable = args.next().expect("exepcted argv[0] to exist");
437 if executable_path == *this_executable {
437 if executable_path == *this_executable {
438 // Avoid spawning infinitely many processes until resource
438 // Avoid spawning infinitely many processes until resource
439 // exhaustion.
439 // exhaustion.
440 let _ = ui.write_stderr(&format_bytes!(
440 let _ = ui.write_stderr(&format_bytes!(
441 b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \
441 b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \
442 points to `rhg` itself.\n",
442 points to `rhg` itself.\n",
443 executable
443 executable
444 ));
444 ));
445 on_unsupported = OnUnsupported::Abort
445 on_unsupported = OnUnsupported::Abort
446 } else {
446 } else {
447 log::debug!("falling back (see trace-level log)");
447 log::debug!("falling back (see trace-level log)");
448 log::trace!("{}", local_to_utf8(message));
448 log::trace!("{}", local_to_utf8(message));
449 if let Err(err) = which::which(executable_path) {
449 if let Err(err) = which::which(executable_path) {
450 exit_no_fallback(
450 exit_no_fallback(
451 ui,
451 ui,
452 OnUnsupported::Abort,
452 OnUnsupported::Abort,
453 Err(CommandError::InvalidFallback {
453 Err(CommandError::InvalidFallback {
454 path: executable.to_owned(),
454 path: executable.to_owned(),
455 err: err.to_string(),
455 err: err.to_string(),
456 }),
456 }),
457 use_detailed_exit_code,
457 use_detailed_exit_code,
458 )
458 )
459 }
459 }
460 // `args` is now `argv[1..]` since we’ve already consumed
460 // `args` is now `argv[1..]` since we’ve already consumed
461 // `argv[0]`
461 // `argv[0]`
462 let mut command = Command::new(executable_path);
462 let mut command = Command::new(executable_path);
463 command.args(args);
463 command.args(args);
464 if let Some(initial) = initial_current_dir {
464 if let Some(initial) = initial_current_dir {
465 command.current_dir(initial);
465 command.current_dir(initial);
466 }
466 }
467 // We don't use subprocess because proper signal handling is harder
467 // We don't use subprocess because proper signal handling is harder
468 // and we don't want to keep `rhg` around after a fallback anyway.
468 // and we don't want to keep `rhg` around after a fallback anyway.
469 // For example, if `rhg` is run in the background and falls back to
469 // For example, if `rhg` is run in the background and falls back to
470 // `hg` which, in turn, waits for a signal, we'll get stuck if
470 // `hg` which, in turn, waits for a signal, we'll get stuck if
471 // we're doing plain subprocess.
471 // we're doing plain subprocess.
472 //
472 //
473 // If `exec` returns, we can only assume our process is very broken
473 // If `exec` returns, we can only assume our process is very broken
474 // (see its documentation), so only try to forward the error code
474 // (see its documentation), so only try to forward the error code
475 // when exiting.
475 // when exiting.
476 let err = command.exec();
476 let err = command.exec();
477 std::process::exit(
477 std::process::exit(
478 err.raw_os_error().unwrap_or(exit_codes::ABORT),
478 err.raw_os_error().unwrap_or(exit_codes::ABORT),
479 );
479 );
480 }
480 }
481 }
481 }
482 exit_no_fallback(ui, on_unsupported, result, use_detailed_exit_code)
482 exit_no_fallback(ui, on_unsupported, result, use_detailed_exit_code)
483 }
483 }
484
484
485 fn exit_no_fallback(
485 fn exit_no_fallback(
486 ui: &Ui,
486 ui: &Ui,
487 on_unsupported: OnUnsupported,
487 on_unsupported: OnUnsupported,
488 result: Result<(), CommandError>,
488 result: Result<(), CommandError>,
489 use_detailed_exit_code: bool,
489 use_detailed_exit_code: bool,
490 ) -> ! {
490 ) -> ! {
491 match &result {
491 match &result {
492 Ok(_) => {}
492 Ok(_) => {}
493 Err(CommandError::Unsuccessful) => {}
493 Err(CommandError::Unsuccessful) => {}
494 Err(CommandError::Abort { message, hint, .. }) => {
494 Err(CommandError::Abort { message, hint, .. }) => {
495 // Ignore errors when writing to stderr, we’re already exiting
495 // Ignore errors when writing to stderr, we’re already exiting
496 // with failure code so there’s not much more we can do.
496 // with failure code so there’s not much more we can do.
497 if !message.is_empty() {
497 if !message.is_empty() {
498 let _ = ui.write_stderr(&format_bytes!(b"{}\n", message));
498 let _ = ui.write_stderr(&format_bytes!(b"{}\n", message));
499 }
499 }
500 if let Some(hint) = hint {
500 if let Some(hint) = hint {
501 let _ = ui.write_stderr(&format_bytes!(b"({})\n", hint));
501 let _ = ui.write_stderr(&format_bytes!(b"({})\n", hint));
502 }
502 }
503 }
503 }
504 Err(CommandError::UnsupportedFeature { message }) => {
504 Err(CommandError::UnsupportedFeature { message }) => {
505 match on_unsupported {
505 match on_unsupported {
506 OnUnsupported::Abort => {
506 OnUnsupported::Abort => {
507 let _ = ui.write_stderr(&format_bytes!(
507 let _ = ui.write_stderr(&format_bytes!(
508 b"unsupported feature: {}\n",
508 b"unsupported feature: {}\n",
509 message
509 message
510 ));
510 ));
511 }
511 }
512 OnUnsupported::AbortSilent => {}
512 OnUnsupported::AbortSilent => {}
513 OnUnsupported::Fallback { .. } => unreachable!(),
513 OnUnsupported::Fallback { .. } => unreachable!(),
514 }
514 }
515 }
515 }
516 Err(CommandError::InvalidFallback { path, err }) => {
516 Err(CommandError::InvalidFallback { path, err }) => {
517 let _ = ui.write_stderr(&format_bytes!(
517 let _ = ui.write_stderr(&format_bytes!(
518 b"abort: invalid fallback '{}': {}\n",
518 b"abort: invalid fallback '{}': {}\n",
519 path,
519 path,
520 err.as_bytes(),
520 err.as_bytes(),
521 ));
521 ));
522 }
522 }
523 }
523 }
524 std::process::exit(exit_code(&result, use_detailed_exit_code))
524 std::process::exit(exit_code(&result, use_detailed_exit_code))
525 }
525 }
526
526
/// Declares the set of `rhg` subcommands.
///
/// For each identifier `foo` passed to the macro this generates:
/// - a `mod commands { pub mod foo; }` module tree,
/// - `add_subcommand_args`, registering each command's clap definition,
/// - `subcommand_run_fn`, mapping a subcommand name to its `run` function.
macro_rules! subcommands {
    ($( $command: ident )+) => {
        mod commands {
            $(
                pub mod $command;
            )+
        }

        fn add_subcommand_args(app: clap::Command) -> clap::Command {
            app
            $(
                .subcommand(commands::$command::args())
            )+
        }

        /// Signature shared by every subcommand's entry point.
        pub type RunFn = fn(&CliInvocation) -> Result<(), CommandError>;

        fn subcommand_run_fn(name: &str) -> Option<RunFn> {
            match name {
                $(
                    // The subcommand name is the module identifier itself.
                    stringify!($command) => Some(commands::$command::run),
                )+
                _ => None,
            }
        }
    };
}
554
554
// Declare all supported subcommands. Each identifier must name a module
// file under `commands/` providing `args()` and `run()`.
subcommands! {
    cat
    debugdata
    debugrequirements
    debugignorerhg
    debugrhgsparse
    files
    root
    config
    status
}
566
566
/// Everything a subcommand's `run` function needs for one invocation.
pub struct CliInvocation<'a> {
    /// User interface (output streams and color handling).
    ui: &'a Ui,
    /// The clap matches for the subcommand's own arguments.
    subcommand_args: &'a ArgMatches,
    /// The resolved configuration for this invocation.
    config: &'a Config,
    /// References inside `Result` is a bit peculiar but allow
    /// `invocation.repo?` to work out with `&CliInvocation` since this
    /// `Result` type is `Copy`.
    repo: Result<&'a Repo, &'a NoRepoInCwdError>,
}
576
576
/// Error recorded when no repository was found starting from the current
/// working directory (and no `-R`/`--repository` path was given).
struct NoRepoInCwdError {
    /// The directory from which the repository search started.
    cwd: PathBuf,
}
580
580
/// CLI arguments to be parsed "early" in order to be able to read
/// configuration before using Clap. Ideally we would also use Clap for this,
/// see <https://github.com/clap-rs/clap/discussions/2366>.
///
/// These arguments are still declared when we do use Clap later, so that Clap
/// does not return an error for their presence.
struct EarlyArgs {
    /// Values of all `--config` arguments. (Possibly none)
    config: Vec<Vec<u8>>,
    /// Value of the `--color` argument, if any (the last occurrence wins,
    /// see `EarlyArgs::parse`).
    color: Option<Vec<u8>>,
    /// Value of the `-R` or `--repository` argument, if any.
    repo: Option<Vec<u8>>,
    /// Value of the `--cwd` argument, if any.
    cwd: Option<Vec<u8>>,
}
597
597
impl EarlyArgs {
    /// Scan `args` for the options that must be known before the real
    /// (Clap-based) parsing: `--config`, `--color`, `--cwd`, and
    /// `-R`/`--repository`.
    ///
    /// `--config` values accumulate; for the other options the last
    /// occurrence on the command line wins. Both the separated
    /// (`--cwd VALUE`) and attached (`--cwd=VALUE`, `-RVALUE`) forms are
    /// recognized. A flag given as the last argument with no following
    /// value is silently ignored.
    fn parse<'a>(args: impl IntoIterator<Item = &'a OsString>) -> Self {
        let mut args = args.into_iter().map(get_bytes_from_os_str);
        let mut config = Vec::new();
        let mut color = None;
        let mut repo = None;
        let mut cwd = None;
        // Use `while let` instead of `for` so that we can also call
        // `args.next()` inside the loop.
        while let Some(arg) = args.next() {
            if arg == b"--config" {
                // The value is the next argument.
                if let Some(value) = args.next() {
                    config.push(value)
                }
            } else if let Some(value) = arg.drop_prefix(b"--config=") {
                config.push(value.to_owned())
            }

            if arg == b"--color" {
                if let Some(value) = args.next() {
                    color = Some(value)
                }
            } else if let Some(value) = arg.drop_prefix(b"--color=") {
                color = Some(value.to_owned())
            }

            if arg == b"--cwd" {
                if let Some(value) = args.next() {
                    cwd = Some(value)
                }
            } else if let Some(value) = arg.drop_prefix(b"--cwd=") {
                cwd = Some(value.to_owned())
            }

            if arg == b"--repository" || arg == b"-R" {
                if let Some(value) = args.next() {
                    repo = Some(value)
                }
            } else if let Some(value) = arg.drop_prefix(b"--repository=") {
                repo = Some(value.to_owned())
            } else if let Some(value) = arg.drop_prefix(b"-R") {
                // `-R` also supports the attached form: `-RREPO`.
                repo = Some(value.to_owned())
            }
        }
        Self {
            config,
            color,
            repo,
            cwd,
        }
    }
}
650
650
/// What to do when encountering some unsupported feature.
///
/// See `HgError::UnsupportedFeature` and `CommandError::UnsupportedFeature`.
enum OnUnsupported {
    /// Print an error message describing what feature is not supported,
    /// and exit with code 252.
    Abort,
    /// Silently exit with code 252.
    AbortSilent,
    /// Try running a Python implementation
    Fallback {
        /// Path of the fallback executable (`rhg.fallback-executable`
        /// config), if set.
        executable: Option<Vec<u8>>,
    },
}
663
663
664 impl OnUnsupported {
664 impl OnUnsupported {
665 const DEFAULT: Self = OnUnsupported::Abort;
665 const DEFAULT: Self = OnUnsupported::Abort;
666
666
667 fn fallback_executable(config: &Config) -> Option<Vec<u8>> {
667 fn fallback_executable(config: &Config) -> Option<Vec<u8>> {
668 config
668 config
669 .get(b"rhg", b"fallback-executable")
669 .get(b"rhg", b"fallback-executable")
670 .map(|x| x.to_owned())
670 .map(|x| x.to_owned())
671 }
671 }
672
672
673 fn fallback(config: &Config) -> Self {
673 fn fallback(config: &Config) -> Self {
674 OnUnsupported::Fallback {
674 OnUnsupported::Fallback {
675 executable: Self::fallback_executable(config),
675 executable: Self::fallback_executable(config),
676 }
676 }
677 }
677 }
678
678
679 fn from_config(config: &Config) -> Self {
679 fn from_config(config: &Config) -> Self {
680 match config
680 match config
681 .get(b"rhg", b"on-unsupported")
681 .get(b"rhg", b"on-unsupported")
682 .map(|value| value.to_ascii_lowercase())
682 .map(|value| value.to_ascii_lowercase())
683 .as_deref()
683 .as_deref()
684 {
684 {
685 Some(b"abort") => OnUnsupported::Abort,
685 Some(b"abort") => OnUnsupported::Abort,
686 Some(b"abort-silent") => OnUnsupported::AbortSilent,
686 Some(b"abort-silent") => OnUnsupported::AbortSilent,
687 Some(b"fallback") => Self::fallback(config),
687 Some(b"fallback") => Self::fallback(config),
688 None => Self::DEFAULT,
688 None => Self::DEFAULT,
689 Some(_) => {
689 Some(_) => {
690 // TODO: warn about unknown config value
690 // TODO: warn about unknown config value
691 Self::DEFAULT
691 Self::DEFAULT
692 }
692 }
693 }
693 }
694 }
694 }
695 }
695 }
696
696
/// Extensions whose presence in the config does not make `rhg` fall back
/// (consumed by `check_extensions`).
///
/// The `*` extension is an edge-case for config sub-options that apply to all
/// extensions. For now, only `:required` exists, but that may change in the
/// future.
const SUPPORTED_EXTENSIONS: &[&[u8]] = &[
    b"blackbox",
    b"share",
    b"sparse",
    b"narrow",
    b"*",
    b"strip",
    b"rebase",
];
709
709
710 fn check_extensions(config: &Config) -> Result<(), CommandError> {
710 fn check_extensions(config: &Config) -> Result<(), CommandError> {
711 if let Some(b"*") = config.get(b"rhg", b"ignored-extensions") {
711 if let Some(b"*") = config.get(b"rhg", b"ignored-extensions") {
712 // All extensions are to be ignored, nothing to do here
712 // All extensions are to be ignored, nothing to do here
713 return Ok(());
713 return Ok(());
714 }
714 }
715
715
716 let enabled: HashSet<&[u8]> = config
716 let enabled: HashSet<&[u8]> = config
717 .iter_section(b"extensions")
717 .iter_section(b"extensions")
718 .filter_map(|(extension, value)| {
718 .filter_map(|(extension, value)| {
719 if value == b"!" {
719 if value == b"!" {
720 // Filter out disabled extensions
720 // Filter out disabled extensions
721 return None;
721 return None;
722 }
722 }
723 // Ignore extension suboptions. Only `required` exists for now.
723 // Ignore extension suboptions. Only `required` exists for now.
724 // `rhg` either supports an extension or doesn't, so it doesn't
724 // `rhg` either supports an extension or doesn't, so it doesn't
725 // make sense to consider the loading of an extension.
725 // make sense to consider the loading of an extension.
726 let actual_extension =
726 let actual_extension =
727 extension.split_2(b':').unwrap_or((extension, b"")).0;
727 extension.split_2(b':').unwrap_or((extension, b"")).0;
728 Some(actual_extension)
728 Some(actual_extension)
729 })
729 })
730 .collect();
730 .collect();
731
731
732 let mut unsupported = enabled;
732 let mut unsupported = enabled;
733 for supported in SUPPORTED_EXTENSIONS {
733 for supported in SUPPORTED_EXTENSIONS {
734 unsupported.remove(supported);
734 unsupported.remove(supported);
735 }
735 }
736
736
737 if let Some(ignored_list) = config.get_list(b"rhg", b"ignored-extensions")
737 if let Some(ignored_list) = config.get_list(b"rhg", b"ignored-extensions")
738 {
738 {
739 for ignored in ignored_list {
739 for ignored in ignored_list {
740 unsupported.remove(ignored.as_slice());
740 unsupported.remove(ignored.as_slice());
741 }
741 }
742 }
742 }
743
743
744 if unsupported.is_empty() {
744 if unsupported.is_empty() {
745 Ok(())
745 Ok(())
746 } else {
746 } else {
747 let mut unsupported: Vec<_> = unsupported.into_iter().collect();
747 let mut unsupported: Vec<_> = unsupported.into_iter().collect();
748 // Sort the extensions to get a stable output
748 // Sort the extensions to get a stable output
749 unsupported.sort();
749 unsupported.sort();
750 Err(CommandError::UnsupportedFeature {
750 Err(CommandError::UnsupportedFeature {
751 message: format_bytes!(
751 message: format_bytes!(
752 b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)",
752 b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)",
753 join(unsupported, b", ")
753 join(unsupported, b", ")
754 ),
754 ),
755 })
755 })
756 }
756 }
757 }
757 }
758
758
/// Array of tuples of (auto upgrade conf, feature conf, local requirement),
/// consumed by `check_auto_upgrade` below.
#[allow(clippy::type_complexity)]
const AUTO_UPGRADES: &[((&str, &str), (&str, &str), &str)] = &[
    (
        ("format", "use-share-safe.automatic-upgrade-of-mismatching-repositories"),
        ("format", "use-share-safe"),
        requirements::SHARESAFE_REQUIREMENT,
    ),
    (
        ("format", "use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories"),
        ("format", "use-dirstate-tracked-hint"),
        requirements::DIRSTATE_TRACKED_HINT_V1,
    ),
    (
        ("format", "use-dirstate-v2.automatic-upgrade-of-mismatching-repositories"),
        ("format", "use-dirstate-v2"),
        requirements::DIRSTATE_V2_REQUIREMENT,
    ),
];
778
778
779 /// Mercurial allows users to automatically upgrade their repository.
779 /// Mercurial allows users to automatically upgrade their repository.
780 /// `rhg` does not have the ability to upgrade yet, so fallback if an upgrade
780 /// `rhg` does not have the ability to upgrade yet, so fallback if an upgrade
781 /// is needed.
781 /// is needed.
782 fn check_auto_upgrade(
782 fn check_auto_upgrade(
783 config: &Config,
783 config: &Config,
784 reqs: &HashSet<String>,
784 reqs: &HashSet<String>,
785 ) -> Result<(), CommandError> {
785 ) -> Result<(), CommandError> {
786 for (upgrade_conf, feature_conf, local_req) in AUTO_UPGRADES.iter() {
786 for (upgrade_conf, feature_conf, local_req) in AUTO_UPGRADES.iter() {
787 let auto_upgrade = config
787 let auto_upgrade = config
788 .get_bool(upgrade_conf.0.as_bytes(), upgrade_conf.1.as_bytes())?;
788 .get_bool(upgrade_conf.0.as_bytes(), upgrade_conf.1.as_bytes())?;
789
789
790 if auto_upgrade {
790 if auto_upgrade {
791 let want_it = config.get_bool(
791 let want_it = config.get_bool(
792 feature_conf.0.as_bytes(),
792 feature_conf.0.as_bytes(),
793 feature_conf.1.as_bytes(),
793 feature_conf.1.as_bytes(),
794 )?;
794 )?;
795 let have_it = reqs.contains(*local_req);
795 let have_it = reqs.contains(*local_req);
796
796
797 let action = match (want_it, have_it) {
797 let action = match (want_it, have_it) {
798 (true, false) => Some("upgrade"),
798 (true, false) => Some("upgrade"),
799 (false, true) => Some("downgrade"),
799 (false, true) => Some("downgrade"),
800 _ => None,
800 _ => None,
801 };
801 };
802 if let Some(action) = action {
802 if let Some(action) = action {
803 let message = format!(
803 let message = format!(
804 "automatic {} {}.{}",
804 "automatic {} {}.{}",
805 action, upgrade_conf.0, upgrade_conf.1
805 action, upgrade_conf.0, upgrade_conf.1
806 );
806 );
807 return Err(CommandError::unsupported(message));
807 return Err(CommandError::unsupported(message));
808 }
808 }
809 }
809 }
810 }
810 }
811 Ok(())
811 Ok(())
812 }
812 }
813
813
814 fn check_unsupported(
814 fn check_unsupported(
815 config: &Config,
815 config: &Config,
816 repo: Result<&Repo, &NoRepoInCwdError>,
816 repo: Result<&Repo, &NoRepoInCwdError>,
817 ) -> Result<(), CommandError> {
817 ) -> Result<(), CommandError> {
818 check_extensions(config)?;
818 check_extensions(config)?;
819
819
820 if std::env::var_os("HG_PENDING").is_some() {
820 if std::env::var_os("HG_PENDING").is_some() {
821 // TODO: only if the value is `== repo.working_directory`?
821 // TODO: only if the value is `== repo.working_directory`?
822 // What about relative v.s. absolute paths?
822 // What about relative v.s. absolute paths?
823 Err(CommandError::unsupported("$HG_PENDING"))?
823 Err(CommandError::unsupported("$HG_PENDING"))?
824 }
824 }
825
825
826 if let Ok(repo) = repo {
826 if let Ok(repo) = repo {
827 if repo.has_subrepos()? {
827 if repo.has_subrepos()? {
828 Err(CommandError::unsupported("sub-repositories"))?
828 Err(CommandError::unsupported("sub-repositories"))?
829 }
829 }
830 check_auto_upgrade(config, repo.requirements())?;
830 check_auto_upgrade(config, repo.requirements())?;
831 }
831 }
832
832
833 if config.has_non_empty_section(b"encode") {
833 if config.has_non_empty_section(b"encode") {
834 Err(CommandError::unsupported("[encode] config"))?
834 Err(CommandError::unsupported("[encode] config"))?
835 }
835 }
836
836
837 if config.has_non_empty_section(b"decode") {
837 if config.has_non_empty_section(b"decode") {
838 Err(CommandError::unsupported("[decode] config"))?
838 Err(CommandError::unsupported("[decode] config"))?
839 }
839 }
840
840
841 Ok(())
841 Ok(())
842 }
842 }
@@ -1,307 +1,307 b''
1 use crate::color::ColorConfig;
1 use crate::color::ColorConfig;
2 use crate::color::Effect;
2 use crate::color::Effect;
3 use crate::error::CommandError;
3 use crate::error::CommandError;
4 use format_bytes::format_bytes;
4 use format_bytes::format_bytes;
5 use format_bytes::write_bytes;
5 use format_bytes::write_bytes;
6 use hg::config::Config;
6 use hg::config::Config;
7 use hg::config::PlainInfo;
7 use hg::config::PlainInfo;
8 use hg::errors::HgError;
8 use hg::errors::HgError;
9 use hg::repo::Repo;
9 use hg::repo::Repo;
10 use hg::sparse;
10 use hg::sparse;
11 use hg::utils::files::get_bytes_from_path;
11 use hg::utils::files::get_bytes_from_path;
12 use hg::PatternFileWarning;
12 use hg::PatternFileWarning;
13 use std::borrow::Cow;
13 use std::borrow::Cow;
14 use std::io;
14 use std::io;
15 use std::io::{ErrorKind, Write};
15 use std::io::{ErrorKind, Write};
16
16
/// Handle on the standard output/error streams, plus the color styling
/// configuration used when writing labelled output.
pub struct Ui {
    // Process-wide stdout handle; locked for the duration of each write.
    stdout: std::io::Stdout,
    // Process-wide stderr handle; locked for the duration of each write.
    stderr: std::io::Stderr,
    // Per-label style table (see `write_stdout_labelled`); `None` means
    // output is never styled.
    colors: Option<ColorConfig>,
}
22
22
/// The kind of user interface error
///
/// Each variant carries the underlying I/O error from the failed stream.
pub enum UiError {
    /// The standard output stream cannot be written to
    StdoutError(io::Error),
    /// The standard error stream cannot be written to
    StderrError(io::Error),
}
30
30
/// The commandline user interface
impl Ui {
    /// Build a `Ui` from `config`; fails if the color configuration
    /// cannot be parsed.
    pub fn new(config: &Config) -> Result<Self, HgError> {
        Ok(Ui {
            // If using something else, also adapt `isatty()` below.
            stdout: std::io::stdout(),

            stderr: std::io::stderr(),
            colors: ColorConfig::new(config)?,
        })
    }

    /// Default to no color if color configuration errors.
    ///
    /// Useful when we’re already handling another error.
    pub fn new_infallible(config: &Config) -> Self {
        Ui {
            // If using something else, also adapt `isatty()` below.
            stdout: std::io::stdout(),

            stderr: std::io::stderr(),
            // A color-configuration error silently disables color here.
            colors: ColorConfig::new(config).unwrap_or(None),
        }
    }

    /// Returns a buffered handle on stdout for faster batch printing
    /// operations.
    pub fn stdout_buffer(&self) -> StdoutBuffer<std::io::StdoutLock> {
        StdoutBuffer::new(self.stdout.lock())
    }

    /// Write bytes to stdout
    ///
    /// Locks stdout for the duration of the call and flushes before
    /// returning.
    pub fn write_stdout(&self, bytes: &[u8]) -> Result<(), UiError> {
        let mut stdout = self.stdout.lock();

        stdout.write_all(bytes).or_else(handle_stdout_error)?;

        stdout.flush().or_else(handle_stdout_error)
    }

    /// Write bytes to stderr
    ///
    /// Locks stderr for the duration of the call and flushes before
    /// returning.
    pub fn write_stderr(&self, bytes: &[u8]) -> Result<(), UiError> {
        let mut stderr = self.stderr.lock();

        stderr.write_all(bytes).or_else(handle_stderr_error)?;

        stderr.flush().or_else(handle_stderr_error)
    }

    /// Write bytes to stdout with the given label
    ///
    /// Like the optional `label` parameter in `mercurial/ui.py`,
    /// this label influences the color used for this output.
    pub fn write_stdout_labelled(
        &self,
        bytes: &[u8],
        label: &str,
    ) -> Result<(), UiError> {
        // Take the styled path only when color is enabled, the label has
        // a configured style, and that style is non-empty; otherwise fall
        // through to a plain write.
        if let Some(colors) = &self.colors {
            if let Some(effects) = colors.styles.get(label.as_bytes()) {
                if !effects.is_empty() {
                    return self
                        .write_stdout_with_effects(bytes, effects)
                        .or_else(handle_stdout_error);
                }
            }
        }
        self.write_stdout(bytes)
    }

    /// Write `bytes` to stdout, wrapping the content of each line in ANSI
    /// escape sequences that apply `effects`, with a reset at the end of
    /// every line so styling does not span newlines.
    fn write_stdout_with_effects(
        &self,
        bytes: &[u8],
        effects: &[Effect],
    ) -> io::Result<()> {
        let stdout = &mut self.stdout.lock();
        let mut write_line = |line: &[u8], first: bool| {
            // `line` does not include the newline delimiter
            if !first {
                stdout.write_all(b"\n")?;
            }
            // Nothing to style on an empty line.
            if line.is_empty() {
                return Ok(());
            }
            /// 0x1B == 27 == 0o33
            const ASCII_ESCAPE: &[u8] = b"\x1b";
            // Start with a reset (`ESC[0`) then append each effect code,
            // producing e.g. `ESC[0;1;32m`.
            write_bytes!(stdout, b"{}[0", ASCII_ESCAPE)?;
            for effect in effects {
                write_bytes!(stdout, b";{}", effect)?;
            }
            write_bytes!(stdout, b"m")?;
            stdout.write_all(line)?;
            // Reset styling at the end of the line.
            write_bytes!(stdout, b"{}[0m", ASCII_ESCAPE)
        };
        let mut lines = bytes.split(|&byte| byte == b'\n');
        if let Some(first) = lines.next() {
            write_line(first, true)?;
            for line in lines {
                write_line(line, false)?
            }
        }
        stdout.flush()
    }
}
135
135
136 // TODO: pass the PlainInfo to call sites directly and
136 // TODO: pass the PlainInfo to call sites directly and
137 // delete this function
137 // delete this function
138 pub fn plain(opt_feature: Option<&str>) -> bool {
138 pub fn plain(opt_feature: Option<&str>) -> bool {
139 let plain_info = PlainInfo::from_env();
139 let plain_info = PlainInfo::from_env();
140 match opt_feature {
140 match opt_feature {
141 None => plain_info.is_plain(),
141 None => plain_info.is_plain(),
142 Some(feature) => plain_info.is_feature_plain(feature),
142 Some(feature) => plain_info.is_feature_plain(feature),
143 }
143 }
144 }
144 }
145
145
/// A buffered stdout writer for faster batch printing operations.
pub struct StdoutBuffer<W: Write> {
    // Underlying buffered writer; callers must `flush()` to guarantee
    // everything reaches the wrapped stream.
    buf: io::BufWriter<W>,
}
150
150
151 impl<W: Write> StdoutBuffer<W> {
151 impl<W: Write> StdoutBuffer<W> {
152 pub fn new(writer: W) -> Self {
152 pub fn new(writer: W) -> Self {
153 let buf = io::BufWriter::new(writer);
153 let buf = io::BufWriter::new(writer);
154 Self { buf }
154 Self { buf }
155 }
155 }
156
156
157 /// Write bytes to stdout buffer
157 /// Write bytes to stdout buffer
158 pub fn write_all(&mut self, bytes: &[u8]) -> Result<(), UiError> {
158 pub fn write_all(&mut self, bytes: &[u8]) -> Result<(), UiError> {
159 self.buf.write_all(bytes).or_else(handle_stdout_error)
159 self.buf.write_all(bytes).or_else(handle_stdout_error)
160 }
160 }
161
161
162 /// Flush bytes to stdout
162 /// Flush bytes to stdout
163 pub fn flush(&mut self) -> Result<(), UiError> {
163 pub fn flush(&mut self) -> Result<(), UiError> {
164 self.buf.flush().or_else(handle_stdout_error)
164 self.buf.flush().or_else(handle_stdout_error)
165 }
165 }
166 }
166 }
167
167
168 /// Sometimes writing to stdout is not possible, try writing to stderr to
168 /// Sometimes writing to stdout is not possible, try writing to stderr to
169 /// signal that failure, otherwise just bail.
169 /// signal that failure, otherwise just bail.
170 fn handle_stdout_error(error: io::Error) -> Result<(), UiError> {
170 fn handle_stdout_error(error: io::Error) -> Result<(), UiError> {
171 if let ErrorKind::BrokenPipe = error.kind() {
171 if let ErrorKind::BrokenPipe = error.kind() {
172 // This makes `| head` work for example
172 // This makes `| head` work for example
173 return Ok(());
173 return Ok(());
174 }
174 }
175 let mut stderr = io::stderr();
175 let mut stderr = io::stderr();
176
176
177 stderr
177 stderr
178 .write_all(&format_bytes!(
178 .write_all(&format_bytes!(
179 b"abort: {}\n",
179 b"abort: {}\n",
180 error.to_string().as_bytes()
180 error.to_string().as_bytes()
181 ))
181 ))
182 .map_err(UiError::StderrError)?;
182 .map_err(UiError::StderrError)?;
183
183
184 stderr.flush().map_err(UiError::StderrError)?;
184 stderr.flush().map_err(UiError::StderrError)?;
185
185
186 Err(UiError::StdoutError(error))
186 Err(UiError::StdoutError(error))
187 }
187 }
188
188
189 /// Sometimes writing to stderr is not possible.
189 /// Sometimes writing to stderr is not possible.
190 fn handle_stderr_error(error: io::Error) -> Result<(), UiError> {
190 fn handle_stderr_error(error: io::Error) -> Result<(), UiError> {
191 // A broken pipe should not result in a error
191 // A broken pipe should not result in a error
192 // like with `| head` for example
192 // like with `| head` for example
193 if let ErrorKind::BrokenPipe = error.kind() {
193 if let ErrorKind::BrokenPipe = error.kind() {
194 return Ok(());
194 return Ok(());
195 }
195 }
196 Err(UiError::StdoutError(error))
196 Err(UiError::StdoutError(error))
197 }
197 }
198
198
/// Encode rust strings according to the user system.
pub fn utf8_to_local(s: &str) -> Cow<[u8]> {
    // TODO: actually encode for the user's system; for now the UTF-8
    // bytes are passed through unchanged (no allocation).
    Cow::Borrowed(s.as_bytes())
}
205
205
/// Decode user system bytes to Rust string.
pub fn local_to_utf8(s: &[u8]) -> Cow<str> {
    // TODO: decode from the user's system encoding. For now, treat the
    // input as UTF-8: borrow when valid, otherwise replace invalid
    // sequences with U+FFFD (same behavior as `String::from_utf8_lossy`).
    match std::str::from_utf8(s) {
        Ok(valid) => Cow::Borrowed(valid),
        Err(_) => Cow::Owned(String::from_utf8_lossy(s).into_owned()),
    }
}
211
211
212 /// Should formatted output be used?
212 /// Should formatted output be used?
213 ///
213 ///
214 /// Note: rhg does not have the formatter mechanism yet,
214 /// Note: rhg does not have the formatter mechanism yet,
215 /// but this is also used when deciding whether to use color.
215 /// but this is also used when deciding whether to use color.
216 pub fn formatted(config: &Config) -> Result<bool, HgError> {
216 pub fn formatted(config: &Config) -> Result<bool, HgError> {
217 if let Some(formatted) = config.get_option(b"ui", b"formatted")? {
217 if let Some(formatted) = config.get_option(b"ui", b"formatted")? {
218 Ok(formatted)
218 Ok(formatted)
219 } else {
219 } else {
220 isatty(config)
220 isatty(config)
221 }
221 }
222 }
222 }
223
223
/// How paths should be displayed, per the `ui.relative-paths` config.
pub enum RelativePaths {
    /// Config unset or set to "legacy": keep the historical behavior.
    Legacy,
    /// Explicit boolean setting.
    Bool(bool),
}
228
228
229 pub fn relative_paths(config: &Config) -> Result<RelativePaths, HgError> {
229 pub fn relative_paths(config: &Config) -> Result<RelativePaths, HgError> {
230 Ok(match config.get(b"ui", b"relative-paths") {
230 Ok(match config.get(b"ui", b"relative-paths") {
231 None | Some(b"legacy") => RelativePaths::Legacy,
231 None | Some(b"legacy") => RelativePaths::Legacy,
232 _ => RelativePaths::Bool(config.get_bool(b"ui", b"relative-paths")?),
232 _ => RelativePaths::Bool(config.get_bool(b"ui", b"relative-paths")?),
233 })
233 })
234 }
234 }
235
235
236 fn isatty(config: &Config) -> Result<bool, HgError> {
236 fn isatty(config: &Config) -> Result<bool, HgError> {
237 Ok(if config.get_bool(b"ui", b"nontty")? {
237 Ok(if config.get_bool(b"ui", b"nontty")? {
238 false
238 false
239 } else {
239 } else {
240 atty::is(atty::Stream::Stdout)
240 atty::is(atty::Stream::Stdout)
241 })
241 })
242 }
242 }
243
243
244 /// Return the formatted bytestring corresponding to a pattern file warning,
244 /// Return the formatted bytestring corresponding to a pattern file warning,
245 /// as expected by the CLI.
245 /// as expected by the CLI.
246 pub(crate) fn format_pattern_file_warning(
246 pub(crate) fn format_pattern_file_warning(
247 warning: &PatternFileWarning,
247 warning: &PatternFileWarning,
248 repo: &Repo,
248 repo: &Repo,
249 ) -> Vec<u8> {
249 ) -> Vec<u8> {
250 match warning {
250 match warning {
251 PatternFileWarning::InvalidSyntax(path, syntax) => format_bytes!(
251 PatternFileWarning::InvalidSyntax(path, syntax) => format_bytes!(
252 b"{}: ignoring invalid syntax '{}'\n",
252 b"{}: ignoring invalid syntax '{}'\n",
253 get_bytes_from_path(path),
253 get_bytes_from_path(path),
254 &*syntax
254 syntax
255 ),
255 ),
256 PatternFileWarning::NoSuchFile(path) => {
256 PatternFileWarning::NoSuchFile(path) => {
257 let path = if let Ok(relative) =
257 let path = if let Ok(relative) =
258 path.strip_prefix(repo.working_directory_path())
258 path.strip_prefix(repo.working_directory_path())
259 {
259 {
260 relative
260 relative
261 } else {
261 } else {
262 &*path
262 path
263 };
263 };
264 format_bytes!(
264 format_bytes!(
265 b"skipping unreadable pattern file '{}': \
265 b"skipping unreadable pattern file '{}': \
266 No such file or directory\n",
266 No such file or directory\n",
267 get_bytes_from_path(path),
267 get_bytes_from_path(path),
268 )
268 )
269 }
269 }
270 }
270 }
271 }
271 }
272
272
/// Print with `Ui` the formatted bytestring corresponding to a
/// sparse/narrow warning, as expected by the CLI.
///
/// All warnings (narrow first, then sparse) go to stderr; any stderr
/// write failure aborts the iteration with a `CommandError`.
pub(crate) fn print_narrow_sparse_warnings(
    narrow_warnings: &[sparse::SparseWarning],
    sparse_warnings: &[sparse::SparseWarning],
    ui: &Ui,
    repo: &Repo,
) -> Result<(), CommandError> {
    for warning in narrow_warnings.iter().chain(sparse_warnings) {
        match &warning {
            sparse::SparseWarning::RootWarning { context, line } => {
                let msg = format_bytes!(
                    b"warning: {} profile cannot use paths \"
                    starting with /, ignoring {}\n",
                    context,
                    line
                );
                ui.write_stderr(&msg)?;
            }
            sparse::SparseWarning::ProfileNotFound { profile, rev } => {
                let msg = format_bytes!(
                    b"warning: sparse profile '{}' not found \"
                    in rev {} - ignoring it\n",
                    profile,
                    rev
                );
                ui.write_stderr(&msg)?;
            }
            // Pattern-file warnings reuse the shared CLI formatting.
            sparse::SparseWarning::Pattern(e) => {
                ui.write_stderr(&format_pattern_file_warning(e, repo))?;
            }
        }
    }
    Ok(())
}
General Comments 0
You need to be logged in to leave comments. Login now