rust: run a clippy pass with the latest stable version...
Raphaël Gomès
r52013:532e74ad default
@@ -1,349 +1,349 b''
1 1 // layer.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 use crate::errors::HgError;
11 11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
12 12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
13 13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
14 14 use lazy_static::lazy_static;
15 15 use regex::bytes::Regex;
16 16 use std::collections::HashMap;
17 17 use std::path::{Path, PathBuf};
18 18
19 19 lazy_static! {
20 20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
21 21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
22 22 /// Continuation whitespace
23 23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
24 24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
25 25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
26 26 /// A directive that allows for removing previous entries
27 27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
28 28 /// A directive that allows for including other config files
29 29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
30 30 }
31 31
32 32 /// All config values separated by layers of precedence.
33 33 /// Each config source may be split in multiple layers if `%include` directives
34 34 /// are used.
35 35 /// TODO detail the general precedence
36 36 #[derive(Clone)]
37 37 pub struct ConfigLayer {
38 38 /// Mapping of the sections to their items
39 39 sections: HashMap<Vec<u8>, ConfigItem>,
40 40 /// All sections (and their items/values) in a layer share the same origin
41 41 pub origin: ConfigOrigin,
42 42 /// Whether this layer comes from a trusted user or group
43 43 pub trusted: bool,
44 44 }
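To illustrate the layering described above (file names are hypothetical, and `extra.rc` is assumed to contain no further `%include`), a user configuration such as:

    # ~/.hgrc
    [ui]
    username = Jane Doe <jane@example.com>
    %include extra.rc
    [alias]
    lg = log --graph

is split into three layers: one for `ui.username`, one for the contents of `extra.rc`, and one for `alias.lg`. Later layers take precedence, so values from `extra.rc` override the `ui.username` set before the `%include`, and are themselves overridden by anything that follows it (see the `parse` method and `test_include_layer_ordering` further down this page).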
45 45
46 46 impl ConfigLayer {
47 47 pub fn new(origin: ConfigOrigin) -> Self {
48 48 ConfigLayer {
49 49 sections: HashMap::new(),
50 50 trusted: true, // TODO check
51 51 origin,
52 52 }
53 53 }
54 54
55 55 /// Parse `--config` CLI arguments and return a layer if there’s any
56 56 pub(crate) fn parse_cli_args(
57 57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
58 58 ) -> Result<Option<Self>, ConfigError> {
59 59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
60 60 use crate::utils::SliceExt;
61 61
62 62 let (section_and_item, value) = arg.split_2(b'=')?;
63 63 let (section, item) = section_and_item.trim().split_2(b'.')?;
64 64 Some((
65 65 section.to_owned(),
66 66 item.to_owned(),
67 67 value.trim().to_owned(),
68 68 ))
69 69 }
70 70
71 71 let mut layer = Self::new(ConfigOrigin::CommandLine);
72 72 for arg in cli_config_args {
73 73 let arg = arg.as_ref();
74 74 if let Some((section, item, value)) = parse_one(arg) {
75 75 layer.add(section, item, value, None);
76 76 } else {
77 77 Err(HgError::abort(
78 78 format!(
79 79 "abort: malformed --config option: '{}' \
80 80 (use --config section.name=value)",
81 81 String::from_utf8_lossy(arg),
82 82 ),
83 83 CONFIG_PARSE_ERROR_ABORT,
84 84 None,
85 85 ))?
86 86 }
87 87 }
88 88 if layer.sections.is_empty() {
89 89 Ok(None)
90 90 } else {
91 91 Ok(Some(layer))
92 92 }
93 93 }
94 94
95 95 /// Returns whether this layer comes from `--config` CLI arguments
96 96 pub(crate) fn is_from_command_line(&self) -> bool {
97 97 matches!(self.origin, ConfigOrigin::CommandLine)
98 98 }
99 99
100 100 /// Add an entry to the config, overwriting the old one if already present.
101 101 pub fn add(
102 102 &mut self,
103 103 section: Vec<u8>,
104 104 item: Vec<u8>,
105 105 value: Vec<u8>,
106 106 line: Option<usize>,
107 107 ) {
108 108 self.sections
109 109 .entry(section)
110 .or_insert_with(HashMap::new)
110 .or_default()
111 111 .insert(item, ConfigValue { bytes: value, line });
112 112 }
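Note on the change just above: replacing `.or_insert_with(HashMap::new)` with `.or_default()` is one of the clippy suggestions this pass applies. When the entry's value type implements `Default`, `or_default()` expresses the same thing more directly. A minimal sketch of the equivalence, with illustrative types rather than the ones used in this file:

    use std::collections::HashMap;

    fn main() {
        let mut groups: HashMap<&str, Vec<u32>> = HashMap::new();
        // Same effect as `.entry("a").or_insert_with(Vec::new)`, since `Vec: Default`.
        groups.entry("a").or_default().push(1);
        groups.entry("a").or_default().push(2);
        assert_eq!(groups["a"], vec![1, 2]);
    }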
113 113
114 114 /// Returns the config value in `<section>.<item>` if it exists
115 115 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
116 116 self.sections.get(section)?.get(item)
117 117 }
118 118
119 119 /// Returns the keys defined in the given section
120 120 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
121 121 self.sections
122 122 .get(section)
123 123 .into_iter()
124 124 .flat_map(|section| section.keys().map(|vec| &**vec))
125 125 }
126 126
127 127 /// Returns the (key, value) pairs defined in the given section
128 128 pub fn iter_section<'layer>(
129 129 &'layer self,
130 130 section: &[u8],
131 131 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
132 132 self.sections
133 133 .get(section)
134 134 .into_iter()
135 135 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
136 136 }
137 137
138 138 /// Returns whether any key is defined in the given section
139 139 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
140 140 self.sections
141 141 .get(section)
142 142 .map_or(false, |section| !section.is_empty())
143 143 }
144 144
145 145 pub fn is_empty(&self) -> bool {
146 146 self.sections.is_empty()
147 147 }
148 148
149 149 /// Returns a `Vec` of layers in order of precedence (so, in read order),
150 150 /// recursively parsing the `%include` directives if any.
151 151 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
152 152 let mut layers = vec![];
153 153
154 154 // Discard byte order mark if any
155 155 let data = if data.starts_with(b"\xef\xbb\xbf") {
156 156 &data[3..]
157 157 } else {
158 158 data
159 159 };
160 160
161 161 // TODO check if it's trusted
162 162 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
163 163
164 164 let mut lines_iter =
165 165 data.split(|b| *b == b'\n').enumerate().peekable();
166 166 let mut section = b"".to_vec();
167 167
168 168 while let Some((index, bytes)) = lines_iter.next() {
169 169 let line = Some(index + 1);
170 170 if let Some(m) = INCLUDE_RE.captures(bytes) {
171 171 let filename_bytes = &m[1];
172 172 let filename_bytes = crate::utils::expand_vars(filename_bytes);
173 173 // `Path::parent` only fails for the root directory,
174 174 // which `src` can’t be since we’ve managed to open it as a
175 175 // file.
176 176 let dir = src
177 177 .parent()
178 178 .expect("Path::parent fail on a file we’ve read");
179 179 // `Path::join` with an absolute argument correctly ignores the
180 180 // base path
181 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
181 let filename = dir.join(get_path_from_bytes(&filename_bytes));
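Note on the change just above: dropping the `&` is the kind of cleanup clippy's `needless_borrow` lint suggests. `Path::join` takes `impl AsRef<Path>`, and the value returned by `get_path_from_bytes` already satisfies that bound, so taking an extra reference adds nothing. A small illustration with hypothetical paths:

    use std::path::Path;

    fn main() {
        let dir = Path::new("/tmp");
        let name: &Path = Path::new("included.rc");
        // `join` takes `impl AsRef<Path>`; `name` already satisfies it, so the
        // extra `&` in the first call is redundant.
        assert_eq!(dir.join(&name), dir.join(name));
    }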
182 182 match std::fs::read(&filename) {
183 183 Ok(data) => {
184 184 layers.push(current_layer);
185 185 layers.extend(Self::parse(&filename, &data)?);
186 186 current_layer =
187 187 Self::new(ConfigOrigin::File(src.to_owned()));
188 188 }
189 189 Err(error) => {
190 190 if error.kind() != std::io::ErrorKind::NotFound {
191 191 return Err(ConfigParseError {
192 192 origin: ConfigOrigin::File(src.to_owned()),
193 193 line,
194 194 message: format_bytes!(
195 195 b"cannot include {} ({})",
196 196 filename_bytes,
197 197 format_bytes::Utf8(error)
198 198 ),
199 199 }
200 200 .into());
201 201 }
202 202 }
203 203 }
204 204 } else if EMPTY_RE.captures(bytes).is_some() {
205 205 } else if let Some(m) = SECTION_RE.captures(bytes) {
206 206 section = m[1].to_vec();
207 207 } else if let Some(m) = ITEM_RE.captures(bytes) {
208 208 let item = m[1].to_vec();
209 209 let mut value = m[2].to_vec();
210 210 loop {
211 211 match lines_iter.peek() {
212 212 None => break,
213 213 Some((_, v)) => {
214 214 if COMMENT_RE.captures(v).is_some() {
215 215 } else if CONT_RE.captures(v).is_some() {
216 216 value.extend(b"\n");
217 217 value.extend(&m[1]);
218 218 } else {
219 219 break;
220 220 }
221 221 }
222 222 };
223 223 lines_iter.next();
224 224 }
225 225 current_layer.add(section.clone(), item, value, line);
226 226 } else if let Some(m) = UNSET_RE.captures(bytes) {
227 227 if let Some(map) = current_layer.sections.get_mut(&section) {
228 228 map.remove(&m[1]);
229 229 }
230 230 } else {
231 231 let message = if bytes.starts_with(b" ") {
232 232 format_bytes!(b"unexpected leading whitespace: {}", bytes)
233 233 } else {
234 234 bytes.to_owned()
235 235 };
236 236 return Err(ConfigParseError {
237 237 origin: ConfigOrigin::File(src.to_owned()),
238 238 line,
239 239 message,
240 240 }
241 241 .into());
242 242 }
243 243 }
244 244 if !current_layer.is_empty() {
245 245 layers.push(current_layer);
246 246 }
247 247 Ok(layers)
248 248 }
249 249 }
250 250
251 251 impl DisplayBytes for ConfigLayer {
252 252 fn display_bytes(
253 253 &self,
254 254 out: &mut dyn std::io::Write,
255 255 ) -> std::io::Result<()> {
256 256 let mut sections: Vec<_> = self.sections.iter().collect();
257 257 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
258 258
259 259 for (section, items) in sections.into_iter() {
260 260 let mut items: Vec<_> = items.iter().collect();
261 261 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
262 262
263 263 for (item, config_entry) in items {
264 264 write_bytes!(
265 265 out,
266 266 b"{}.{}={} # {}\n",
267 267 section,
268 268 item,
269 269 &config_entry.bytes,
270 270 &self.origin,
271 271 )?
272 272 }
273 273 }
274 274 Ok(())
275 275 }
276 276 }
277 277
278 278 /// Mapping of section item to value.
279 279 /// In the following:
280 280 /// ```text
281 281 /// [ui]
282 282 /// paginate=no
283 283 /// ```
284 284 /// "paginate" is the section item and "no" the value.
285 285 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
286 286
287 287 #[derive(Clone, Debug, PartialEq)]
288 288 pub struct ConfigValue {
289 289 /// The raw bytes of the value (be it from the CLI, env or from a file)
290 290 pub bytes: Vec<u8>,
291 291 /// Only present if the value comes from a file, 1-indexed.
292 292 pub line: Option<usize>,
293 293 }
294 294
295 295 #[derive(Clone, Debug, PartialEq, Eq)]
296 296 pub enum ConfigOrigin {
297 297 /// From a configuration file
298 298 File(PathBuf),
299 299 /// From [ui.tweakdefaults]
300 300 Tweakdefaults,
301 301 /// From a `--config` CLI argument
302 302 CommandLine,
303 303 /// From a `--color` CLI argument
304 304 CommandLineColor,
305 305 /// From environment variables like `$PAGER` or `$EDITOR`
306 306 Environment(Vec<u8>),
307 307 /// From configitems.toml
308 308 Defaults,
309 309 /* TODO extensions
310 310 * TODO Python resources?
311 311 * Others? */
312 312 }
313 313
314 314 impl DisplayBytes for ConfigOrigin {
315 315 fn display_bytes(
316 316 &self,
317 317 out: &mut dyn std::io::Write,
318 318 ) -> std::io::Result<()> {
319 319 match self {
320 320 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
321 321 ConfigOrigin::CommandLine => out.write_all(b"--config"),
322 322 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
323 323 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
324 324 ConfigOrigin::Tweakdefaults => {
325 325 write_bytes!(out, b"ui.tweakdefaults")
326 326 }
327 327 ConfigOrigin::Defaults => {
328 328 write_bytes!(out, b"configitems.toml")
329 329 }
330 330 }
331 331 }
332 332 }
333 333
334 334 #[derive(Debug)]
335 335 pub struct ConfigParseError {
336 336 pub origin: ConfigOrigin,
337 337 pub line: Option<usize>,
338 338 pub message: Vec<u8>,
339 339 }
340 340
341 341 #[derive(Debug, derive_more::From)]
342 342 pub enum ConfigError {
343 343 Parse(ConfigParseError),
344 344 Other(HgError),
345 345 }
346 346
347 347 fn make_regex(pattern: &'static str) -> Regex {
348 348 Regex::new(pattern).expect("expected a valid regex")
349 349 }
@@ -1,810 +1,810 b''
1 1 // config.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Mercurial config parsing and interfaces.
11 11
12 12 pub mod config_items;
13 13 mod layer;
14 14 mod plain_info;
15 15 mod values;
16 16 pub use layer::{ConfigError, ConfigOrigin, ConfigParseError};
17 17 use lazy_static::lazy_static;
18 18 pub use plain_info::PlainInfo;
19 19
20 20 use self::config_items::DefaultConfig;
21 21 use self::config_items::DefaultConfigItem;
22 22 use self::layer::ConfigLayer;
23 23 use self::layer::ConfigValue;
24 24 use crate::errors::HgError;
25 25 use crate::errors::{HgResultExt, IoResultExt};
26 26 use crate::utils::files::get_bytes_from_os_str;
27 27 use format_bytes::{write_bytes, DisplayBytes};
28 28 use std::collections::HashSet;
29 29 use std::env;
30 30 use std::fmt;
31 31 use std::path::{Path, PathBuf};
32 32 use std::str;
33 33
34 34 lazy_static! {
35 35 static ref DEFAULT_CONFIG: Result<DefaultConfig, HgError> = {
36 36 DefaultConfig::from_contents(include_str!(
37 37 "../../../../mercurial/configitems.toml"
38 38 ))
39 39 };
40 40 }
41 41
42 42 /// Holds the config values for the current repository
43 43 /// TODO update this docstring once we support more sources
44 44 #[derive(Clone)]
45 45 pub struct Config {
46 46 layers: Vec<layer::ConfigLayer>,
47 47 plain: PlainInfo,
48 48 }
49 49
50 50 impl DisplayBytes for Config {
51 51 fn display_bytes(
52 52 &self,
53 53 out: &mut dyn std::io::Write,
54 54 ) -> std::io::Result<()> {
55 55 for (index, layer) in self.layers.iter().rev().enumerate() {
56 56 write_bytes!(
57 57 out,
58 58 b"==== Layer {} (trusted: {}) ====\n{}",
59 59 index,
60 60 if layer.trusted {
61 61 &b"yes"[..]
62 62 } else {
63 63 &b"no"[..]
64 64 },
65 65 layer
66 66 )?;
67 67 }
68 68 Ok(())
69 69 }
70 70 }
71 71
72 72 pub enum ConfigSource {
73 73 /// Absolute path to a config file
74 74 AbsPath(PathBuf),
75 75 /// Already parsed (from the CLI, env, Python resources, etc.)
76 76 Parsed(layer::ConfigLayer),
77 77 }
78 78
79 79 #[derive(Debug)]
80 80 pub struct ConfigValueParseErrorDetails {
81 81 pub origin: ConfigOrigin,
82 82 pub line: Option<usize>,
83 83 pub section: Vec<u8>,
84 84 pub item: Vec<u8>,
85 85 pub value: Vec<u8>,
86 86 pub expected_type: &'static str,
87 87 }
88 88
89 89 // boxed to avoid very large Result types
90 90 pub type ConfigValueParseError = Box<ConfigValueParseErrorDetails>;
91 91
92 92 impl fmt::Display for ConfigValueParseError {
93 93 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
94 94 // TODO: add origin and line number information, here and in
95 95 // corresponding python code
96 96 write!(
97 97 f,
98 98 "config error: {}.{} is not a {} ('{}')",
99 99 String::from_utf8_lossy(&self.section),
100 100 String::from_utf8_lossy(&self.item),
101 101 self.expected_type,
102 102 String::from_utf8_lossy(&self.value)
103 103 )
104 104 }
105 105 }
106 106
107 107 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
108 108 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
109 109 // duplication with [_applyconfig] in [ui.py],
110 110 if !plain.is_plain() {
111 111 return false;
112 112 }
113 113 if section == b"alias" {
114 114 return plain.plainalias();
115 115 }
116 116 if section == b"revsetalias" {
117 117 return plain.plainrevsetalias();
118 118 }
119 119 if section == b"templatealias" {
120 120 return plain.plaintemplatealias();
121 121 }
122 122 if section == b"ui" {
123 123 let to_delete: &[&[u8]] = &[
124 124 b"debug",
125 125 b"fallbackencoding",
126 126 b"quiet",
127 127 b"slash",
128 128 b"logtemplate",
129 129 b"message-output",
130 130 b"statuscopies",
131 131 b"style",
132 132 b"traceback",
133 133 b"verbose",
134 134 ];
135 135 return to_delete.contains(&item);
136 136 }
137 137 let sections_to_delete: &[&[u8]] =
138 138 &[b"defaults", b"commands", b"command-templates"];
139 139 sections_to_delete.contains(&section)
140 140 }
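For context, `PlainInfo` reflects Mercurial's `HGPLAIN` / `HGPLAINEXCEPT` environment variables. The invocations below are illustrative, not taken from this repository's tests:

    HGPLAIN=1 hg log              # aliases, custom templates, verbose, etc. are ignored
    HGPLAINEXCEPT=alias hg log    # plain mode, but [alias] definitions still apply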
141 141
142 142 impl Config {
143 143 /// The configuration to use when printing configuration-loading errors
144 144 pub fn empty() -> Self {
145 145 Self {
146 146 layers: Vec::new(),
147 147 plain: PlainInfo::empty(),
148 148 }
149 149 }
150 150
151 151 /// Load system and user configuration from various files.
152 152 ///
153 153 /// This is also affected by some environment variables.
154 154 pub fn load_non_repo() -> Result<Self, ConfigError> {
155 155 let mut config = Self::empty();
156 156 let opt_rc_path = env::var_os("HGRCPATH");
157 157 // HGRCPATH replaces system config
158 158 if opt_rc_path.is_none() {
159 159 config.add_system_config()?
160 160 }
161 161
162 162 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
163 163 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
164 164 config.add_for_environment_variable("PAGER", b"pager", b"pager");
165 165
166 166 // These are set by `run-tests.py --rhg` to enable fallback for the
167 167 // entire test suite. Alternatives would be setting configuration
168 168 // through `$HGRCPATH` but some tests override that, or changing the
169 169 // `hg` shell alias to include `--config` but that disrupts tests that
170 170 // print command lines and check expected output.
171 171 config.add_for_environment_variable(
172 172 "RHG_ON_UNSUPPORTED",
173 173 b"rhg",
174 174 b"on-unsupported",
175 175 );
176 176 config.add_for_environment_variable(
177 177 "RHG_FALLBACK_EXECUTABLE",
178 178 b"rhg",
179 179 b"fallback-executable",
180 180 );
181 181
182 182 // HGRCPATH replaces user config
183 183 if opt_rc_path.is_none() {
184 184 config.add_user_config()?
185 185 }
186 186 if let Some(rc_path) = &opt_rc_path {
187 187 for path in env::split_paths(rc_path) {
188 188 if !path.as_os_str().is_empty() {
189 189 if path.is_dir() {
190 190 config.add_trusted_dir(&path)?
191 191 } else {
192 192 config.add_trusted_file(&path)?
193 193 }
194 194 }
195 195 }
196 196 }
197 197 Ok(config)
198 198 }
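As implemented above, setting `HGRCPATH` replaces both the system-wide and per-user configuration; each entry in the path is either a file, or a directory whose `*.rc` files are read in sorted order. An illustrative invocation (paths are hypothetical):

    HGRCPATH=/etc/mercurial/site.rc:$HOME/hg-extra.d hg status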
199 199
200 200 pub fn load_cli_args(
201 201 &mut self,
202 202 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
203 203 color_arg: Option<Vec<u8>>,
204 204 ) -> Result<(), ConfigError> {
205 205 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
206 206 self.layers.push(layer)
207 207 }
208 208 if let Some(arg) = color_arg {
209 209 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
210 210 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
211 211 self.layers.push(layer)
212 212 }
213 213 Ok(())
214 214 }
215 215
216 216 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
217 217 if let Some(entries) = std::fs::read_dir(path)
218 218 .when_reading_file(path)
219 219 .io_not_found_as_none()?
220 220 {
221 221 let mut file_paths = entries
222 222 .map(|result| {
223 223 result.when_reading_file(path).map(|entry| entry.path())
224 224 })
225 225 .collect::<Result<Vec<_>, _>>()?;
226 226 file_paths.sort();
227 227 for file_path in &file_paths {
228 228 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
229 229 self.add_trusted_file(file_path)?
230 230 }
231 231 }
232 232 }
233 233 Ok(())
234 234 }
235 235
236 236 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
237 237 if let Some(data) = std::fs::read(path)
238 238 .when_reading_file(path)
239 239 .io_not_found_as_none()?
240 240 {
241 241 self.layers.extend(ConfigLayer::parse(path, &data)?)
242 242 }
243 243 Ok(())
244 244 }
245 245
246 246 fn add_for_environment_variable(
247 247 &mut self,
248 248 var: &str,
249 249 section: &[u8],
250 250 key: &[u8],
251 251 ) {
252 252 if let Some(value) = env::var_os(var) {
253 253 let origin = layer::ConfigOrigin::Environment(var.into());
254 254 let mut layer = ConfigLayer::new(origin);
255 255 layer.add(
256 256 section.to_owned(),
257 257 key.to_owned(),
258 258 get_bytes_from_os_str(value),
259 259 None,
260 260 );
261 261 self.layers.push(layer)
262 262 }
263 263 }
264 264
265 265 #[cfg(unix)] // TODO: other platforms
266 266 fn add_system_config(&mut self) -> Result<(), ConfigError> {
267 267 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
268 268 let etc = prefix.join("etc").join("mercurial");
269 269 self.add_trusted_file(&etc.join("hgrc"))?;
270 270 self.add_trusted_dir(&etc.join("hgrc.d"))
271 271 };
272 272 let root = Path::new("/");
273 273 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
274 274 // instead? TODO: can this be a relative path?
275 275 let hg = crate::utils::current_exe()?;
276 276 // TODO: this order (per-installation then per-system) matches
277 277 // `systemrcpath()` in `mercurial/scmposix.py`, but
278 278 // `mercurial/helptext/config.txt` suggests it should be reversed
279 279 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
280 280 if installation_prefix != root {
281 281 add_for_prefix(installation_prefix)?
282 282 }
283 283 }
284 284 add_for_prefix(root)?;
285 285 Ok(())
286 286 }
287 287
288 288 #[cfg(unix)] // TODO: other platforms
289 289 fn add_user_config(&mut self) -> Result<(), ConfigError> {
290 290 let opt_home = home::home_dir();
291 291 if let Some(home) = &opt_home {
292 292 self.add_trusted_file(&home.join(".hgrc"))?
293 293 }
294 294 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
295 295 if !darwin {
296 296 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
297 297 .map(PathBuf::from)
298 298 .or_else(|| opt_home.map(|home| home.join(".config")))
299 299 {
300 300 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
301 301 }
302 302 }
303 303 Ok(())
304 304 }
305 305
306 306 /// Loads in order, which means that the precedence is the same
307 307 /// as the order of `sources`.
308 308 pub fn load_from_explicit_sources(
309 309 sources: Vec<ConfigSource>,
310 310 ) -> Result<Self, ConfigError> {
311 311 let mut layers = vec![];
312 312
313 313 for source in sources.into_iter() {
314 314 match source {
315 315 ConfigSource::Parsed(c) => layers.push(c),
316 316 ConfigSource::AbsPath(c) => {
317 317 // TODO check if it should be trusted
318 318 // mercurial/ui.py:427
319 319 let data = match std::fs::read(&c) {
320 320 Err(_) => continue, // same as the python code
321 321 Ok(data) => data,
322 322 };
323 323 layers.extend(ConfigLayer::parse(&c, &data)?)
324 324 }
325 325 }
326 326 }
327 327
328 328 Ok(Config {
329 329 layers,
330 330 plain: PlainInfo::empty(),
331 331 })
332 332 }
333 333
334 334 /// Loads the per-repository config into a new `Config` which is combined
335 335 /// with `self`.
336 336 pub(crate) fn combine_with_repo(
337 337 &self,
338 338 repo_config_files: &[PathBuf],
339 339 ) -> Result<Self, ConfigError> {
340 340 let (cli_layers, other_layers) = self
341 341 .layers
342 342 .iter()
343 343 .cloned()
344 344 .partition(ConfigLayer::is_from_command_line);
345 345
346 346 let mut repo_config = Self {
347 347 layers: other_layers,
348 348 plain: PlainInfo::empty(),
349 349 };
350 350 for path in repo_config_files {
351 351 // TODO: check if this file should be trusted:
352 352 // `mercurial/ui.py:427`
353 353 repo_config.add_trusted_file(path)?;
354 354 }
355 355 repo_config.layers.extend(cli_layers);
356 356 Ok(repo_config)
357 357 }
358 358
359 359 pub fn apply_plain(&mut self, plain: PlainInfo) {
360 360 self.plain = plain;
361 361 }
362 362
363 363 /// Returns the default value for the given config item, if any.
364 364 pub fn get_default(
365 365 &self,
366 366 section: &[u8],
367 367 item: &[u8],
368 368 ) -> Result<Option<&DefaultConfigItem>, HgError> {
369 369 let default_config = DEFAULT_CONFIG.as_ref().map_err(|e| {
370 370 HgError::abort(
371 371 e.to_string(),
372 372 crate::exit_codes::ABORT,
373 373 Some("`mercurial/configitems.toml` is not valid".into()),
374 374 )
375 375 })?;
376 376 let default_opt = default_config.get(section, item);
377 377 Ok(default_opt.filter(|default| {
378 378 default
379 379 .in_core_extension()
380 380 .map(|extension| {
381 381 // Only return the default for an in-core extension item
382 382 // if said extension is enabled
383 383 self.is_extension_enabled(extension.as_bytes())
384 384 })
385 385 .unwrap_or(true)
386 386 }))
387 387 }
388 388
389 389 /// Return the config item that corresponds to a section + item, a function
390 390 /// to parse from the raw bytes to the expected type (which is passed as
391 391 /// a string only to make debugging easier).
392 392 /// Used by higher-level methods like `get_bool`.
393 393 ///
394 394 /// `fallback_to_default` controls whether the default value (if any) is
395 395 /// returned if nothing is found.
396 396 fn get_parse<'config, T: 'config>(
397 397 &'config self,
398 398 section: &[u8],
399 399 item: &[u8],
400 400 expected_type: &'static str,
401 401 parse: impl Fn(&'config [u8]) -> Option<T>,
402 402 fallback_to_default: bool,
403 403 ) -> Result<Option<T>, HgError>
404 404 where
405 405 Option<T>: TryFrom<&'config DefaultConfigItem, Error = HgError>,
406 406 {
407 407 match self.get_inner(section, item) {
408 408 Some((layer, v)) => match parse(&v.bytes) {
409 409 Some(b) => Ok(Some(b)),
410 410 None => Err(Box::new(ConfigValueParseErrorDetails {
411 411 origin: layer.origin.to_owned(),
412 412 line: v.line,
413 413 value: v.bytes.to_owned(),
414 414 section: section.to_owned(),
415 415 item: item.to_owned(),
416 416 expected_type,
417 417 })
418 418 .into()),
419 419 },
420 420 None => {
421 421 if !fallback_to_default {
422 422 return Ok(None);
423 423 }
424 424 match self.get_default(section, item)? {
425 425 Some(default) => {
426 426 // Defaults are TOML values, so they're not in the same
427 427 // shape as in the config files.
428 428 // First try to convert directly to the expected type
429 429 let as_t = default.try_into();
430 430 match as_t {
431 431 Ok(t) => Ok(t),
432 432 Err(e) => {
433 433 // If it fails, it means that...
434 434 let as_bytes: Result<Option<&[u8]>, _> =
435 435 default.try_into();
436 436 match as_bytes {
437 437 Ok(bytes_opt) => {
438 438 if let Some(bytes) = bytes_opt {
439 439 // ...we should be able to parse it
440 440 return Ok(parse(bytes));
441 441 }
442 442 Err(e)
443 443 }
444 444 Err(_) => Err(e),
445 445 }
446 446 }
447 447 }
448 448 }
449 449 None => {
450 450 self.print_devel_warning(section, item)?;
451 451 Ok(None)
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457
458 458 fn print_devel_warning(
459 459 &self,
460 460 section: &[u8],
461 461 item: &[u8],
462 462 ) -> Result<(), HgError> {
463 463 let warn_all = self.get_bool(b"devel", b"all-warnings")?;
464 464 let warn_specific = self.get_bool(b"devel", b"warn-config-unknown")?;
465 465 if !warn_all || !warn_specific {
466 466 // We technically shouldn't print anything here since it's not
467 467 // the concern of `hg-core`.
468 468 //
469 469 // We're printing directly to stderr since development warnings
470 470 // are not on by default and surfacing this to consumer crates
471 471 // (like `rhg`) would be more difficult, probably requiring
472 472 // something à la `log` crate.
473 473 //
474 474 // TODO maybe figure out a way of exposing a "warnings" channel
475 475 // that consumer crates can hook into. It would be useful for
476 476 // all other warnings that `hg-core` could expose.
477 477 eprintln!(
478 478 "devel-warn: accessing unregistered config item: '{}.{}'",
479 479 String::from_utf8_lossy(section),
480 480 String::from_utf8_lossy(item),
481 481 );
482 482 }
483 483 Ok(())
484 484 }
485 485
486 486 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
487 487 /// Otherwise, returns an `Ok(value)` if found, or `None`.
488 488 pub fn get_str(
489 489 &self,
490 490 section: &[u8],
491 491 item: &[u8],
492 492 ) -> Result<Option<&str>, HgError> {
493 493 self.get_parse(
494 494 section,
495 495 item,
496 496 "ASCII or UTF-8 string",
497 497 |value| str::from_utf8(value).ok(),
498 498 true,
499 499 )
500 500 }
501 501
502 502 /// Same as `get_str`, but doesn't fall back to the default `configitem`
503 503 /// if not defined in the user config.
504 504 pub fn get_str_no_default(
505 505 &self,
506 506 section: &[u8],
507 507 item: &[u8],
508 508 ) -> Result<Option<&str>, HgError> {
509 509 self.get_parse(
510 510 section,
511 511 item,
512 512 "ASCII or UTF-8 string",
513 513 |value| str::from_utf8(value).ok(),
514 514 false,
515 515 )
516 516 }
517 517
518 518 /// Returns an `Err` if the first value found is not a valid unsigned
519 519 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
520 520 pub fn get_u32(
521 521 &self,
522 522 section: &[u8],
523 523 item: &[u8],
524 524 ) -> Result<Option<u32>, HgError> {
525 525 self.get_parse(
526 526 section,
527 527 item,
528 528 "valid integer",
529 529 |value| str::from_utf8(value).ok()?.parse().ok(),
530 530 true,
531 531 )
532 532 }
533 533
534 534 /// Returns an `Err` if the first value found is not a valid file size
535 535 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
536 536 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
537 537 pub fn get_byte_size(
538 538 &self,
539 539 section: &[u8],
540 540 item: &[u8],
541 541 ) -> Result<Option<u64>, HgError> {
542 542 self.get_parse(
543 543 section,
544 544 item,
545 545 "byte quantity",
546 546 values::parse_byte_size,
547 547 true,
548 548 )
549 549 }
550 550
551 551 /// Returns an `Err` if the first value found is not a valid boolean.
552 552 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
553 553 /// found, or `None`.
554 554 pub fn get_option(
555 555 &self,
556 556 section: &[u8],
557 557 item: &[u8],
558 558 ) -> Result<Option<bool>, HgError> {
559 559 self.get_parse(section, item, "boolean", values::parse_bool, true)
560 560 }
561 561
562 562 /// Same as `get_option`, but doesn't fall back to the default `configitem`
563 563 /// if not defined in the user config.
564 564 pub fn get_option_no_default(
565 565 &self,
566 566 section: &[u8],
567 567 item: &[u8],
568 568 ) -> Result<Option<bool>, HgError> {
569 569 self.get_parse(section, item, "boolean", values::parse_bool, false)
570 570 }
571 571
572 572 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
573 573 /// if the value is not found, an `Err` if it's not a valid boolean.
574 574 pub fn get_bool(
575 575 &self,
576 576 section: &[u8],
577 577 item: &[u8],
578 578 ) -> Result<bool, HgError> {
579 579 Ok(self.get_option(section, item)?.unwrap_or(false))
580 580 }
581 581
582 582 /// Same as `get_bool`, but doesn't fall back to the default `configitem`
583 583 /// if not defined in the user config.
584 584 pub fn get_bool_no_default(
585 585 &self,
586 586 section: &[u8],
587 587 item: &[u8],
588 588 ) -> Result<bool, HgError> {
589 589 Ok(self.get_option_no_default(section, item)?.unwrap_or(false))
590 590 }
591 591
592 592 /// Returns `true` if the extension is enabled, `false` otherwise
593 593 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
594 594 let value = self.get(b"extensions", extension);
595 595 match value {
596 596 Some(c) => !c.starts_with(b"!"),
597 597 None => false,
598 598 }
599 599 }
600 600
601 601 /// If there is an `item` value in `section`, parse and return a list of
602 602 /// byte strings.
603 603 pub fn get_list(
604 604 &self,
605 605 section: &[u8],
606 606 item: &[u8],
607 607 ) -> Option<Vec<Vec<u8>>> {
608 608 self.get(section, item).map(values::parse_list)
609 609 }
610 610
611 611 /// Returns the raw value bytes of the first one found, or `None`.
612 612 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
613 613 self.get_inner(section, item)
614 614 .map(|(_, value)| value.bytes.as_ref())
615 615 }
616 616
617 617 /// Returns the raw value bytes of the first one found, or `None`.
618 618 pub fn get_with_origin(
619 619 &self,
620 620 section: &[u8],
621 621 item: &[u8],
622 622 ) -> Option<(&[u8], &ConfigOrigin)> {
623 623 self.get_inner(section, item)
624 624 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
625 625 }
626 626
627 627 /// Returns the layer and the value of the first one found, or `None`.
628 628 fn get_inner(
629 629 &self,
630 630 section: &[u8],
631 631 item: &[u8],
632 632 ) -> Option<(&ConfigLayer, &ConfigValue)> {
633 633 // Filter out the config items that are hidden by [PLAIN].
634 634 // This differs from python hg where we delete them from the config.
635 635 let should_ignore = should_ignore(&self.plain, section, item);
636 636 for layer in self.layers.iter().rev() {
637 637 if !layer.trusted {
638 638 continue;
639 639 }
640 640 // The [PLAIN] config should not affect the defaults.
641 641 //
642 642 // However, PLAIN should also affect the "tweaked" defaults (unless
643 643 // "tweakdefault" is part of "HGPLAINEXCEPT").
644 644 //
645 645 // In practice the tweak-default layer is only added when it is
646 646 // relevant, so we can safely always take it into
647 647 // account here.
648 648 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
649 649 {
650 650 continue;
651 651 }
652 652 if let Some(v) = layer.get(section, item) {
653 653 return Some((layer, v));
654 654 }
655 655 }
656 656 None
657 657 }
658 658
659 659 /// Return all keys defined for the given section
660 660 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
661 661 self.layers
662 662 .iter()
663 663 .flat_map(|layer| layer.iter_keys(section))
664 664 .collect()
665 665 }
666 666
667 667 /// Returns whether any key is defined in the given section
668 668 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
669 669 self.layers
670 670 .iter()
671 671 .any(|layer| layer.has_non_empty_section(section))
672 672 }
673 673
674 674 /// Yields (key, value) pairs for everything in the given section
675 675 pub fn iter_section<'a>(
676 676 &'a self,
677 677 section: &'a [u8],
678 678 ) -> impl Iterator<Item = (&[u8], &[u8])> + 'a {
679 679 // Deduplicate keys redefined in multiple layers
680 680 let mut keys_already_seen = HashSet::new();
681 681 let mut key_is_new =
682 682 move |&(key, _value): &(&'a [u8], &'a [u8])| -> bool {
683 683 keys_already_seen.insert(key)
684 684 };
685 685 // This is similar to `flat_map` + `filter_map`, except with a single
686 686 // closure that owns `key_is_new` (and therefore the
687 687 // `keys_already_seen` set):
688 688 let mut layer_iters = self
689 689 .layers
690 690 .iter()
691 691 .rev()
692 692 .map(move |layer| layer.iter_section(section))
693 693 .peekable();
694 694 std::iter::from_fn(move || loop {
695 695 if let Some(pair) = layer_iters.peek_mut()?.find(&mut key_is_new) {
696 696 return Some(pair);
697 697 } else {
698 698 layer_iters.next();
699 699 }
700 700 })
701 701 }
702 702
703 703 /// Get raw values bytes from all layers (even untrusted ones) in order
704 704 /// of precedence.
705 705 #[cfg(test)]
706 706 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
707 707 let mut res = vec![];
708 708 for layer in self.layers.iter().rev() {
709 709 if let Some(v) = layer.get(section, item) {
710 710 res.push(v.bytes.as_ref());
711 711 }
712 712 }
713 713 res
714 714 }
715 715
716 716 // a config layer that's introduced by ui.tweakdefaults
717 717 fn tweakdefaults_layer() -> ConfigLayer {
718 718 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
719 719
720 720 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
721 721 layer.add(
722 722 section[..].into(),
723 723 item[..].into(),
724 724 value[..].into(),
725 725 None,
726 726 );
727 727 };
728 728 // duplication of [tweakrc] from [ui.py]
729 729 add(b"ui", b"rollback", b"False");
730 730 add(b"ui", b"statuscopies", b"yes");
731 731 add(b"ui", b"interface", b"curses");
732 732 add(b"ui", b"relative-paths", b"yes");
733 733 add(b"commands", b"grep.all-files", b"True");
734 734 add(b"commands", b"update.check", b"noconflict");
735 735 add(b"commands", b"status.verbose", b"True");
736 736 add(b"commands", b"resolve.explicit-re-merge", b"True");
737 737 add(b"git", b"git", b"1");
738 738 add(b"git", b"showfunc", b"1");
739 739 add(b"git", b"word-diff", b"1");
740 740 layer
741 741 }
742 742
743 743 // introduce the tweaked defaults as implied by ui.tweakdefaults
744 744 pub fn tweakdefaults(&mut self) {
745 745 self.layers.insert(0, Config::tweakdefaults_layer());
746 746 }
747 747 }
748 748
749 749 #[cfg(test)]
750 750 mod tests {
751 751 use super::*;
752 752 use pretty_assertions::assert_eq;
753 753 use std::fs::File;
754 754 use std::io::Write;
755 755
756 756 #[test]
757 757 fn test_include_layer_ordering() {
758 758 let tmpdir = tempfile::tempdir().unwrap();
759 759 let tmpdir_path = tmpdir.path();
760 760 let mut included_file =
761 File::create(&tmpdir_path.join("included.rc")).unwrap();
761 File::create(tmpdir_path.join("included.rc")).unwrap();
762 762
763 763 included_file.write_all(b"[section]\nitem=value1").unwrap();
764 764 let base_config_path = tmpdir_path.join("base.rc");
765 765 let mut config_file = File::create(&base_config_path).unwrap();
766 766 let data =
767 767 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
768 768 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
769 769 config_file.write_all(data).unwrap();
770 770
771 771 let sources = vec![ConfigSource::AbsPath(base_config_path)];
772 772 let config = Config::load_from_explicit_sources(sources)
773 773 .expect("expected valid config");
774 774
775 775 let (_, value) = config.get_inner(b"section", b"item").unwrap();
776 776 assert_eq!(
777 777 value,
778 778 &ConfigValue {
779 779 bytes: b"value2".to_vec(),
780 780 line: Some(4)
781 781 }
782 782 );
783 783
784 784 let value = config.get(b"section", b"item").unwrap();
785 785 assert_eq!(value, b"value2",);
786 786 assert_eq!(
787 787 config.get_all(b"section", b"item"),
788 788 [b"value2", b"value1", b"value0"]
789 789 );
790 790
791 791 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
792 792 assert_eq!(
793 793 config.get_byte_size(b"section2", b"size").unwrap(),
794 794 Some(1024 + 512)
795 795 );
796 796 assert!(config.get_u32(b"section2", b"not-count").is_err());
797 797 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
798 798 }
799 799
800 800 #[test]
801 801 fn test_default_parse() {
802 802 let config = Config::load_from_explicit_sources(vec![])
803 803 .expect("expected valid config");
804 804 let ret = config.get_byte_size(b"cmdserver", b"max-log-size");
805 805 assert!(ret.is_ok(), "{:?}", ret);
806 806
807 807 let ret = config.get_byte_size(b"ui", b"formatted");
808 808 assert!(ret.unwrap().is_none());
809 809 }
810 810 }
@@ -1,286 +1,285 b''
1 1 // dagops.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Miscellaneous DAG operations
9 9 //!
10 10 //! # Terminology
11 11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 12 //! mean those revisions that have no children among the collection.
13 13 /// - Similarly, by *relative roots* of a collection of `Revision`, we mean those
14 14 //! whose parents, if any, don't belong to the collection.
15 15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 16 use crate::ancestors::AncestorsIterator;
17 17 use std::collections::{BTreeSet, HashSet};
18 18
19 19 fn remove_parents<S: std::hash::BuildHasher>(
20 20 graph: &impl Graph,
21 21 rev: Revision,
22 22 set: &mut HashSet<Revision, S>,
23 23 ) -> Result<(), GraphError> {
24 24 for parent in graph.parents(rev)?.iter() {
25 25 if *parent != NULL_REVISION {
26 26 set.remove(parent);
27 27 }
28 28 }
29 29 Ok(())
30 30 }
31 31
32 32 /// Relative heads out of some revisions, passed as an iterator.
33 33 ///
34 34 /// These heads are defined as those revisions that have no children
35 35 /// among those emitted by the iterator.
36 36 ///
37 37 /// # Performance notes
38 38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 39 ///
40 40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 42 /// construct it comes from.
43 43 pub fn heads<'a>(
44 44 graph: &impl Graph,
45 45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 46 ) -> Result<HashSet<Revision>, GraphError> {
47 47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 48 heads.remove(&NULL_REVISION);
49 49 for rev in iter_revs {
50 50 if *rev != NULL_REVISION {
51 51 remove_parents(graph, *rev, &mut heads)?;
52 52 }
53 53 }
54 54 Ok(heads)
55 55 }
56 56
57 57 /// Retain in `revs` only its relative heads.
58 58 ///
59 59 /// This is an in-place operation, so that control of the incoming
60 60 /// set is left to the caller.
61 61 /// - a direct Python binding would probably need to build its own `HashSet`
62 62 /// from an incoming iterable, even if its sole purpose is to extract the
63 63 /// heads.
64 64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 65 ///
66 66 /// # Performance notes
67 67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 69 graph: &impl Graph,
70 70 revs: &mut HashSet<Revision, S>,
71 71 ) -> Result<(), GraphError> {
72 72 revs.remove(&NULL_REVISION);
73 73 // we need to construct an iterable copy of revs to avoid iterating while
74 74 // mutating
75 75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 76 for rev in as_vec {
77 77 if rev != NULL_REVISION {
78 78 remove_parents(graph, rev, revs)?;
79 79 }
80 80 }
81 81 Ok(())
82 82 }
83 83
84 84 /// Roots of `revs`, passed as a `HashSet`
85 85 ///
86 86 /// They are returned in arbitrary order
87 87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 88 graph: &G,
89 89 revs: &HashSet<Revision, S>,
90 90 ) -> Result<Vec<Revision>, GraphError> {
91 91 let mut roots: Vec<Revision> = Vec::new();
92 92 for rev in revs {
93 93 if graph
94 94 .parents(*rev)?
95 95 .iter()
96 96 .filter(|p| **p != NULL_REVISION)
97 97 .all(|p| !revs.contains(p))
98 98 {
99 99 roots.push(*rev);
100 100 }
101 101 }
102 102 Ok(roots)
103 103 }
104 104
105 105 /// Compute the topological range between two collections of revisions
106 106 ///
107 107 /// This is equivalent to the revset `<roots>::<heads>`.
108 108 ///
109 109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 110 /// actually cloning just a reference-counted Python pointer if
111 111 /// it's passed over through `rust-cpython`. This is due to the internal
112 112 /// use of `AncestorsIterator`
113 113 ///
114 114 /// # Algorithmic details
115 115 ///
116 116 /// This is a two-pass sweep inspired by what `reachableroots2` from
117 117 /// `mercurial.cext.parsers` does to obtain the same results.
118 118 ///
119 119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 120 ///   them in the `heads_ancestors` vector, and adding any element of
121 121 /// `roots` we find among them to the resulting range.
122 122 /// - Then, we iterate on that recorded vector so that a revision is always
123 123 /// emitted after its parents and add all revisions whose parents are already
124 124 /// in the range to the results.
125 125 ///
126 126 /// # Performance notes
127 127 ///
128 128 /// The main difference with the C implementation is that
129 129 /// the latter uses a flat array with bit flags, instead of complex structures
130 130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 131 /// possible that the present implementation could be more memory efficient
132 132 /// for very large repositories with many branches.
133 133 pub fn range(
134 134 graph: &(impl Graph + Clone),
135 135 roots: impl IntoIterator<Item = Revision>,
136 136 heads: impl IntoIterator<Item = Revision>,
137 137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 138 let mut range = BTreeSet::new();
139 139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 140 let min_root: Revision = match roots.iter().cloned().min() {
141 141 None => {
142 142 return Ok(range);
143 143 }
144 144 Some(r) => r,
145 145 };
146 146
147 147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 148 // of all seen revisions, which is also what we record, albeit in an ordered
149 149 // way. There's room for improvement on this duplication.
150 150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 152 for revres in ait {
153 153 let rev = revres?;
154 154 if roots.contains(&rev) {
155 155 range.insert(rev);
156 156 }
157 157 heads_ancestors.push(rev);
158 158 }
159 159
160 160 for rev in heads_ancestors.into_iter().rev() {
161 161 for parent in graph.parents(rev)?.iter() {
162 162 if *parent != NULL_REVISION && range.contains(parent) {
163 163 range.insert(rev);
164 164 }
165 165 }
166 166 }
167 167 Ok(range)
168 168 }
169 169
170 170 #[cfg(test)]
171 171 mod tests {
172 172
173 173 use super::*;
174 174 use crate::{testing::SampleGraph, BaseRevision};
175 175
176 176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 177 fn retain_heads_sorted(
178 178 graph: &impl Graph,
179 179 revs: &[BaseRevision],
180 180 ) -> Result<Vec<Revision>, GraphError> {
181 181 let mut revs: HashSet<Revision> =
182 182 revs.iter().cloned().map(Revision).collect();
183 183 retain_heads(graph, &mut revs)?;
184 184 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
185 185 as_vec.sort_unstable();
186 186 Ok(as_vec)
187 187 }
188 188
189 189 #[test]
190 190 fn test_retain_heads() -> Result<(), GraphError> {
191 191 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
192 192 assert_eq!(
193 193 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
194 194 vec![1, 6, 12]
195 195 );
196 196 assert_eq!(
197 197 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
198 198 vec![3, 5, 8, 9]
199 199 );
200 200 Ok(())
201 201 }
202 202
203 203 /// Apply `heads()` to the given slice and return as a sorted `Vec`
204 204 fn heads_sorted(
205 205 graph: &impl Graph,
206 206 revs: &[BaseRevision],
207 207 ) -> Result<Vec<Revision>, GraphError> {
208 let iter_revs: Vec<_> =
209 revs.into_iter().cloned().map(Revision).collect();
208 let iter_revs: Vec<_> = revs.iter().cloned().map(Revision).collect();
210 209 let heads = heads(graph, iter_revs.iter())?;
211 210 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
212 211 as_vec.sort_unstable();
213 212 Ok(as_vec)
214 213 }
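Note on the change in `heads_sorted` above: on a shared slice, `.into_iter()` and `.iter()` are the same iterator (both yield references), so clippy prefers the explicit `.iter()`. A minimal sketch:

    fn main() {
        let revs: &[i32] = &[4, 5, 6];
        // Both calls yield `&i32` items here, so the two vectors are equal;
        // `.iter()` just states the borrowing intent plainly.
        let a: Vec<i32> = revs.iter().cloned().collect();
        let b: Vec<i32> = revs.into_iter().cloned().collect();
        assert_eq!(a, b);
    }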
215 214
216 215 #[test]
217 216 fn test_heads() -> Result<(), GraphError> {
218 217 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
219 218 assert_eq!(
220 219 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
221 220 vec![1, 6, 12]
222 221 );
223 222 assert_eq!(
224 223 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
225 224 vec![3, 5, 8, 9]
226 225 );
227 226 Ok(())
228 227 }
229 228
230 229 /// Apply `roots()` and sort the result for easier comparison
231 230 fn roots_sorted(
232 231 graph: &impl Graph,
233 232 revs: &[BaseRevision],
234 233 ) -> Result<Vec<Revision>, GraphError> {
235 234 let set: HashSet<_> = revs.iter().cloned().map(Revision).collect();
236 235 let mut as_vec = roots(graph, &set)?;
237 236 as_vec.sort_unstable();
238 237 Ok(as_vec)
239 238 }
240 239
241 240 #[test]
242 241 fn test_roots() -> Result<(), GraphError> {
243 242 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
244 243 assert_eq!(
245 244 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
246 245 vec![0, 4, 12]
247 246 );
248 247 assert_eq!(
249 248 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
250 249 vec![1, 8]
251 250 );
252 251 Ok(())
253 252 }
254 253
255 254 /// Apply `range()` and convert the result into a Vec for easier comparison
256 255 fn range_vec(
257 256 graph: impl Graph + Clone,
258 257 roots: &[BaseRevision],
259 258 heads: &[BaseRevision],
260 259 ) -> Result<Vec<Revision>, GraphError> {
261 260 range(
262 261 &graph,
263 262 roots.iter().cloned().map(Revision),
264 263 heads.iter().cloned().map(Revision),
265 264 )
266 265 .map(|bs| bs.into_iter().collect())
267 266 }
268 267
269 268 #[test]
270 269 fn test_range() -> Result<(), GraphError> {
271 270 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
272 271 assert_eq!(
273 272 range_vec(SampleGraph, &[0], &[8])?,
274 273 Vec::<Revision>::new()
275 274 );
276 275 assert_eq!(
277 276 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
278 277 vec![5, 10]
279 278 );
280 279 assert_eq!(
281 280 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
282 281 vec![5, 6, 9, 10, 12]
283 282 );
284 283 Ok(())
285 284 }
286 285 }
@@ -1,1937 +1,1937 b''
1 1 use bytes_cast::BytesCast;
2 2 use std::borrow::Cow;
3 3 use std::path::PathBuf;
4 4
5 5 use super::on_disk;
6 6 use super::on_disk::DirstateV2ParseError;
7 7 use super::owning::OwningDirstateMap;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::pack_entry;
10 10 use crate::dirstate::parsers::packed_entry_size;
11 11 use crate::dirstate::parsers::parse_dirstate_entries;
12 12 use crate::dirstate::CopyMapIter;
13 13 use crate::dirstate::DirstateV2Data;
14 14 use crate::dirstate::ParentFileData;
15 15 use crate::dirstate::StateMapIter;
16 16 use crate::dirstate::TruncatedTimestamp;
17 17 use crate::matchers::Matcher;
18 18 use crate::utils::filter_map_results;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::FastHashbrownMap as FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
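To make the threshold concrete: with a 100 KiB data file and a ratio of 0.5, appending remains acceptable while fewer than 50 KiB of it are unreachable; beyond that, rewriting a fresh data file is preferred. A hedged sketch of such a check (illustrative only; the actual condition lives elsewhere in this module):

    fn append_is_acceptable(unreachable_bytes: u32, total_bytes: u32) -> bool {
        // Keep appending while the garbage stays under the configured fraction.
        (unreachable_bytes as f32)
            < ACCEPTABLE_UNREACHABLE_BYTES_RATIO * (total_bytes as f32)
    }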
33 33
34 34 #[derive(Debug, PartialEq, Eq)]
35 35 /// Version of the on-disk format
36 36 pub enum DirstateVersion {
37 37 V1,
38 38 V2,
39 39 }
40 40
41 41 #[derive(Debug, PartialEq, Eq)]
42 42 pub enum DirstateMapWriteMode {
43 43 Auto,
44 44 ForceNewDataFile,
45 45 ForceAppend,
46 46 }
47 47
48 48 #[derive(Debug)]
49 49 pub struct DirstateMap<'on_disk> {
50 50 /// Contents of the `.hg/dirstate` file
51 51 pub(super) on_disk: &'on_disk [u8],
52 52
53 53 pub(super) root: ChildNodes<'on_disk>,
54 54
55 55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 56 pub(super) nodes_with_entry_count: u32,
57 57
58 58 /// Number of nodes anywhere in the tree that have
59 59 /// `.copy_source.is_some()`.
60 60 pub(super) nodes_with_copy_source_count: u32,
61 61
62 62 /// See on_disk::Header
63 63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64 64
65 65 /// How many bytes of `on_disk` are not used anymore
66 66 pub(super) unreachable_bytes: u32,
67 67
68 68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 69 /// we need to write some new metadata, but no new data on disk,
70 70 /// as well as to detect writes that have happened in another process
71 71 /// since first read.
72 72 pub(super) old_data_size: usize,
73 73
74 74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 75 /// the UUID has been changed by another process since first read.
76 76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 77 pub(super) old_uuid: Option<Vec<u8>>,
78 78
79 79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 80 /// (v2). Used to detect if the file has changed from another process.
81 81 /// Since it's always written atomically, we can compare the inode to
82 82 /// check the file identity.
83 83 ///
84 84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 85 pub(super) identity: Option<u64>,
86 86
87 87 pub(super) dirstate_version: DirstateVersion,
88 88
89 89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
90 90 pub(super) write_mode: DirstateMapWriteMode,
91 91 }
92 92
93 93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
94 94 /// map key would also work: all paths in a given map have the same parent
95 95 /// path, so comparing full paths gives the same result as comparing base
96 96 /// names. However `HashMap` would waste time always re-hashing the same
97 97 /// string prefix.
98 98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
99 99
100 100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
101 101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
102 102 #[derive(Debug)]
103 103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
104 104 InMemory(&'tree HgPathBuf),
105 105 OnDisk(&'on_disk HgPath),
106 106 }
107 107
108 108 #[derive(Debug)]
109 109 pub(super) enum ChildNodes<'on_disk> {
110 110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
111 111 OnDisk(&'on_disk [on_disk::Node]),
112 112 }
113 113
114 114 #[derive(Debug)]
115 115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
116 116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
117 117 OnDisk(&'on_disk [on_disk::Node]),
118 118 }
119 119
120 120 #[derive(Debug)]
121 121 pub(super) enum NodeRef<'tree, 'on_disk> {
122 122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
123 123 OnDisk(&'on_disk on_disk::Node),
124 124 }
125 125
126 126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
127 127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
128 128 match *self {
129 129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
130 130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
131 131 }
132 132 }
133 133 }
134 134
135 135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
136 136 type Target = HgPath;
137 137
138 138 fn deref(&self) -> &HgPath {
139 139 match *self {
140 140 BorrowedPath::InMemory(in_memory) => in_memory,
141 141 BorrowedPath::OnDisk(on_disk) => on_disk,
142 142 }
143 143 }
144 144 }
145 145
146 146 impl Default for ChildNodes<'_> {
147 147 fn default() -> Self {
148 148 ChildNodes::InMemory(Default::default())
149 149 }
150 150 }
151 151
152 152 impl<'on_disk> ChildNodes<'on_disk> {
153 153 pub(super) fn as_ref<'tree>(
154 154 &'tree self,
155 155 ) -> ChildNodesRef<'tree, 'on_disk> {
156 156 match self {
157 157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
158 158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
159 159 }
160 160 }
161 161
162 162 pub(super) fn is_empty(&self) -> bool {
163 163 match self {
164 164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
165 165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
166 166 }
167 167 }
168 168
169 169 fn make_mut(
170 170 &mut self,
171 171 on_disk: &'on_disk [u8],
172 172 unreachable_bytes: &mut u32,
173 173 ) -> Result<
174 174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
175 175 DirstateV2ParseError,
176 176 > {
177 177 match self {
178 178 ChildNodes::InMemory(nodes) => Ok(nodes),
179 179 ChildNodes::OnDisk(nodes) => {
180 180 *unreachable_bytes +=
181 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
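Note on the change just above: `nodes` is matched out of a reference, so it is itself a reference to the slice reference; dereferencing it once before calling `size_of_val` makes explicit that the slice contents are being measured, which is what clippy's warning about taking the size of a reference (likely `size_of_ref`) asks for. The turbofish had already pinned the measured type, so behavior is unchanged. A small illustration:

    fn main() {
        let data = [0u8; 16];
        let slice: &[u8] = &data;
        let slice_ref: &&[u8] = &slice;
        // Measures the 16 bytes the slice points to:
        assert_eq!(std::mem::size_of_val(slice), 16);
        // Measures the fat pointer itself, not the data behind it:
        assert_eq!(std::mem::size_of_val(slice_ref), std::mem::size_of::<&[u8]>());
    }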
182 182 let nodes = nodes
183 183 .iter()
184 184 .map(|node| {
185 185 Ok((
186 186 node.path(on_disk)?,
187 187 node.to_in_memory_node(on_disk)?,
188 188 ))
189 189 })
190 190 .collect::<Result<_, _>>()?;
191 191 *self = ChildNodes::InMemory(nodes);
192 192 match self {
193 193 ChildNodes::InMemory(nodes) => Ok(nodes),
194 194 ChildNodes::OnDisk(_) => unreachable!(),
195 195 }
196 196 }
197 197 }
198 198 }
199 199 }
200 200
201 201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
202 202 pub(super) fn get(
203 203 &self,
204 204 base_name: &HgPath,
205 205 on_disk: &'on_disk [u8],
206 206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
207 207 match self {
208 208 ChildNodesRef::InMemory(nodes) => Ok(nodes
209 209 .get_key_value(base_name)
210 210 .map(|(k, v)| NodeRef::InMemory(k, v))),
211 211 ChildNodesRef::OnDisk(nodes) => {
212 212 let mut parse_result = Ok(());
213 213 let search_result = nodes.binary_search_by(|node| {
214 214 match node.base_name(on_disk) {
215 215 Ok(node_base_name) => node_base_name.cmp(base_name),
216 216 Err(e) => {
217 217 parse_result = Err(e);
218 218 // Dummy comparison result, `search_result` won’t
219 219 // be used since `parse_result` is an error
220 220 std::cmp::Ordering::Equal
221 221 }
222 222 }
223 223 });
224 224 parse_result.map(|()| {
225 225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
226 226 })
227 227 }
228 228 }
229 229 }
230 230
231 231 /// Iterate in undefined order
232 232 pub(super) fn iter(
233 233 &self,
234 234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
235 235 match self {
236 236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
237 237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
238 238 ),
239 239 ChildNodesRef::OnDisk(nodes) => {
240 240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
241 241 }
242 242 }
243 243 }
244 244
245 245 /// Iterate in parallel in undefined order
246 246 pub(super) fn par_iter(
247 247 &self,
248 248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
249 249 {
250 250 use rayon::prelude::*;
251 251 match self {
252 252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
253 253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
254 254 ),
255 255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
256 256 nodes.par_iter().map(NodeRef::OnDisk),
257 257 ),
258 258 }
259 259 }
260 260
261 261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
262 262 match self {
263 263 ChildNodesRef::InMemory(nodes) => {
264 264 let mut vec: Vec<_> = nodes
265 265 .iter()
266 266 .map(|(k, v)| NodeRef::InMemory(k, v))
267 267 .collect();
268 268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
269 269 match node {
270 270 NodeRef::InMemory(path, _node) => path.base_name(),
271 271 NodeRef::OnDisk(_) => unreachable!(),
272 272 }
273 273 }
274 274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
275 275 // value: https://github.com/rust-lang/rust/issues/34162
276 276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
277 277 vec
278 278 }
279 279 ChildNodesRef::OnDisk(nodes) => {
280 280 // Nodes on disk are already sorted
281 281 nodes.iter().map(NodeRef::OnDisk).collect()
282 282 }
283 283 }
284 284 }
285 285 }
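// Note on ordering: the on-disk format keeps child nodes sorted by base
// name, which is what lets `ChildNodesRef::get` use `binary_search_by`
// above and lets `sorted` return on-disk nodes without re-sorting. A
// hypothetical lookup, assuming a `children: ChildNodesRef` borrowed from
// a map together with that map's raw `on_disk` bytes:
//
//     let maybe_node = children.get(HgPath::new(b"dir"), on_disk)?;
//
// The in-memory variant answers the same call through its hash map
// instead, which is why `iter` documents an undefined order.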
286 286
287 287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
288 288 pub(super) fn full_path(
289 289 &self,
290 290 on_disk: &'on_disk [u8],
291 291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
292 292 match self {
293 293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
294 294 NodeRef::OnDisk(node) => node.full_path(on_disk),
295 295 }
296 296 }
297 297
298 298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
299 299 /// HgPath>` detached from `'tree`
300 300 pub(super) fn full_path_borrowed(
301 301 &self,
302 302 on_disk: &'on_disk [u8],
303 303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
304 304 match self {
305 305 NodeRef::InMemory(path, _node) => match path.full_path() {
306 306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
307 307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
308 308 },
309 309 NodeRef::OnDisk(node) => {
310 310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
311 311 }
312 312 }
313 313 }
314 314
315 315 pub(super) fn base_name(
316 316 &self,
317 317 on_disk: &'on_disk [u8],
318 318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
319 319 match self {
320 320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
321 321 NodeRef::OnDisk(node) => node.base_name(on_disk),
322 322 }
323 323 }
324 324
325 325 pub(super) fn children(
326 326 &self,
327 327 on_disk: &'on_disk [u8],
328 328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
329 329 match self {
330 330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
331 331 NodeRef::OnDisk(node) => {
332 332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
333 333 }
334 334 }
335 335 }
336 336
337 337 pub(super) fn has_copy_source(&self) -> bool {
338 338 match self {
339 339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
340 340 NodeRef::OnDisk(node) => node.has_copy_source(),
341 341 }
342 342 }
343 343
344 344 pub(super) fn copy_source(
345 345 &self,
346 346 on_disk: &'on_disk [u8],
347 347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
348 348 match self {
349 349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
350 350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
351 351 }
352 352 }
353 353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
354 354 /// HgPath>` detached from `'tree`
355 355 pub(super) fn copy_source_borrowed(
356 356 &self,
357 357 on_disk: &'on_disk [u8],
358 358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
359 359 {
360 360 Ok(match self {
361 361 NodeRef::InMemory(_path, node) => {
362 362 node.copy_source.as_ref().map(|source| match source {
363 363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
364 364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
365 365 })
366 366 }
367 367 NodeRef::OnDisk(node) => {
368 368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
369 369 }
370 370 })
371 371 }
372 372
373 373 pub(super) fn entry(
374 374 &self,
375 375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 376 match self {
377 377 NodeRef::InMemory(_path, node) => {
378 378 Ok(node.data.as_entry().copied())
379 379 }
380 380 NodeRef::OnDisk(node) => node.entry(),
381 381 }
382 382 }
383 383
384 384 pub(super) fn cached_directory_mtime(
385 385 &self,
386 386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
387 387 match self {
388 388 NodeRef::InMemory(_path, node) => Ok(match node.data {
389 389 NodeData::CachedDirectory { mtime } => Some(mtime),
390 390 _ => None,
391 391 }),
392 392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
393 393 }
394 394 }
395 395
396 396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
397 397 match self {
398 398 NodeRef::InMemory(_path, node) => {
399 399 node.descendants_with_entry_count
400 400 }
401 401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
402 402 }
403 403 }
404 404
405 405 pub(super) fn tracked_descendants_count(&self) -> u32 {
406 406 match self {
407 407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
408 408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
409 409 }
410 410 }
411 411 }
412 412
413 413 /// Represents a file or a directory
414 414 #[derive(Default, Debug)]
415 415 pub(super) struct Node<'on_disk> {
416 416 pub(super) data: NodeData,
417 417
418 418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
419 419
420 420 pub(super) children: ChildNodes<'on_disk>,
421 421
422 422 /// How many (non-inclusive) descendants of this node have an entry.
423 423 pub(super) descendants_with_entry_count: u32,
424 424
425 425 /// How many (non-inclusive) descendants of this node have an entry whose
426 426 /// state is "tracked".
427 427 pub(super) tracked_descendants_count: u32,
428 428 }
429 429
430 430 #[derive(Debug)]
431 431 pub(super) enum NodeData {
432 432 Entry(DirstateEntry),
433 433 CachedDirectory { mtime: TruncatedTimestamp },
434 434 None,
435 435 }
436 436
437 437 impl Default for NodeData {
438 438 fn default() -> Self {
439 439 NodeData::None
440 440 }
441 441 }
442 442
443 443 impl NodeData {
444 444 fn has_entry(&self) -> bool {
445 445 matches!(self, NodeData::Entry(_))
446 446 }
447 447
448 448 fn as_entry(&self) -> Option<&DirstateEntry> {
449 449 match self {
450 450 NodeData::Entry(entry) => Some(entry),
451 451 _ => None,
452 452 }
453 453 }
454 454
455 455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
456 456 match self {
457 457 NodeData::Entry(entry) => Some(entry),
458 458 _ => None,
459 459 }
460 460 }
461 461 }
462 462
463 463 impl<'on_disk> DirstateMap<'on_disk> {
464 464 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
465 465 Self {
466 466 on_disk,
467 467 root: ChildNodes::default(),
468 468 nodes_with_entry_count: 0,
469 469 nodes_with_copy_source_count: 0,
470 470 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
471 471 unreachable_bytes: 0,
472 472 old_data_size: 0,
473 473 old_uuid: None,
474 474 identity: None,
475 475 dirstate_version: DirstateVersion::V1,
476 476 write_mode: DirstateMapWriteMode::Auto,
477 477 }
478 478 }
479 479
480 480 #[logging_timer::time("trace")]
481 481 pub fn new_v2(
482 482 on_disk: &'on_disk [u8],
483 483 data_size: usize,
484 484 metadata: &[u8],
485 485 uuid: Vec<u8>,
486 486 identity: Option<u64>,
487 487 ) -> Result<Self, DirstateError> {
488 488 if let Some(data) = on_disk.get(..data_size) {
489 489 Ok(on_disk::read(data, metadata, uuid, identity)?)
490 490 } else {
491 491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
492 492 }
493 493 }
494 494
495 495 #[logging_timer::time("trace")]
496 496 pub fn new_v1(
497 497 on_disk: &'on_disk [u8],
498 498 identity: Option<u64>,
499 499 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
500 500 let mut map = Self::empty(on_disk);
501 501 if map.on_disk.is_empty() {
502 502 return Ok((map, None));
503 503 }
504 504
505 505 let parents = parse_dirstate_entries(
506 506 map.on_disk,
507 507 |path, entry, copy_source| {
508 508 let tracked = entry.tracked();
509 509 let node = Self::get_or_insert_node_inner(
510 510 map.on_disk,
511 511 &mut map.unreachable_bytes,
512 512 &mut map.root,
513 513 path,
514 514 WithBasename::to_cow_borrowed,
515 515 |ancestor| {
516 516 if tracked {
517 517 ancestor.tracked_descendants_count += 1
518 518 }
519 519 ancestor.descendants_with_entry_count += 1
520 520 },
521 521 )?;
522 522 assert!(
523 523 !node.data.has_entry(),
524 524 "duplicate dirstate entry in read"
525 525 );
526 526 assert!(
527 527 node.copy_source.is_none(),
528 528 "duplicate dirstate entry in read"
529 529 );
530 530 node.data = NodeData::Entry(*entry);
531 531 node.copy_source = copy_source.map(Cow::Borrowed);
532 532 map.nodes_with_entry_count += 1;
533 533 if copy_source.is_some() {
534 534 map.nodes_with_copy_source_count += 1
535 535 }
536 536 Ok(())
537 537 },
538 538 )?;
539 539 let parents = Some(*parents);
540 540 map.identity = identity;
541 541
542 542 Ok((map, parents))
543 543 }
544 544
545 545 /// Assuming dirstate-v2 format, returns whether the next write should
546 546 /// append to the existing data file that contains `self.on_disk` (true),
547 547 /// or create a new data file from scratch (false).
548 548 pub(super) fn write_should_append(&self) -> bool {
549 549 match self.write_mode {
550 550 DirstateMapWriteMode::ForceAppend => true,
551 551 DirstateMapWriteMode::ForceNewDataFile => false,
552 552 DirstateMapWriteMode::Auto => {
553 553 let ratio =
554 554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
555 555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
556 556 }
557 557 }
558 558 }
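    // A quick sketch of the `Auto` decision with made-up numbers: if the
    // existing data file is 1000 bytes long and 300 of those bytes have
    // become unreachable, the ratio is 0.3, and the next write appends
    // only if 0.3 is still below `ACCEPTABLE_UNREACHABLE_BYTES_RATIO`
    // (a constant defined elsewhere); otherwise a fresh data file is
    // written and the garbage bytes are dropped.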
559 559
560 560 fn get_node<'tree>(
561 561 &'tree self,
562 562 path: &HgPath,
563 563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
564 564 let mut children = self.root.as_ref();
565 565 let mut components = path.components();
566 566 let mut component =
 567 567 components.next().expect("expected at least one component");
568 568 loop {
569 569 if let Some(child) = children.get(component, self.on_disk)? {
570 570 if let Some(next_component) = components.next() {
571 571 component = next_component;
572 572 children = child.children(self.on_disk)?;
573 573 } else {
574 574 return Ok(Some(child));
575 575 }
576 576 } else {
577 577 return Ok(None);
578 578 }
579 579 }
580 580 }
581 581
582 582 pub fn has_node(
583 583 &self,
584 584 path: &HgPath,
585 585 ) -> Result<bool, DirstateV2ParseError> {
586 586 let node = self.get_node(path)?;
587 587 Ok(node.is_some())
588 588 }
589 589
590 590 /// Returns a mutable reference to the node at `path` if it exists
591 591 ///
592 592 /// `each_ancestor` is a callback that is called for each ancestor node
593 593 /// when descending the tree. It is used to keep the different counters
594 594 /// of the `DirstateMap` up-to-date.
595 595 fn get_node_mut<'tree>(
596 596 &'tree mut self,
597 597 path: &HgPath,
598 598 each_ancestor: impl FnMut(&mut Node),
599 599 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
600 600 Self::get_node_mut_inner(
601 601 self.on_disk,
602 602 &mut self.unreachable_bytes,
603 603 &mut self.root,
604 604 path,
605 605 each_ancestor,
606 606 )
607 607 }
608 608
609 609 /// Lower-level version of `get_node_mut`.
610 610 ///
611 611 /// This takes `root` instead of `&mut self` so that callers can mutate
612 612 /// other fields while the returned borrow is still valid.
613 613 ///
614 614 /// `each_ancestor` is a callback that is called for each ancestor node
615 615 /// when descending the tree. It is used to keep the different counters
616 616 /// of the `DirstateMap` up-to-date.
617 617 fn get_node_mut_inner<'tree>(
618 618 on_disk: &'on_disk [u8],
619 619 unreachable_bytes: &mut u32,
620 620 root: &'tree mut ChildNodes<'on_disk>,
621 621 path: &HgPath,
622 622 mut each_ancestor: impl FnMut(&mut Node),
623 623 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
624 624 let mut children = root;
625 625 let mut components = path.components();
626 626 let mut component =
 627 627 components.next().expect("expected at least one component");
628 628 loop {
629 629 if let Some(child) = children
630 630 .make_mut(on_disk, unreachable_bytes)?
631 631 .get_mut(component)
632 632 {
633 633 if let Some(next_component) = components.next() {
634 634 each_ancestor(child);
635 635 component = next_component;
636 636 children = &mut child.children;
637 637 } else {
638 638 return Ok(Some(child));
639 639 }
640 640 } else {
641 641 return Ok(None);
642 642 }
643 643 }
644 644 }
645 645
646 646 /// Get a mutable reference to the node at `path`, creating it if it does
647 647 /// not exist.
648 648 ///
649 649 /// `each_ancestor` is a callback that is called for each ancestor node
650 650 /// when descending the tree. It is used to keep the different counters
651 651 /// of the `DirstateMap` up-to-date.
652 652 fn get_or_insert_node<'tree, 'path>(
653 653 &'tree mut self,
654 654 path: &'path HgPath,
655 655 each_ancestor: impl FnMut(&mut Node),
656 656 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
657 657 Self::get_or_insert_node_inner(
658 658 self.on_disk,
659 659 &mut self.unreachable_bytes,
660 660 &mut self.root,
661 661 path,
662 662 WithBasename::to_cow_owned,
663 663 each_ancestor,
664 664 )
665 665 }
666 666
 667 667 /// Lower-level version of `get_or_insert_node`, used when parsing
 668 668 /// disk data to avoid extra allocations for new nodes.
669 669 fn get_or_insert_node_inner<'tree, 'path>(
670 670 on_disk: &'on_disk [u8],
671 671 unreachable_bytes: &mut u32,
672 672 root: &'tree mut ChildNodes<'on_disk>,
673 673 path: &'path HgPath,
674 674 to_cow: impl Fn(
675 675 WithBasename<&'path HgPath>,
676 676 ) -> WithBasename<Cow<'on_disk, HgPath>>,
677 677 mut each_ancestor: impl FnMut(&mut Node),
678 678 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
679 679 let mut child_nodes = root;
680 680 let mut inclusive_ancestor_paths =
681 681 WithBasename::inclusive_ancestors_of(path);
682 682 let mut ancestor_path = inclusive_ancestor_paths
683 683 .next()
684 684 .expect("expected at least one inclusive ancestor");
685 685 loop {
686 686 let (_, child_node) = child_nodes
687 687 .make_mut(on_disk, unreachable_bytes)?
688 688 .raw_entry_mut()
689 689 .from_key(ancestor_path.base_name())
690 690 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
691 691 if let Some(next) = inclusive_ancestor_paths.next() {
692 692 each_ancestor(child_node);
693 693 ancestor_path = next;
694 694 child_nodes = &mut child_node.children;
695 695 } else {
696 696 return Ok(child_node);
697 697 }
698 698 }
699 699 }
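    // Rough illustration of the ancestor walk above, assuming the path
    // `some/nested/file` and an initially empty tree: the loop visits the
    // inclusive ancestors `some`, `some/nested` and `some/nested/file` in
    // order, inserting a default `Node` for each missing one, calls
    // `each_ancestor` on `some` and `some/nested` only, and finally
    // returns a mutable reference to the `some/nested/file` node.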
700 700
701 701 #[allow(clippy::too_many_arguments)]
702 702 fn reset_state(
703 703 &mut self,
704 704 filename: &HgPath,
705 705 old_entry_opt: Option<DirstateEntry>,
706 706 wc_tracked: bool,
707 707 p1_tracked: bool,
708 708 p2_info: bool,
709 709 has_meaningful_mtime: bool,
710 710 parent_file_data_opt: Option<ParentFileData>,
711 711 ) -> Result<(), DirstateError> {
712 712 let (had_entry, was_tracked) = match old_entry_opt {
713 713 Some(old_entry) => (true, old_entry.tracked()),
714 714 None => (false, false),
715 715 };
716 716 let node = self.get_or_insert_node(filename, |ancestor| {
717 717 if !had_entry {
718 718 ancestor.descendants_with_entry_count += 1;
719 719 }
720 720 if was_tracked {
721 721 if !wc_tracked {
722 722 ancestor.tracked_descendants_count = ancestor
723 723 .tracked_descendants_count
724 724 .checked_sub(1)
725 725 .expect("tracked count to be >= 0");
726 726 }
727 727 } else if wc_tracked {
728 728 ancestor.tracked_descendants_count += 1;
729 729 }
730 730 })?;
731 731
732 732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
733 733 DirstateV2Data {
734 734 wc_tracked,
735 735 p1_tracked,
736 736 p2_info,
737 737 mode_size: parent_file_data.mode_size,
738 738 mtime: if has_meaningful_mtime {
739 739 parent_file_data.mtime
740 740 } else {
741 741 None
742 742 },
743 743 ..Default::default()
744 744 }
745 745 } else {
746 746 DirstateV2Data {
747 747 wc_tracked,
748 748 p1_tracked,
749 749 p2_info,
750 750 ..Default::default()
751 751 }
752 752 };
753 753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
754 754 if !had_entry {
755 755 self.nodes_with_entry_count += 1;
756 756 }
757 757 Ok(())
758 758 }
759 759
760 760 fn set_tracked(
761 761 &mut self,
762 762 filename: &HgPath,
763 763 old_entry_opt: Option<DirstateEntry>,
764 764 ) -> Result<bool, DirstateV2ParseError> {
765 765 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
766 766 let had_entry = old_entry_opt.is_some();
767 let tracked_count_increment = if was_tracked { 0 } else { 1 };
767 let tracked_count_increment = u32::from(!was_tracked);
768 768 let mut new = false;
769 769
770 770 let node = self.get_or_insert_node(filename, |ancestor| {
771 771 if !had_entry {
772 772 ancestor.descendants_with_entry_count += 1;
773 773 }
774 774
775 775 ancestor.tracked_descendants_count += tracked_count_increment;
776 776 })?;
777 777 if let Some(old_entry) = old_entry_opt {
778 778 let mut e = old_entry;
779 779 if e.tracked() {
780 780 // XXX
 781 781 // This is probably overkill for most cases, but we need it to
 782 782 // fully replace the `normallookup` call with the `set_tracked`
 783 783 // one. Consider smoothing this out in the future.
784 784 e.set_possibly_dirty();
785 785 } else {
786 786 new = true;
787 787 e.set_tracked();
788 788 }
789 789 node.data = NodeData::Entry(e)
790 790 } else {
791 791 node.data = NodeData::Entry(DirstateEntry::new_tracked());
792 792 self.nodes_with_entry_count += 1;
793 793 new = true;
794 794 };
795 795 Ok(new)
796 796 }
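    // Example of the bookkeeping, assuming a fresh map: tracking
    // `some/nested/path` creates three nodes and bumps both
    // `descendants_with_entry_count` and `tracked_descendants_count` by
    // one on `some` and on `some/nested`; the tracked counters are what
    // `test_tracked_descendants_simple` below asserts.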
797 797
798 798 /// Set a node as untracked in the dirstate.
799 799 ///
800 800 /// It is the responsibility of the caller to remove the copy source and/or
801 801 /// the entry itself if appropriate.
802 802 ///
803 803 /// # Panics
804 804 ///
805 805 /// Panics if the node does not exist.
806 806 fn set_untracked(
807 807 &mut self,
808 808 filename: &HgPath,
809 809 old_entry: DirstateEntry,
810 810 ) -> Result<(), DirstateV2ParseError> {
811 811 let node = self
812 812 .get_node_mut(filename, |ancestor| {
813 813 ancestor.tracked_descendants_count = ancestor
814 814 .tracked_descendants_count
815 815 .checked_sub(1)
816 816 .expect("tracked_descendants_count should be >= 0");
817 817 })?
818 818 .expect("node should exist");
819 819 let mut new_entry = old_entry;
820 820 new_entry.set_untracked();
821 821 node.data = NodeData::Entry(new_entry);
822 822 Ok(())
823 823 }
824 824
825 825 /// Set a node as clean in the dirstate.
826 826 ///
827 827 /// It is the responsibility of the caller to remove the copy source.
828 828 ///
829 829 /// # Panics
830 830 ///
831 831 /// Panics if the node does not exist.
832 832 fn set_clean(
833 833 &mut self,
834 834 filename: &HgPath,
835 835 old_entry: DirstateEntry,
836 836 mode: u32,
837 837 size: u32,
838 838 mtime: TruncatedTimestamp,
839 839 ) -> Result<(), DirstateError> {
840 840 let node = self
841 841 .get_node_mut(filename, |ancestor| {
842 842 if !old_entry.tracked() {
843 843 ancestor.tracked_descendants_count += 1;
844 844 }
845 845 })?
846 846 .expect("node should exist");
847 847 let mut new_entry = old_entry;
848 848 new_entry.set_clean(mode, size, mtime);
849 849 node.data = NodeData::Entry(new_entry);
850 850 Ok(())
851 851 }
852 852
853 853 /// Set a node as possibly dirty in the dirstate.
854 854 ///
855 855 /// # Panics
856 856 ///
857 857 /// Panics if the node does not exist.
858 858 fn set_possibly_dirty(
859 859 &mut self,
860 860 filename: &HgPath,
861 861 ) -> Result<(), DirstateError> {
862 862 let node = self
863 863 .get_node_mut(filename, |_ancestor| {})?
864 864 .expect("node should exist");
865 865 let entry = node.data.as_entry_mut().expect("entry should exist");
866 866 entry.set_possibly_dirty();
867 867 node.data = NodeData::Entry(*entry);
868 868 Ok(())
869 869 }
870 870
871 871 /// Clears the cached mtime for the (potential) folder at `path`.
872 872 pub(super) fn clear_cached_mtime(
873 873 &mut self,
874 874 path: &HgPath,
875 875 ) -> Result<(), DirstateV2ParseError> {
876 876 let node = match self.get_node_mut(path, |_ancestor| {})? {
877 877 Some(node) => node,
878 878 None => return Ok(()),
879 879 };
880 880 if let NodeData::CachedDirectory { .. } = &node.data {
881 881 node.data = NodeData::None
882 882 }
883 883 Ok(())
884 884 }
885 885
886 886 /// Sets the cached mtime for the (potential) folder at `path`.
887 887 pub(super) fn set_cached_mtime(
888 888 &mut self,
889 889 path: &HgPath,
890 890 mtime: TruncatedTimestamp,
891 891 ) -> Result<(), DirstateV2ParseError> {
892 892 let node = match self.get_node_mut(path, |_ancestor| {})? {
893 893 Some(node) => node,
894 894 None => return Ok(()),
895 895 };
896 896 match &node.data {
897 897 NodeData::Entry(_) => {} // Don’t overwrite an entry
898 898 NodeData::CachedDirectory { .. } | NodeData::None => {
899 899 node.data = NodeData::CachedDirectory { mtime }
900 900 }
901 901 }
902 902 Ok(())
903 903 }
904 904
905 905 fn iter_nodes<'tree>(
906 906 &'tree self,
907 907 ) -> impl Iterator<
908 908 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
909 909 > + 'tree {
910 910 // Depth first tree traversal.
911 911 //
912 912 // If we could afford internal iteration and recursion,
913 913 // this would look like:
914 914 //
915 915 // ```
916 916 // fn traverse_children(
917 917 // children: &ChildNodes,
918 918 // each: &mut impl FnMut(&Node),
919 919 // ) {
920 920 // for child in children.values() {
921 921 // traverse_children(&child.children, each);
922 922 // each(child);
923 923 // }
924 924 // }
925 925 // ```
926 926 //
927 927 // However we want an external iterator and therefore can’t use the
928 928 // call stack. Use an explicit stack instead:
929 929 let mut stack = Vec::new();
930 930 let mut iter = self.root.as_ref().iter();
931 931 std::iter::from_fn(move || {
932 932 while let Some(child_node) = iter.next() {
933 933 let children = match child_node.children(self.on_disk) {
934 934 Ok(children) => children,
935 935 Err(error) => return Some(Err(error)),
936 936 };
937 937 // Pseudo-recursion
938 938 let new_iter = children.iter();
939 939 let old_iter = std::mem::replace(&mut iter, new_iter);
940 940 stack.push((child_node, old_iter));
941 941 }
942 942 // Found the end of a `children.iter()` iterator.
943 943 if let Some((child_node, next_iter)) = stack.pop() {
944 944 // "Return" from pseudo-recursion by restoring state from the
945 945 // explicit stack
946 946 iter = next_iter;
947 947
948 948 Some(Ok(child_node))
949 949 } else {
950 950 // Reached the bottom of the stack, we’re done
951 951 None
952 952 }
953 953 })
954 954 }
955 955
956 956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
957 957 if let Cow::Borrowed(path) = path {
958 958 *unreachable_bytes += path.len() as u32
959 959 }
960 960 }
961 961
962 962 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
963 963 self.write_mode = write_mode;
964 964 }
965 965 }
966 966
967 967 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
968 968
969 969 impl OwningDirstateMap {
970 970 pub fn clear(&mut self) {
971 971 self.with_dmap_mut(|map| {
972 972 map.root = Default::default();
973 973 map.nodes_with_entry_count = 0;
974 974 map.nodes_with_copy_source_count = 0;
975 975 map.unreachable_bytes = map.on_disk.len() as u32;
976 976 });
977 977 }
978 978
979 979 pub fn set_tracked(
980 980 &mut self,
981 981 filename: &HgPath,
982 982 ) -> Result<bool, DirstateV2ParseError> {
983 983 let old_entry_opt = self.get(filename)?;
984 984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
985 985 }
986 986
987 987 pub fn set_untracked(
988 988 &mut self,
989 989 filename: &HgPath,
990 990 ) -> Result<bool, DirstateError> {
991 991 let old_entry_opt = self.get(filename)?;
992 992 match old_entry_opt {
993 993 None => Ok(false),
994 994 Some(old_entry) => {
995 995 if !old_entry.tracked() {
 996 996 // `DirstateMap::set_untracked` is not a no-op when the
 997 997 // entry is already untracked: it would still decrement
 998 998 // the tracked counters on the way down, so return early.
999 999 return Ok(true);
1000 1000 }
1001 1001 if old_entry.added() {
 1002 1002 // Untracking an "added" entry would just leave a
 1003 1003 // worthless entry behind (and other parts of the code
 1004 1004 // would complain about it), so drop it entirely.
1005 1005 self.drop_entry_and_copy_source(filename)?;
1006 1006 return Ok(true);
1007 1007 }
1008 1008 if !old_entry.p2_info() {
1009 1009 self.copy_map_remove(filename)?;
1010 1010 }
1011 1011
1012 1012 self.with_dmap_mut(|map| {
1013 1013 map.set_untracked(filename, old_entry)?;
1014 1014 Ok(true)
1015 1015 })
1016 1016 }
1017 1017 }
1018 1018 }
1019 1019
1020 1020 pub fn set_clean(
1021 1021 &mut self,
1022 1022 filename: &HgPath,
1023 1023 mode: u32,
1024 1024 size: u32,
1025 1025 mtime: TruncatedTimestamp,
1026 1026 ) -> Result<(), DirstateError> {
1027 1027 let old_entry = match self.get(filename)? {
1028 1028 None => {
1029 1029 return Err(
1030 1030 DirstateMapError::PathNotFound(filename.into()).into()
1031 1031 )
1032 1032 }
1033 1033 Some(e) => e,
1034 1034 };
1035 1035 self.copy_map_remove(filename)?;
1036 1036 self.with_dmap_mut(|map| {
1037 1037 map.set_clean(filename, old_entry, mode, size, mtime)
1038 1038 })
1039 1039 }
1040 1040
1041 1041 pub fn set_possibly_dirty(
1042 1042 &mut self,
1043 1043 filename: &HgPath,
1044 1044 ) -> Result<(), DirstateError> {
1045 1045 if self.get(filename)?.is_none() {
1046 1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1047 1047 }
1048 1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1049 1049 }
1050 1050
1051 1051 pub fn reset_state(
1052 1052 &mut self,
1053 1053 filename: &HgPath,
1054 1054 wc_tracked: bool,
1055 1055 p1_tracked: bool,
1056 1056 p2_info: bool,
1057 1057 has_meaningful_mtime: bool,
1058 1058 parent_file_data_opt: Option<ParentFileData>,
1059 1059 ) -> Result<(), DirstateError> {
1060 1060 if !(p1_tracked || p2_info || wc_tracked) {
1061 1061 self.drop_entry_and_copy_source(filename)?;
1062 1062 return Ok(());
1063 1063 }
1064 1064 self.copy_map_remove(filename)?;
1065 1065 let old_entry_opt = self.get(filename)?;
1066 1066 self.with_dmap_mut(|map| {
1067 1067 map.reset_state(
1068 1068 filename,
1069 1069 old_entry_opt,
1070 1070 wc_tracked,
1071 1071 p1_tracked,
1072 1072 p2_info,
1073 1073 has_meaningful_mtime,
1074 1074 parent_file_data_opt,
1075 1075 )
1076 1076 })
1077 1077 }
1078 1078
1079 1079 pub fn drop_entry_and_copy_source(
1080 1080 &mut self,
1081 1081 filename: &HgPath,
1082 1082 ) -> Result<(), DirstateError> {
1083 1083 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1084 1084 struct Dropped {
1085 1085 was_tracked: bool,
1086 1086 had_entry: bool,
1087 1087 had_copy_source: bool,
1088 1088 }
1089 1089
1090 1090 /// If this returns `Ok(Some((dropped, removed)))`, then
1091 1091 ///
1092 1092 /// * `dropped` is about the leaf node that was at `filename`
1093 1093 /// * `removed` is whether this particular level of recursion just
1094 1094 /// removed a node in `nodes`.
1095 1095 fn recur<'on_disk>(
1096 1096 on_disk: &'on_disk [u8],
1097 1097 unreachable_bytes: &mut u32,
1098 1098 nodes: &mut ChildNodes<'on_disk>,
1099 1099 path: &HgPath,
1100 1100 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1101 1101 let (first_path_component, rest_of_path) =
1102 1102 path.split_first_component();
1103 1103 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1104 1104 let node = if let Some(node) = nodes.get_mut(first_path_component)
1105 1105 {
1106 1106 node
1107 1107 } else {
1108 1108 return Ok(None);
1109 1109 };
1110 1110 let dropped;
1111 1111 if let Some(rest) = rest_of_path {
1112 1112 if let Some((d, removed)) = recur(
1113 1113 on_disk,
1114 1114 unreachable_bytes,
1115 1115 &mut node.children,
1116 1116 rest,
1117 1117 )? {
1118 1118 dropped = d;
1119 1119 if dropped.had_entry {
1120 1120 node.descendants_with_entry_count = node
1121 1121 .descendants_with_entry_count
1122 1122 .checked_sub(1)
1123 1123 .expect(
1124 1124 "descendants_with_entry_count should be >= 0",
1125 1125 );
1126 1126 }
1127 1127 if dropped.was_tracked {
1128 1128 node.tracked_descendants_count = node
1129 1129 .tracked_descendants_count
1130 1130 .checked_sub(1)
1131 1131 .expect(
1132 1132 "tracked_descendants_count should be >= 0",
1133 1133 );
1134 1134 }
1135 1135
1136 1136 // Directory caches must be invalidated when removing a
1137 1137 // child node
1138 1138 if removed {
1139 1139 if let NodeData::CachedDirectory { .. } = &node.data {
1140 1140 node.data = NodeData::None
1141 1141 }
1142 1142 }
1143 1143 } else {
1144 1144 return Ok(None);
1145 1145 }
1146 1146 } else {
1147 1147 let entry = node.data.as_entry();
1148 1148 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1149 1149 let had_entry = entry.is_some();
1150 1150 if had_entry {
1151 1151 node.data = NodeData::None
1152 1152 }
1153 1153 let mut had_copy_source = false;
1154 1154 if let Some(source) = &node.copy_source {
1155 1155 DirstateMap::count_dropped_path(
1156 1156 unreachable_bytes,
1157 1157 Cow::Borrowed(source),
1158 1158 );
1159 1159 had_copy_source = true;
1160 1160 node.copy_source = None
1161 1161 }
1162 1162 dropped = Dropped {
1163 1163 was_tracked,
1164 1164 had_entry,
1165 1165 had_copy_source,
1166 1166 };
1167 1167 }
1168 1168 // After recursion, for both leaf (rest_of_path is None) nodes and
1169 1169 // parent nodes, remove a node if it just became empty.
1170 1170 let remove = !node.data.has_entry()
1171 1171 && node.copy_source.is_none()
1172 1172 && node.children.is_empty();
1173 1173 if remove {
1174 1174 let (key, _) =
1175 1175 nodes.remove_entry(first_path_component).unwrap();
1176 1176 DirstateMap::count_dropped_path(
1177 1177 unreachable_bytes,
1178 1178 Cow::Borrowed(key.full_path()),
1179 1179 )
1180 1180 }
1181 1181 Ok(Some((dropped, remove)))
1182 1182 }
1183 1183
1184 1184 self.with_dmap_mut(|map| {
1185 1185 if let Some((dropped, _removed)) = recur(
1186 1186 map.on_disk,
1187 1187 &mut map.unreachable_bytes,
1188 1188 &mut map.root,
1189 1189 filename,
1190 1190 )? {
1191 1191 if dropped.had_entry {
1192 1192 map.nodes_with_entry_count = map
1193 1193 .nodes_with_entry_count
1194 1194 .checked_sub(1)
1195 1195 .expect("nodes_with_entry_count should be >= 0");
1196 1196 }
1197 1197 if dropped.had_copy_source {
1198 1198 map.nodes_with_copy_source_count = map
1199 1199 .nodes_with_copy_source_count
1200 1200 .checked_sub(1)
1201 1201 .expect("nodes_with_copy_source_count should be >= 0");
1202 1202 }
1203 1203 } else {
1204 1204 debug_assert!(!was_tracked);
1205 1205 }
1206 1206 Ok(())
1207 1207 })
1208 1208 }
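    // Sketch of the cascade with a map holding only the entry `a/b/c`:
    // dropping it clears the leaf's data, then each level of `recur` on
    // the way back up sees a node with no entry, no copy source and no
    // remaining children and removes it, so `a/b` and `a` disappear too;
    // `nodes_with_entry_count` is decremented once and every removed
    // path goes through `count_dropped_path` to keep `unreachable_bytes`
    // up to date.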
1209 1209
1210 1210 pub fn has_tracked_dir(
1211 1211 &mut self,
1212 1212 directory: &HgPath,
1213 1213 ) -> Result<bool, DirstateError> {
1214 1214 self.with_dmap_mut(|map| {
1215 1215 if let Some(node) = map.get_node(directory)? {
1216 1216 // A node without a `DirstateEntry` was created to hold child
1217 1217 // nodes, and is therefore a directory.
1218 1218 let is_dir = node.entry()?.is_none();
1219 1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 1220 } else {
1221 1221 Ok(false)
1222 1222 }
1223 1223 })
1224 1224 }
1225 1225
1226 1226 pub fn has_dir(
1227 1227 &mut self,
1228 1228 directory: &HgPath,
1229 1229 ) -> Result<bool, DirstateError> {
1230 1230 self.with_dmap_mut(|map| {
1231 1231 if let Some(node) = map.get_node(directory)? {
1232 1232 // A node without a `DirstateEntry` was created to hold child
1233 1233 // nodes, and is therefore a directory.
1234 1234 let is_dir = node.entry()?.is_none();
1235 1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 1236 } else {
1237 1237 Ok(false)
1238 1238 }
1239 1239 })
1240 1240 }
1241 1241
1242 1242 #[logging_timer::time("trace")]
1243 1243 pub fn pack_v1(
1244 1244 &self,
1245 1245 parents: DirstateParents,
1246 1246 ) -> Result<Vec<u8>, DirstateError> {
1247 1247 let map = self.get_map();
 1248 1248 // Optimization (to be measured?): pre-compute size to avoid `Vec`
1249 1249 // reallocations
1250 1250 let mut size = parents.as_bytes().len();
1251 1251 for node in map.iter_nodes() {
1252 1252 let node = node?;
1253 1253 if node.entry()?.is_some() {
1254 1254 size += packed_entry_size(
1255 1255 node.full_path(map.on_disk)?,
1256 1256 node.copy_source(map.on_disk)?,
1257 1257 );
1258 1258 }
1259 1259 }
1260 1260
1261 1261 let mut packed = Vec::with_capacity(size);
1262 1262 packed.extend(parents.as_bytes());
1263 1263
1264 1264 for node in map.iter_nodes() {
1265 1265 let node = node?;
1266 1266 if let Some(entry) = node.entry()? {
1267 1267 pack_entry(
1268 1268 node.full_path(map.on_disk)?,
1269 1269 &entry,
1270 1270 node.copy_source(map.on_disk)?,
1271 1271 &mut packed,
1272 1272 );
1273 1273 }
1274 1274 }
1275 1275 Ok(packed)
1276 1276 }
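    // Shape of the v1 output assembled above: the serialized parents come
    // first, followed by one packed record per node that has an entry,
    // each record carrying the entry, its full path and, if present, its
    // copy source; the first pass only exists to size the `Vec` up front.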
1277 1277
1278 1278 /// Returns new data and metadata together with whether that data should be
1279 1279 /// appended to the existing data file whose content is at
1280 1280 /// `map.on_disk` (true), instead of written to a new data file
1281 1281 /// (false), and the previous size of data on disk.
1282 1282 #[logging_timer::time("trace")]
1283 1283 pub fn pack_v2(
1284 1284 &self,
1285 1285 write_mode: DirstateMapWriteMode,
1286 1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1287 1287 {
1288 1288 let map = self.get_map();
1289 1289 on_disk::write(map, write_mode)
1290 1290 }
1291 1291
1292 1292 /// `callback` allows the caller to process and do something with the
1293 1293 /// results of the status. This is needed to do so efficiently (i.e.
1294 1294 /// without cloning the `DirstateStatus` object with its paths) because
1295 1295 /// we need to borrow from `Self`.
1296 1296 pub fn with_status<R>(
1297 1297 &mut self,
1298 1298 matcher: &(dyn Matcher + Sync),
1299 1299 root_dir: PathBuf,
1300 1300 ignore_files: Vec<PathBuf>,
1301 1301 options: StatusOptions,
1302 1302 callback: impl for<'r> FnOnce(
1303 1303 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1304 1304 ) -> R,
1305 1305 ) -> R {
1306 1306 self.with_dmap_mut(|map| {
1307 1307 callback(super::status::status(
1308 1308 map,
1309 1309 matcher,
1310 1310 root_dir,
1311 1311 ignore_files,
1312 1312 options,
1313 1313 ))
1314 1314 })
1315 1315 }
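    // Hypothetical call site (identifiers invented for the sketch): the
    // callback borrows from the map, so anything needed afterwards has to
    // be extracted before it returns, e.g.
    //
    //     dmap.with_status(&matcher, root_dir, ignore_files, options, |res| {
    //         let (ds_status, _warnings) = res?;
    //         // ...copy whatever is needed out of `ds_status` here...
    //         Ok(())
    //     })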
1316 1316
1317 1317 pub fn copy_map_len(&self) -> usize {
1318 1318 let map = self.get_map();
1319 1319 map.nodes_with_copy_source_count as usize
1320 1320 }
1321 1321
1322 1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 1323 let map = self.get_map();
1324 1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 1326 Some((node.full_path(map.on_disk)?, source))
1327 1327 } else {
1328 1328 None
1329 1329 })
1330 1330 }))
1331 1331 }
1332 1332
1333 1333 pub fn copy_map_contains_key(
1334 1334 &self,
1335 1335 key: &HgPath,
1336 1336 ) -> Result<bool, DirstateV2ParseError> {
1337 1337 let map = self.get_map();
1338 1338 Ok(if let Some(node) = map.get_node(key)? {
1339 1339 node.has_copy_source()
1340 1340 } else {
1341 1341 false
1342 1342 })
1343 1343 }
1344 1344
1345 1345 pub fn copy_map_get(
1346 1346 &self,
1347 1347 key: &HgPath,
1348 1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 1349 let map = self.get_map();
1350 1350 if let Some(node) = map.get_node(key)? {
1351 1351 if let Some(source) = node.copy_source(map.on_disk)? {
1352 1352 return Ok(Some(source));
1353 1353 }
1354 1354 }
1355 1355 Ok(None)
1356 1356 }
1357 1357
1358 1358 pub fn copy_map_remove(
1359 1359 &mut self,
1360 1360 key: &HgPath,
1361 1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1362 1362 self.with_dmap_mut(|map| {
1363 1363 let count = &mut map.nodes_with_copy_source_count;
1364 1364 let unreachable_bytes = &mut map.unreachable_bytes;
1365 1365 Ok(DirstateMap::get_node_mut_inner(
1366 1366 map.on_disk,
1367 1367 unreachable_bytes,
1368 1368 &mut map.root,
1369 1369 key,
1370 1370 |_ancestor| {},
1371 1371 )?
1372 1372 .and_then(|node| {
1373 1373 if let Some(source) = &node.copy_source {
1374 1374 *count = count
1375 1375 .checked_sub(1)
1376 1376 .expect("nodes_with_copy_source_count should be >= 0");
1377 1377 DirstateMap::count_dropped_path(
1378 1378 unreachable_bytes,
1379 1379 Cow::Borrowed(source),
1380 1380 );
1381 1381 }
1382 1382 node.copy_source.take().map(Cow::into_owned)
1383 1383 }))
1384 1384 })
1385 1385 }
1386 1386
1387 1387 pub fn copy_map_insert(
1388 1388 &mut self,
1389 1389 key: &HgPath,
1390 1390 value: &HgPath,
1391 1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1392 1392 self.with_dmap_mut(|map| {
1393 1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
 1394 1394 let had_no_copy_source = node.copy_source.is_none();
1395 1395 let old = node
1396 1396 .copy_source
1397 1397 .replace(value.to_owned().into())
1398 1398 .map(Cow::into_owned);
 1399 1399 if had_no_copy_source {
1400 1400 map.nodes_with_copy_source_count += 1
1401 1401 }
1402 1402 Ok(old)
1403 1403 })
1404 1404 }
1405 1405
1406 1406 pub fn len(&self) -> usize {
1407 1407 let map = self.get_map();
1408 1408 map.nodes_with_entry_count as usize
1409 1409 }
1410 1410
1411 1411 pub fn is_empty(&self) -> bool {
1412 1412 self.len() == 0
1413 1413 }
1414 1414
1415 1415 pub fn contains_key(
1416 1416 &self,
1417 1417 key: &HgPath,
1418 1418 ) -> Result<bool, DirstateV2ParseError> {
1419 1419 Ok(self.get(key)?.is_some())
1420 1420 }
1421 1421
1422 1422 pub fn get(
1423 1423 &self,
1424 1424 key: &HgPath,
1425 1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1426 1426 let map = self.get_map();
1427 1427 Ok(if let Some(node) = map.get_node(key)? {
1428 1428 node.entry()?
1429 1429 } else {
1430 1430 None
1431 1431 })
1432 1432 }
1433 1433
1434 1434 pub fn iter(&self) -> StateMapIter<'_> {
1435 1435 let map = self.get_map();
1436 1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1437 1437 Ok(if let Some(entry) = node.entry()? {
1438 1438 Some((node.full_path(map.on_disk)?, entry))
1439 1439 } else {
1440 1440 None
1441 1441 })
1442 1442 }))
1443 1443 }
1444 1444
1445 1445 pub fn iter_tracked_dirs(
1446 1446 &mut self,
1447 1447 ) -> Result<
1448 1448 Box<
1449 1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1450 1450 + Send
1451 1451 + '_,
1452 1452 >,
1453 1453 DirstateError,
1454 1454 > {
1455 1455 let map = self.get_map();
1456 1456 let on_disk = map.on_disk;
1457 1457 Ok(Box::new(filter_map_results(
1458 1458 map.iter_nodes(),
1459 1459 move |node| {
1460 1460 Ok(if node.tracked_descendants_count() > 0 {
1461 1461 Some(node.full_path(on_disk)?)
1462 1462 } else {
1463 1463 None
1464 1464 })
1465 1465 },
1466 1466 )))
1467 1467 }
1468 1468
1469 1469 /// Only public because it needs to be exposed to the Python layer.
1470 1470 /// It is not the full `setparents` logic, only the parts that mutate the
1471 1471 /// entries.
1472 1472 pub fn setparents_fixup(
1473 1473 &mut self,
1474 1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1475 1475 // XXX
1476 1476 // All the copying and re-querying is quite inefficient, but this is
1477 1477 // still a lot better than doing it from Python.
1478 1478 //
1479 1479 // The better solution is to develop a mechanism for `iter_mut`,
1480 1480 // which will be a lot more involved: we're dealing with a lazy,
1481 1481 // append-mostly, tree-like data structure. This will do for now.
1482 1482 let mut copies = vec![];
1483 1483 let mut files_with_p2_info = vec![];
1484 1484 for res in self.iter() {
1485 1485 let (path, entry) = res?;
1486 1486 if entry.p2_info() {
1487 1487 files_with_p2_info.push(path.to_owned())
1488 1488 }
1489 1489 }
1490 1490 self.with_dmap_mut(|map| {
1491 1491 for path in files_with_p2_info.iter() {
1492 1492 let node = map.get_or_insert_node(path, |_| {})?;
1493 1493 let entry =
1494 1494 node.data.as_entry_mut().expect("entry should exist");
1495 1495 entry.drop_merge_data();
1496 1496 if let Some(source) = node.copy_source.take().as_deref() {
1497 1497 copies.push((path.to_owned(), source.to_owned()));
1498 1498 }
1499 1499 }
1500 1500 Ok(copies)
1501 1501 })
1502 1502 }
1503 1503
1504 1504 pub fn debug_iter(
1505 1505 &self,
1506 1506 all: bool,
1507 1507 ) -> Box<
1508 1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1509 1509 + Send
1510 1510 + '_,
1511 1511 > {
1512 1512 let map = self.get_map();
1513 1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1514 1514 let debug_tuple = if let Some(entry) = node.entry()? {
1515 1515 entry.debug_tuple()
1516 1516 } else if !all {
1517 1517 return Ok(None);
1518 1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1519 1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1520 1520 } else {
1521 1521 (b' ', 0, -1, -1)
1522 1522 };
1523 1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1524 1524 }))
1525 1525 }
1526 1526 }
1527 1527 #[cfg(test)]
1528 1528 mod tests {
1529 1529 use super::*;
1530 1530
1531 1531 /// Shortcut to return tracked descendants of a path.
1532 1532 /// Panics if the path does not exist.
1533 1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1534 1534 let path = dbg!(HgPath::new(path));
1535 1535 let node = map.get_map().get_node(path);
1536 1536 node.unwrap().unwrap().tracked_descendants_count()
1537 1537 }
1538 1538
1539 1539 /// Shortcut to return descendants with an entry.
1540 1540 /// Panics if the path does not exist.
1541 1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1542 1542 let path = dbg!(HgPath::new(path));
1543 1543 let node = map.get_map().get_node(path);
1544 1544 node.unwrap().unwrap().descendants_with_entry_count()
1545 1545 }
1546 1546
1547 1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1548 1548 let path = dbg!(HgPath::new(path));
1549 1549 let node = map.get_map().get_node(path);
1550 1550 assert!(node.unwrap().is_none());
1551 1551 }
1552 1552
1553 1553 /// Shortcut for path creation in tests
1554 1554 fn p(b: &[u8]) -> &HgPath {
1555 1555 HgPath::new(b)
1556 1556 }
1557 1557
 1558 1558 /// Test the very simple case of a single tracked file
1559 1559 #[test]
1560 1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1561 1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1562 1562 assert_eq!(map.len(), 0);
1563 1563
1564 1564 map.set_tracked(p(b"some/nested/path"))?;
1565 1565
1566 1566 assert_eq!(map.len(), 1);
1567 1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1568 1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1570 1570
1571 1571 map.set_untracked(p(b"some/nested/path"))?;
1572 1572 assert_eq!(map.len(), 0);
1573 1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1574 1574
1575 1575 Ok(())
1576 1576 }
1577 1577
1578 1578 /// Test the simple case of all tracked, but multiple files
1579 1579 #[test]
1580 1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1581 1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1582 1582
1583 1583 map.set_tracked(p(b"some/nested/path"))?;
1584 1584 map.set_tracked(p(b"some/nested/file"))?;
1585 1585 // one layer without any files to test deletion cascade
1586 1586 map.set_tracked(p(b"some/other/nested/path"))?;
1587 1587 map.set_tracked(p(b"root_file"))?;
1588 1588 map.set_tracked(p(b"some/file"))?;
1589 1589 map.set_tracked(p(b"some/file2"))?;
1590 1590 map.set_tracked(p(b"some/file3"))?;
1591 1591
1592 1592 assert_eq!(map.len(), 7);
1593 1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1594 1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1595 1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1596 1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1597 1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1598 1598
1599 1599 map.set_untracked(p(b"some/nested/path"))?;
1600 1600 assert_eq!(map.len(), 6);
1601 1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1602 1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1603 1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605 1605
1606 1606 map.set_untracked(p(b"some/nested/file"))?;
1607 1607 assert_eq!(map.len(), 5);
1608 1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1609 1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1610 1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1611 1611 assert_does_not_exist(&map, b"some_nested");
1612 1612
1613 1613 map.set_untracked(p(b"some/other/nested/path"))?;
1614 1614 assert_eq!(map.len(), 4);
1615 1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1616 1616 assert_does_not_exist(&map, b"some/other");
1617 1617
1618 1618 map.set_untracked(p(b"root_file"))?;
1619 1619 assert_eq!(map.len(), 3);
1620 1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1621 1621 assert_does_not_exist(&map, b"root_file");
1622 1622
1623 1623 map.set_untracked(p(b"some/file"))?;
1624 1624 assert_eq!(map.len(), 2);
1625 1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1626 1626 assert_does_not_exist(&map, b"some/file");
1627 1627
1628 1628 map.set_untracked(p(b"some/file2"))?;
1629 1629 assert_eq!(map.len(), 1);
1630 1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1631 1631 assert_does_not_exist(&map, b"some/file2");
1632 1632
1633 1633 map.set_untracked(p(b"some/file3"))?;
1634 1634 assert_eq!(map.len(), 0);
1635 1635 assert_does_not_exist(&map, b"some/file3");
1636 1636
1637 1637 Ok(())
1638 1638 }
1639 1639
1640 1640 /// Check with a mix of tracked and non-tracked items
1641 1641 #[test]
1642 1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1643 1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1644 1644
1645 1645 // A file that was just added
1646 1646 map.set_tracked(p(b"some/nested/path"))?;
1647 1647 // This has no information, the dirstate should ignore it
1648 1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1649 1649 assert_does_not_exist(&map, b"some/file");
1650 1650
1651 1651 // A file that was removed
1652 1652 map.reset_state(
1653 1653 p(b"some/nested/file"),
1654 1654 false,
1655 1655 true,
1656 1656 false,
1657 1657 false,
1658 1658 None,
1659 1659 )?;
1660 1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1661 1661 // Only present in p2
1662 1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1663 1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1664 1664 // A file that was merged
1665 1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1666 1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1667 1667 // A file that is added, with info from p2
1668 1668 // XXX is that actually possible?
1669 1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1670 1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1671 1671 // A clean file
1672 1672 // One layer without any files to test deletion cascade
1673 1673 map.reset_state(
1674 1674 p(b"some/other/nested/path"),
1675 1675 true,
1676 1676 true,
1677 1677 false,
1678 1678 false,
1679 1679 None,
1680 1680 )?;
1681 1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1682 1682
1683 1683 assert_eq!(map.len(), 6);
1684 1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1685 1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1686 1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1687 1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1688 1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1689 1689 assert_eq!(
1690 1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1691 1691 0
1692 1692 );
1693 1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1694 1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1695 1695
1696 1696 // might as well check this
1697 1697 map.set_untracked(p(b"path/does/not/exist"))?;
1698 1698 assert_eq!(map.len(), 6);
1699 1699
1700 1700 map.set_untracked(p(b"some/other/nested/path"))?;
1701 1701 // It is set untracked but not deleted since it held other information
1702 1702 assert_eq!(map.len(), 6);
1703 1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1704 1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1705 1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1706 1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1707 1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1708 1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1709 1709
1710 1710 map.set_untracked(p(b"some/nested/path"))?;
1711 1711 // It is set untracked *and* deleted since it was only added
1712 1712 assert_eq!(map.len(), 5);
1713 1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1714 1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1715 1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1716 1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1717 1717 assert_does_not_exist(&map, b"some/nested/path");
1718 1718
1719 1719 map.set_untracked(p(b"root_file"))?;
1720 1720 // Untracked but not deleted
1721 1721 assert_eq!(map.len(), 5);
1722 1722 assert!(map.get(p(b"root_file"))?.is_some());
1723 1723
1724 1724 map.set_untracked(p(b"some/file2"))?;
1725 1725 assert_eq!(map.len(), 5);
1726 1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1727 1727 assert!(map.get(p(b"some/file2"))?.is_some());
1728 1728
1729 1729 map.set_untracked(p(b"some/file3"))?;
1730 1730 assert_eq!(map.len(), 5);
1731 1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1732 1732 assert!(map.get(p(b"some/file3"))?.is_some());
1733 1733
1734 1734 Ok(())
1735 1735 }
1736 1736
1737 1737 /// Check that copies counter is correctly updated
1738 1738 #[test]
1739 1739 fn test_copy_source() -> Result<(), DirstateError> {
1740 1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1741 1741
1742 1742 // Clean file
1743 1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1744 1744 // Merged file
1745 1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1746 1746 // Removed file
1747 1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1748 1748 // Added file
1749 1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1750 1750 // Add copy
1751 1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1752 1752 assert_eq!(map.copy_map_len(), 1);
1753 1753
1754 1754 // Copy override
1755 1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1756 1756 assert_eq!(map.copy_map_len(), 1);
1757 1757
1758 1758 // Multiple copies
1759 1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1760 1760 assert_eq!(map.copy_map_len(), 2);
1761 1761
1762 1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1763 1763 assert_eq!(map.copy_map_len(), 3);
1764 1764
1765 1765 // Added, so the entry is completely removed
1766 1766 map.set_untracked(p(b"files/added"))?;
1767 1767 assert_does_not_exist(&map, b"files/added");
1768 1768 assert_eq!(map.copy_map_len(), 2);
1769 1769
1770 1770 // Removed, so the entry is kept around, so is its copy
1771 1771 map.set_untracked(p(b"removed"))?;
1772 1772 assert!(map.get(p(b"removed"))?.is_some());
1773 1773 assert_eq!(map.copy_map_len(), 2);
1774 1774
1775 1775 // Clean, so the entry is kept around, but not its copy
1776 1776 map.set_untracked(p(b"files/clean"))?;
1777 1777 assert!(map.get(p(b"files/clean"))?.is_some());
1778 1778 assert_eq!(map.copy_map_len(), 1);
1779 1779
1780 1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1781 1781 assert_eq!(map.copy_map_len(), 2);
1782 1782
1783 1783 // Info from p2, so its copy source info is kept around
1784 1784 map.set_untracked(p(b"files/from_p2"))?;
1785 1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1786 1786 assert_eq!(map.copy_map_len(), 2);
1787 1787
1788 1788 Ok(())
1789 1789 }
1790 1790
1791 1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1792 1792 /// does not actually come from the disk, but it's opaque to the code being
1793 1793 /// tested.
1794 1794 #[test]
1795 1795 fn test_on_disk() -> Result<(), DirstateError> {
1796 1796 // First let's create some data to put "on disk"
1797 1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1798 1798
1799 1799 // A file that was just added
1800 1800 map.set_tracked(p(b"some/nested/added"))?;
1801 1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1802 1802
1803 1803 // A file that was removed
1804 1804 map.reset_state(
1805 1805 p(b"some/nested/removed"),
1806 1806 false,
1807 1807 true,
1808 1808 false,
1809 1809 false,
1810 1810 None,
1811 1811 )?;
1812 1812 // Only present in p2
1813 1813 map.reset_state(
1814 1814 p(b"other/p2_info_only"),
1815 1815 false,
1816 1816 false,
1817 1817 true,
1818 1818 false,
1819 1819 None,
1820 1820 )?;
1821 1821 map.copy_map_insert(
1822 1822 p(b"other/p2_info_only"),
1823 1823 p(b"other/p2_info_copy_source"),
1824 1824 )?;
1825 1825 // A file that was merged
1826 1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1827 1827 // A file that is added, with info from p2
1828 1828 // XXX is that actually possible?
1829 1829 map.reset_state(
1830 1830 p(b"other/added_with_p2"),
1831 1831 true,
1832 1832 false,
1833 1833 true,
1834 1834 false,
1835 1835 None,
1836 1836 )?;
1837 1837 // One layer without any files to test deletion cascade
1838 1838 // A clean file
1839 1839 map.reset_state(
1840 1840 p(b"some/other/nested/clean"),
1841 1841 true,
1842 1842 true,
1843 1843 false,
1844 1844 false,
1845 1845 None,
1846 1846 )?;
1847 1847
1848 1848 let (packed, metadata, _should_append, _old_data_size) =
1849 1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1850 1850 let packed_len = packed.len();
1851 1851 assert!(packed_len > 0);
1852 1852
1853 1853 // Recreate "from disk"
1854 1854 let mut map = OwningDirstateMap::new_v2(
1855 1855 packed,
1856 1856 packed_len,
1857 1857 metadata.as_bytes(),
1858 1858 vec![],
1859 1859 None,
1860 1860 )?;
1861 1861
1862 1862 // Check that everything is accounted for
1863 1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1864 1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1865 1865 assert!(map.contains_key(p(b"merged"))?);
1866 1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1867 1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1868 1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1869 1869 assert_eq!(
1870 1870 map.copy_map_get(p(b"some/nested/added"))?,
1871 1871 Some(p(b"added_copy_source"))
1872 1872 );
1873 1873 assert_eq!(
1874 1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1875 1875 Some(p(b"other/p2_info_copy_source"))
1876 1876 );
1877 1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1878 1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1879 1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1880 1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1881 1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1882 1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1883 1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1884 1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1885 1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1886 1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1887 1887 assert_eq!(map.len(), 6);
1888 1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1889 1889 assert_eq!(map.copy_map_len(), 2);
1890 1890
1891 1891 // Shouldn't change anything since it's already not tracked
1892 1892 map.set_untracked(p(b"some/nested/removed"))?;
1893 1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1894 1894
1895 1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1896 1896 panic!("root should not have been mutated")
1897 1897 }
1898 1898 // We haven't mutated enough (nothing, actually), so we should still be in
1899 1899 // the append strategy
1900 1900 assert!(map.get_map().write_should_append());
1901 1901
1902 1902 // But this mutates the structure, so there should be unreachable_bytes
1903 1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1904 1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1905 1905 assert!(unreachable_bytes > 0);
1906 1906
1907 1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1908 1908 panic!("root should have been mutated")
1909 1909 }
1910 1910
1911 1911 // This should not mutate the structure either, since `root` has
1912 1912 // already been mutated along with its direct children.
1913 1913 map.set_untracked(p(b"merged"))?;
1914 1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1915 1915
1916 1916 if let NodeRef::InMemory(_, _) =
1917 1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1918 1918 {
1919 1919 panic!("'other/added_with_p2' should not have been mutated")
1920 1920 }
1921 1921 // But this should, since it's in a different path
1922 1922 // than `<root>some/nested/added`
1923 1923 map.set_untracked(p(b"other/added_with_p2"))?;
1924 1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1925 1925
1926 1926 if let NodeRef::OnDisk(_) =
1927 1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1928 1928 {
1929 1929 panic!("'other/added_with_p2' should have been mutated")
1930 1930 }
1931 1931
1932 1932 // We have rewritten most of the tree, so we should create a new file
1933 1933 assert!(!map.get_map().write_should_append());
1934 1934
1935 1935 Ok(())
1936 1936 }
1937 1937 }
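// A hypothetical sketch (not the actual `hg-core` logic or threshold) of the
// append-versus-rewrite decision that the `write_should_append` assertions
// above exercise: keep appending to the existing dirstate-v2 data file while
// the share of unreachable bytes stays small, and rewrite the file once too
// much of it has been made unreachable by mutations.
fn should_append_sketch(unreachable_bytes: u32, total_bytes: u32) -> bool {
    const MAX_WASTE_RATIO: f64 = 0.5; // assumed ratio, for illustration only
    f64::from(unreachable_bytes) < MAX_WASTE_RATIO * f64::from(total_bytes)
}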
@@ -1,1055 +1,1055 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::{Matcher, VisitChildrenSet};
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_bytes_from_path;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::hg_path_to_path_buf;
16 16 use crate::utils::hg_path::HgPath;
17 17 use crate::BadMatch;
18 18 use crate::BadType;
19 19 use crate::DirstateStatus;
20 20 use crate::HgPathCow;
21 21 use crate::PatternFileWarning;
22 22 use crate::StatusError;
23 23 use crate::StatusOptions;
24 24 use once_cell::sync::OnceCell;
25 25 use rayon::prelude::*;
26 26 use sha1::{Digest, Sha1};
27 27 use std::borrow::Cow;
28 28 use std::io;
29 29 use std::os::unix::prelude::FileTypeExt;
30 30 use std::path::Path;
31 31 use std::path::PathBuf;
32 32 use std::sync::Mutex;
33 33 use std::time::SystemTime;
34 34
35 35 /// Returns the status of the working directory compared to its parent
36 36 /// changeset.
37 37 ///
38 38 /// This algorithm is based on traversing the filesystem tree (`fs` in function
39 39 /// and variable names) and dirstate tree at the same time. The core of this
40 40 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
41 41 /// and its use of `itertools::merge_join_by`. When reaching a path that only
42 42 /// exists in one of the two trees, depending on information requested by
43 43 /// `options` we may need to traverse the remaining subtree.
44 44 #[logging_timer::time("trace")]
45 45 pub fn status<'dirstate>(
46 46 dmap: &'dirstate mut DirstateMap,
47 47 matcher: &(dyn Matcher + Sync),
48 48 root_dir: PathBuf,
49 49 ignore_files: Vec<PathBuf>,
50 50 options: StatusOptions,
51 51 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
52 52 {
53 53 // Also cap the number of rayon threads for a Python caller of this
54 54 // function, but don't complain if the global threadpool has already been
55 55 // set, since this code path is also used by `rhg`, which calls this early.
56 56 let _ = crate::utils::cap_default_rayon_threads();
57 57
58 58 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
59 59 if options.list_ignored || options.list_unknown {
60 60 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
61 61 DirstateVersion::V1 => {
62 62 let (ignore_fn, warnings) = get_ignore_function(
63 63 ignore_files,
64 64 &root_dir,
65 65 &mut |_source, _pattern_bytes| {},
66 66 )?;
67 67 (ignore_fn, warnings, None)
68 68 }
69 69 DirstateVersion::V2 => {
70 70 let mut hasher = Sha1::new();
71 71 let (ignore_fn, warnings) = get_ignore_function(
72 72 ignore_files,
73 73 &root_dir,
74 74 &mut |source, pattern_bytes| {
75 75 // If inside the repo, use the relative version to
76 76 // make it deterministic inside tests.
77 77 // The performance hit should be negligible.
78 78 let source = source
79 79 .strip_prefix(&root_dir)
80 80 .unwrap_or(source);
81 81 let source = get_bytes_from_path(source);
82 82
83 83 let mut subhasher = Sha1::new();
84 84 subhasher.update(pattern_bytes);
85 85 let patterns_hash = subhasher.finalize();
86 86
87 87 hasher.update(source);
88 88 hasher.update(b" ");
89 89 hasher.update(patterns_hash);
90 90 hasher.update(b"\n");
91 91 },
92 92 )?;
93 93 let new_hash = *hasher.finalize().as_ref();
94 94 let changed = new_hash != dmap.ignore_patterns_hash;
95 95 dmap.ignore_patterns_hash = new_hash;
96 96 (ignore_fn, warnings, Some(changed))
97 97 }
98 98 };
99 99 (ignore_fn, warnings, changed)
100 100 } else {
101 101 (Box::new(|&_| true), vec![], None)
102 102 };
103 103
104 104 let filesystem_time_at_status_start =
105 105 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
106 106
107 107 // If the repository is under the current directory, prefer using a
108 108 // relative path, so the kernel needs to traverse fewer directories in every
109 109 // call to `read_dir` or `symlink_metadata`.
110 110 // This is effective in the common case where the current directory is the
111 111 // repository root.
112 112
113 113 // TODO: Better yet would be to use libc functions like `openat` and
114 114 // `fstatat` to remove such repeated traversals entirely, but the standard
115 115 // library does not provide APIs based on those.
116 116 // Maybe with a crate like https://crates.io/crates/openat instead?
117 117 let root_dir = if let Some(relative) = std::env::current_dir()
118 118 .ok()
119 119 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
120 120 {
121 121 relative
122 122 } else {
123 123 &root_dir
124 124 };
125 125
126 126 let outcome = DirstateStatus {
127 127 filesystem_time_at_status_start,
128 128 ..Default::default()
129 129 };
130 130 let common = StatusCommon {
131 131 dmap,
132 132 options,
133 133 matcher,
134 134 ignore_fn,
135 135 outcome: Mutex::new(outcome),
136 136 ignore_patterns_have_changed: patterns_changed,
137 137 new_cacheable_directories: Default::default(),
138 138 outdated_cached_directories: Default::default(),
139 139 filesystem_time_at_status_start,
140 140 };
141 141 let is_at_repo_root = true;
142 142 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
143 143 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
144 144 let root_cached_mtime = None;
145 145 // If the path we have for the repository root is a symlink, do follow it.
146 146 // (As opposed to symlinks within the working directory which are not
147 147 // followed, using `std::fs::symlink_metadata`.)
148 148 common.traverse_fs_directory_and_dirstate(
149 149 &has_ignored_ancestor,
150 150 dmap.root.as_ref(),
151 151 hg_path,
152 152 &DirEntry {
153 153 hg_path: Cow::Borrowed(HgPath::new(b"")),
154 154 fs_path: Cow::Borrowed(root_dir),
155 155 symlink_metadata: None,
156 156 file_type: FakeFileType::Directory,
157 157 },
158 158 root_cached_mtime,
159 159 is_at_repo_root,
160 160 )?;
161 161 if let Some(file_set) = common.matcher.file_set() {
162 162 for file in file_set {
163 163 if !file.is_empty() && !dmap.has_node(file)? {
164 164 let path = hg_path_to_path_buf(file)?;
165 165 if let io::Result::Err(error) =
166 166 root_dir.join(path).symlink_metadata()
167 167 {
168 168 common.io_error(error, file)
169 169 }
170 170 }
171 171 }
172 172 }
173 173 let mut outcome = common.outcome.into_inner().unwrap();
174 174 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
175 175 let outdated = common.outdated_cached_directories.into_inner().unwrap();
176 176
177 177 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
178 178 || !outdated.is_empty()
179 179 || (!new_cacheable.is_empty()
180 180 && dmap.dirstate_version == DirstateVersion::V2);
181 181
182 182 // Remove outdated mtimes before adding new mtimes, in case a given
183 183 // directory is both
184 184 for path in &outdated {
185 185 dmap.clear_cached_mtime(path)?;
186 186 }
187 187 for (path, mtime) in &new_cacheable {
188 188 dmap.set_cached_mtime(path, *mtime)?;
189 189 }
190 190
191 191 Ok((outcome, warnings))
192 192 }
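// A self-contained sketch (illustrative file names, not from the repository)
// of the `itertools::merge_join_by` pairing described in the doc comment on
// `status()` above: two sorted sequences, dirstate names and filesystem
// names, are walked in lockstep, and every element comes out as
// dirstate-only, filesystem-only, or present in both.
fn merge_join_sketch() {
    use itertools::EitherOrBoth::{Both, Left, Right};

    let dirstate_names = ["a.rs", "b.rs", "d.rs"]; // sorted
    let fs_names = ["b.rs", "c.rs", "d.rs"]; // sorted
    for pair in itertools::merge_join_by(dirstate_names, fs_names, |d, f| d.cmp(f)) {
        match pair {
            Both(name, _) => println!("{name}: in dirstate and on disk"),
            Left(name) => println!("{name}: dirstate only (removed or deleted)"),
            Right(name) => println!("{name}: filesystem only (unknown or ignored)"),
        }
    }
}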
193 193
194 194 /// Bag of random things needed by various parts of the algorithm. Reduces the
195 195 /// number of parameters passed to functions.
196 196 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
197 197 dmap: &'tree DirstateMap<'on_disk>,
198 198 options: StatusOptions,
199 199 matcher: &'a (dyn Matcher + Sync),
200 200 ignore_fn: IgnoreFnType<'a>,
201 201 outcome: Mutex<DirstateStatus<'on_disk>>,
202 202 /// New timestamps of directories to be used for caching their readdirs
203 203 new_cacheable_directories:
204 204 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
205 205 /// Used to invalidate the readdir cache of directories
206 206 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
207 207
208 208 /// Whether ignore files like `.hgignore` have changed since the previous
209 209 /// time a `status()` call wrote their hash to the dirstate. `None` means
210 210 /// we don’t know as this run doesn’t list either ignored or uknown files
211 211 /// and therefore isn’t reading `.hgignore`.
212 212 ignore_patterns_have_changed: Option<bool>,
213 213
214 214 /// The current time at the start of the `status()` algorithm, as measured
215 215 /// and possibly truncated by the filesystem.
216 216 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
217 217 }
218 218
219 219 enum Outcome {
220 220 Modified,
221 221 Added,
222 222 Removed,
223 223 Deleted,
224 224 Clean,
225 225 Ignored,
226 226 Unknown,
227 227 Unsure,
228 228 }
229 229
230 230 /// Lazy computation of whether a given path has a hgignored
231 231 /// ancestor.
232 232 struct HasIgnoredAncestor<'a> {
233 233 /// `path` and `parent` constitute the inputs to the computation,
234 234 /// `cache` stores the outcome.
235 235 path: &'a HgPath,
236 236 parent: Option<&'a HasIgnoredAncestor<'a>>,
237 237 cache: OnceCell<bool>,
238 238 }
239 239
240 240 impl<'a> HasIgnoredAncestor<'a> {
241 241 fn create(
242 242 parent: Option<&'a HasIgnoredAncestor<'a>>,
243 243 path: &'a HgPath,
244 244 ) -> HasIgnoredAncestor<'a> {
245 245 Self {
246 246 path,
247 247 parent,
248 248 cache: OnceCell::new(),
249 249 }
250 250 }
251 251
252 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
252 fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool {
253 253 match self.parent {
254 254 None => false,
255 255 Some(parent) => {
256 256 *(self.cache.get_or_init(|| {
257 257 parent.force(ignore_fn) || ignore_fn(self.path)
258 258 }))
259 259 }
260 260 }
261 261 }
262 262 }
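// A small illustration of the memoization pattern used by `force` above:
// `OnceCell::get_or_init` runs its closure on the first access only and
// returns the cached value on every later call, so the chain of ancestors is
// consulted at most once per directory.
fn once_cell_sketch() {
    let cache: OnceCell<bool> = OnceCell::new();
    let first = *cache.get_or_init(|| {
        // imagine an expensive `ignore_fn` lookup happening here
        true
    });
    let second = *cache.get_or_init(|| unreachable!("already computed"));
    assert_eq!(first, second);
}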
263 263
264 264 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
265 265 fn push_outcome(
266 266 &self,
267 267 which: Outcome,
268 268 dirstate_node: &NodeRef<'tree, 'on_disk>,
269 269 ) -> Result<(), DirstateV2ParseError> {
270 270 let path = dirstate_node
271 271 .full_path_borrowed(self.dmap.on_disk)?
272 272 .detach_from_tree();
273 273 let copy_source = if self.options.list_copies {
274 274 dirstate_node
275 275 .copy_source_borrowed(self.dmap.on_disk)?
276 276 .map(|source| source.detach_from_tree())
277 277 } else {
278 278 None
279 279 };
280 280 self.push_outcome_common(which, path, copy_source);
281 281 Ok(())
282 282 }
283 283
284 284 fn push_outcome_without_copy_source(
285 285 &self,
286 286 which: Outcome,
287 287 path: &BorrowedPath<'_, 'on_disk>,
288 288 ) {
289 289 self.push_outcome_common(which, path.detach_from_tree(), None)
290 290 }
291 291
292 292 fn push_outcome_common(
293 293 &self,
294 294 which: Outcome,
295 295 path: HgPathCow<'on_disk>,
296 296 copy_source: Option<HgPathCow<'on_disk>>,
297 297 ) {
298 298 let mut outcome = self.outcome.lock().unwrap();
299 299 let vec = match which {
300 300 Outcome::Modified => &mut outcome.modified,
301 301 Outcome::Added => &mut outcome.added,
302 302 Outcome::Removed => &mut outcome.removed,
303 303 Outcome::Deleted => &mut outcome.deleted,
304 304 Outcome::Clean => &mut outcome.clean,
305 305 Outcome::Ignored => &mut outcome.ignored,
306 306 Outcome::Unknown => &mut outcome.unknown,
307 307 Outcome::Unsure => &mut outcome.unsure,
308 308 };
309 309 vec.push(StatusPath { path, copy_source });
310 310 }
311 311
312 312 fn read_dir(
313 313 &self,
314 314 hg_path: &HgPath,
315 315 fs_path: &Path,
316 316 is_at_repo_root: bool,
317 317 ) -> Result<Vec<DirEntry>, ()> {
318 318 DirEntry::read_dir(fs_path, is_at_repo_root)
319 319 .map_err(|error| self.io_error(error, hg_path))
320 320 }
321 321
322 322 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
323 323 let errno = error.raw_os_error().expect("expected real OS error");
324 324 self.outcome
325 325 .lock()
326 326 .unwrap()
327 327 .bad
328 328 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
329 329 }
330 330
331 331 fn check_for_outdated_directory_cache(
332 332 &self,
333 333 dirstate_node: &NodeRef<'tree, 'on_disk>,
334 334 ) -> Result<bool, DirstateV2ParseError> {
335 335 if self.ignore_patterns_have_changed == Some(true)
336 336 && dirstate_node.cached_directory_mtime()?.is_some()
337 337 {
338 338 self.outdated_cached_directories.lock().unwrap().push(
339 339 dirstate_node
340 340 .full_path_borrowed(self.dmap.on_disk)?
341 341 .detach_from_tree(),
342 342 );
343 343 return Ok(true);
344 344 }
345 345 Ok(false)
346 346 }
347 347
348 348 /// If this returns true, we can get accurate results by only using
349 349 /// `symlink_metadata` for child nodes that exist in the dirstate, and we
350 350 /// don’t need to call `read_dir`.
351 351 fn can_skip_fs_readdir(
352 352 &self,
353 353 directory_entry: &DirEntry,
354 354 cached_directory_mtime: Option<TruncatedTimestamp>,
355 355 ) -> bool {
356 356 if !self.options.list_unknown && !self.options.list_ignored {
357 357 // All states that we care about listing have corresponding
358 358 // dirstate entries.
359 359 // This happens for example with `hg status -mard`.
360 360 return true;
361 361 }
362 362 if !self.options.list_ignored
363 363 && self.ignore_patterns_have_changed == Some(false)
364 364 {
365 365 if let Some(cached_mtime) = cached_directory_mtime {
366 366 // The dirstate contains a cached mtime for this directory, set
367 367 // by a previous run of the `status` algorithm which found this
368 368 // directory eligible for `read_dir` caching.
369 369 if let Ok(meta) = directory_entry.symlink_metadata() {
370 370 if cached_mtime
371 371 .likely_equal_to_mtime_of(&meta)
372 372 .unwrap_or(false)
373 373 {
374 374 // The mtime of that directory has not changed
375 375 // since then, which means that the results of
376 376 // `read_dir` should also be unchanged.
377 377 return true;
378 378 }
379 379 }
380 380 }
381 381 }
382 382 false
383 383 }
384 384
385 385 fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
386 386 match set {
387 387 VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
388 388 VisitChildrenSet::Empty => false,
389 389 VisitChildrenSet::Set(children_to_visit) => {
390 390 children_to_visit.contains(basename)
391 391 }
392 392 }
393 393 }
394 394
395 395 /// Returns whether all child entries of the filesystem directory have a
396 396 /// corresponding dirstate node or are ignored.
397 397 fn traverse_fs_directory_and_dirstate<'ancestor>(
398 398 &self,
399 399 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
400 400 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
401 401 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
402 402 directory_entry: &DirEntry,
403 403 cached_directory_mtime: Option<TruncatedTimestamp>,
404 404 is_at_repo_root: bool,
405 405 ) -> Result<bool, DirstateV2ParseError> {
406 406 let children_set = self.matcher.visit_children_set(directory_hg_path);
407 407 if let VisitChildrenSet::Empty = children_set {
408 408 return Ok(false);
409 409 }
410 410 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
411 411 dirstate_nodes
412 412 .par_iter()
413 413 .map(|dirstate_node| {
414 414 let fs_path = &directory_entry.fs_path;
415 415 let basename =
416 416 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
417 417 let fs_path = fs_path.join(get_path_from_bytes(basename));
418 418 if !Self::should_visit(
419 419 &children_set,
420 420 HgPath::new(basename),
421 421 ) {
422 422 return Ok(());
423 423 }
424 424 match std::fs::symlink_metadata(&fs_path) {
425 425 Ok(fs_metadata) => {
426 426 let file_type = fs_metadata.file_type().into();
427 427 let entry = DirEntry {
428 428 hg_path: Cow::Borrowed(
429 429 dirstate_node
430 430 .full_path(self.dmap.on_disk)?,
431 431 ),
432 432 fs_path: Cow::Borrowed(&fs_path),
433 433 symlink_metadata: Some(fs_metadata),
434 434 file_type,
435 435 };
436 436 self.traverse_fs_and_dirstate(
437 437 &entry,
438 438 dirstate_node,
439 439 has_ignored_ancestor,
440 440 )
441 441 }
442 442 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
443 443 self.traverse_dirstate_only(dirstate_node)
444 444 }
445 445 Err(error) => {
446 446 let hg_path =
447 447 dirstate_node.full_path(self.dmap.on_disk)?;
448 448 self.io_error(error, hg_path);
449 449 Ok(())
450 450 }
451 451 }
452 452 })
453 453 .collect::<Result<_, _>>()?;
454 454
455 455 // We don’t know, so conservatively say this isn’t the case
456 456 let children_all_have_dirstate_node_or_are_ignored = false;
457 457
458 458 return Ok(children_all_have_dirstate_node_or_are_ignored);
459 459 }
460 460
461 461 let readdir_succeeded;
462 462 let mut fs_entries = if let Ok(entries) = self.read_dir(
463 463 directory_hg_path,
464 464 &directory_entry.fs_path,
465 465 is_at_repo_root,
466 466 ) {
467 467 readdir_succeeded = true;
468 468 entries
469 469 } else {
470 470 // Treat an unreadable directory (typically because of insufficient
471 471 // permissions) like an empty directory. `self.read_dir` has
472 472 // already called `self.io_error` so a warning will be emitted.
473 473 // We still need to remember that there was an error so that we
474 474 // know not to cache this result.
475 475 readdir_succeeded = false;
476 476 Vec::new()
477 477 };
478 478
479 479 // `merge_join_by` requires both its input iterators to be sorted:
480 480
481 481 let dirstate_nodes = dirstate_nodes.sorted();
482 482 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
483 483 // https://github.com/rust-lang/rust/issues/34162
484 484 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
485 485
486 486 // Propagate here any error that would happen inside the comparison
487 487 // callback below
488 488 for dirstate_node in &dirstate_nodes {
489 489 dirstate_node.base_name(self.dmap.on_disk)?;
490 490 }
491 491 itertools::merge_join_by(
492 492 dirstate_nodes,
493 493 &fs_entries,
494 494 |dirstate_node, fs_entry| {
495 495 // This `unwrap` never panics because we already propagated
496 496 // those errors above
497 497 dirstate_node
498 498 .base_name(self.dmap.on_disk)
499 499 .unwrap()
500 500 .cmp(&fs_entry.hg_path)
501 501 },
502 502 )
503 503 .par_bridge()
504 504 .map(|pair| {
505 505 use itertools::EitherOrBoth::*;
506 506 let basename = match &pair {
507 507 Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
508 508 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
509 509 ),
510 510 Right(fs_entry) => &fs_entry.hg_path,
511 511 };
512 512 if !Self::should_visit(&children_set, basename) {
513 513 return Ok(false);
514 514 }
515 515 let has_dirstate_node_or_is_ignored = match pair {
516 516 Both(dirstate_node, fs_entry) => {
517 517 self.traverse_fs_and_dirstate(
518 518 fs_entry,
519 519 dirstate_node,
520 520 has_ignored_ancestor,
521 521 )?;
522 522 true
523 523 }
524 524 Left(dirstate_node) => {
525 525 self.traverse_dirstate_only(dirstate_node)?;
526 526 true
527 527 }
528 528 Right(fs_entry) => self.traverse_fs_only(
529 529 has_ignored_ancestor.force(&self.ignore_fn),
530 530 directory_hg_path,
531 531 fs_entry,
532 532 ),
533 533 };
534 534 Ok(has_dirstate_node_or_is_ignored)
535 535 })
536 536 .try_reduce(|| true, |a, b| Ok(a && b))
537 537 .map(|res| res && readdir_succeeded)
538 538 }
539 539
540 540 fn traverse_fs_and_dirstate<'ancestor>(
541 541 &self,
542 542 fs_entry: &DirEntry,
543 543 dirstate_node: NodeRef<'tree, 'on_disk>,
544 544 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
545 545 ) -> Result<(), DirstateV2ParseError> {
546 546 let outdated_dircache =
547 547 self.check_for_outdated_directory_cache(&dirstate_node)?;
548 548 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
549 549 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
550 550 if !file_or_symlink {
551 551 // If we previously had a file here, it was removed (with
552 552 // `hg rm` or similar) or deleted before it could be
553 553 // replaced by a directory or something else.
554 554 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
555 555 }
556 556 if let Some(bad_type) = fs_entry.is_bad() {
557 557 if self.matcher.exact_match(hg_path) {
558 558 let path = dirstate_node.full_path(self.dmap.on_disk)?;
559 559 self.outcome.lock().unwrap().bad.push((
560 560 path.to_owned().into(),
561 561 BadMatch::BadType(bad_type),
562 562 ))
563 563 }
564 564 }
565 565 if fs_entry.is_dir() {
566 566 if self.options.collect_traversed_dirs {
567 567 self.outcome
568 568 .lock()
569 569 .unwrap()
570 570 .traversed
571 571 .push(hg_path.detach_from_tree())
572 572 }
573 573 let is_ignored = HasIgnoredAncestor::create(
574 574 Some(has_ignored_ancestor),
575 575 hg_path,
576 576 );
577 577 let is_at_repo_root = false;
578 578 let children_all_have_dirstate_node_or_are_ignored = self
579 579 .traverse_fs_directory_and_dirstate(
580 580 &is_ignored,
581 581 dirstate_node.children(self.dmap.on_disk)?,
582 582 hg_path,
583 583 fs_entry,
584 584 dirstate_node.cached_directory_mtime()?,
585 585 is_at_repo_root,
586 586 )?;
587 587 self.maybe_save_directory_mtime(
588 588 children_all_have_dirstate_node_or_are_ignored,
589 589 fs_entry,
590 590 dirstate_node,
591 591 outdated_dircache,
592 592 )?
593 593 } else {
594 594 if file_or_symlink && self.matcher.matches(hg_path) {
595 595 if let Some(entry) = dirstate_node.entry()? {
596 596 if !entry.any_tracked() {
597 597 // Forward-compat if we start tracking unknown/ignored
598 598 // files for caching reasons
599 599 self.mark_unknown_or_ignored(
600 600 has_ignored_ancestor.force(&self.ignore_fn),
601 601 hg_path,
602 602 );
603 603 }
604 604 if entry.added() {
605 605 self.push_outcome(Outcome::Added, &dirstate_node)?;
606 606 } else if entry.removed() {
607 607 self.push_outcome(Outcome::Removed, &dirstate_node)?;
608 608 } else if entry.modified() {
609 609 self.push_outcome(Outcome::Modified, &dirstate_node)?;
610 610 } else {
611 611 self.handle_normal_file(&dirstate_node, fs_entry)?;
612 612 }
613 613 } else {
614 614 // `node.entry.is_none()` indicates a "directory"
615 615 // node, but the filesystem has a file
616 616 self.mark_unknown_or_ignored(
617 617 has_ignored_ancestor.force(&self.ignore_fn),
618 618 hg_path,
619 619 );
620 620 }
621 621 }
622 622
623 623 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
624 624 {
625 625 self.traverse_dirstate_only(child_node)?
626 626 }
627 627 }
628 628 Ok(())
629 629 }
630 630
631 631 /// Save directory mtime if applicable.
632 632 ///
633 633 /// `outdated_directory_cache` is `true` if we've just invalidated the
634 634 /// cache for this directory in `check_for_outdated_directory_cache`,
635 635 /// which forces the update.
636 636 fn maybe_save_directory_mtime(
637 637 &self,
638 638 children_all_have_dirstate_node_or_are_ignored: bool,
639 639 directory_entry: &DirEntry,
640 640 dirstate_node: NodeRef<'tree, 'on_disk>,
641 641 outdated_directory_cache: bool,
642 642 ) -> Result<(), DirstateV2ParseError> {
643 643 if !children_all_have_dirstate_node_or_are_ignored {
644 644 return Ok(());
645 645 }
646 646 // All filesystem directory entries from `read_dir` have a
647 647 // corresponding node in the dirstate, so we can reconstitute the
648 648 // names of those entries without calling `read_dir` again.
649 649
650 650 // TODO: use let-else here and below when available:
651 651 // https://github.com/rust-lang/rust/issues/87335
652 652 let status_start = if let Some(status_start) =
653 653 &self.filesystem_time_at_status_start
654 654 {
655 655 status_start
656 656 } else {
657 657 return Ok(());
658 658 };
659 659
660 660 // Although the Rust standard library’s `SystemTime` type
661 661 // has nanosecond precision, the times reported for a
662 662 // directory’s (or file’s) modified time may have lower
663 663 // resolution based on the filesystem (for example ext3
664 664 // only stores integer seconds), kernel (see
665 665 // https://stackoverflow.com/a/14393315/1162888), etc.
666 666 let metadata = match directory_entry.symlink_metadata() {
667 667 Ok(meta) => meta,
668 668 Err(_) => return Ok(()),
669 669 };
670 670
671 671 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
672 672 &metadata,
673 673 status_start,
674 674 ) {
675 675 Ok(Some(directory_mtime)) => directory_mtime,
676 676 Ok(None) => {
677 677 // The directory was modified too recently,
678 678 // don’t cache its `read_dir` results.
679 679 //
680 680 // 1. A change to this directory (direct child was
681 681 // added or removed) causes its mtime to be set
682 682 // (possibly truncated) to `directory_mtime`
683 683 // 2. This `status` algorithm calls `read_dir`
684 684 // 3. Another change is made to the same directory,
685 685 // so that calling `read_dir` again would give
686 686 // different results, but soon enough after 1. that
687 687 // the mtime stays the same
688 688 //
689 689 // On a system where the time resolution is poor, this
690 690 // scenario is not unlikely if all three steps are caused
691 691 // by the same script.
692 692 return Ok(());
693 693 }
694 694 Err(_) => {
695 695 // OS/libc does not support mtime?
696 696 return Ok(());
697 697 }
698 698 };
699 699 // We’ve observed (through `status_start`) that time has
700 700 // “progressed” since `directory_mtime`, so any further
701 701 // change to this directory is extremely likely to cause a
702 702 // different mtime.
703 703 //
704 704 // Having the same mtime again is not entirely impossible
705 705 // since the system clock is not monotonic. It could jump
706 706 // backward to some point before `directory_mtime`, then a
707 707 // directory change could potentially happen during exactly
708 708 // the wrong tick.
709 709 //
710 710 // We deem this scenario (unlike the previous one) to be
711 711 // unlikely enough in practice.
712 712
713 713 let is_up_to_date = if let Some(cached) =
714 714 dirstate_node.cached_directory_mtime()?
715 715 {
716 716 !outdated_directory_cache && cached.likely_equal(directory_mtime)
717 717 } else {
718 718 false
719 719 };
720 720 if !is_up_to_date {
721 721 let hg_path = dirstate_node
722 722 .full_path_borrowed(self.dmap.on_disk)?
723 723 .detach_from_tree();
724 724 self.new_cacheable_directories
725 725 .lock()
726 726 .unwrap()
727 727 .push((hg_path, directory_mtime))
728 728 }
729 729 Ok(())
730 730 }
731 731
732 732 /// A file that is clean in the dirstate was found in the filesystem
733 733 fn handle_normal_file(
734 734 &self,
735 735 dirstate_node: &NodeRef<'tree, 'on_disk>,
736 736 fs_entry: &DirEntry,
737 737 ) -> Result<(), DirstateV2ParseError> {
738 738 // Keep the low 31 bits
739 739 fn truncate_u64(value: u64) -> i32 {
740 740 (value & 0x7FFF_FFFF) as i32
741 741 }
742 742
743 743 let fs_metadata = match fs_entry.symlink_metadata() {
744 744 Ok(meta) => meta,
745 745 Err(_) => return Ok(()),
746 746 };
747 747
748 748 let entry = dirstate_node
749 749 .entry()?
750 750 .expect("handle_normal_file called with entry-less node");
751 751 let mode_changed =
752 752 || self.options.check_exec && entry.mode_changed(&fs_metadata);
753 753 let size = entry.size();
754 754 let size_changed = size != truncate_u64(fs_metadata.len());
755 755 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
756 756 // issue6456: Size returned may be longer due to encryption
757 757 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
758 758 self.push_outcome(Outcome::Unsure, dirstate_node)?
759 759 } else if dirstate_node.has_copy_source()
760 760 || entry.is_from_other_parent()
761 761 || (size >= 0 && (size_changed || mode_changed()))
762 762 {
763 763 self.push_outcome(Outcome::Modified, dirstate_node)?
764 764 } else {
765 765 let mtime_looks_clean = if let Some(dirstate_mtime) =
766 766 entry.truncated_mtime()
767 767 {
768 768 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
769 769 .expect("OS/libc does not support mtime?");
770 770 // There might be a change in the future if for example the
771 771 // internal clock drifts while the process runs, but this is a
772 772 // case where the issues the user would face
773 773 // would be a lot worse and there is nothing we
774 774 // can really do.
775 775 fs_mtime.likely_equal(dirstate_mtime)
776 776 } else {
777 777 // No mtime in the dirstate entry
778 778 false
779 779 };
780 780 if !mtime_looks_clean {
781 781 self.push_outcome(Outcome::Unsure, dirstate_node)?
782 782 } else if self.options.list_clean {
783 783 self.push_outcome(Outcome::Clean, dirstate_node)?
784 784 }
785 785 }
786 786 Ok(())
787 787 }
788 788
789 789 /// A node in the dirstate tree has no corresponding filesystem entry
790 790 fn traverse_dirstate_only(
791 791 &self,
792 792 dirstate_node: NodeRef<'tree, 'on_disk>,
793 793 ) -> Result<(), DirstateV2ParseError> {
794 794 self.check_for_outdated_directory_cache(&dirstate_node)?;
795 795 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
796 796 dirstate_node
797 797 .children(self.dmap.on_disk)?
798 798 .par_iter()
799 799 .map(|child_node| self.traverse_dirstate_only(child_node))
800 800 .collect()
801 801 }
802 802
803 803 /// A node in the dirstate tree has no corresponding *file* on the
804 804 /// filesystem
805 805 ///
806 806 /// Does nothing on a "directory" node
807 807 fn mark_removed_or_deleted_if_file(
808 808 &self,
809 809 dirstate_node: &NodeRef<'tree, 'on_disk>,
810 810 ) -> Result<(), DirstateV2ParseError> {
811 811 if let Some(entry) = dirstate_node.entry()? {
812 812 if !entry.any_tracked() {
813 813 // Future-compat for when we start storing ignored and unknown
814 814 // files for caching reasons
815 815 return Ok(());
816 816 }
817 817 let path = dirstate_node.full_path(self.dmap.on_disk)?;
818 818 if self.matcher.matches(path) {
819 819 if entry.removed() {
820 820 self.push_outcome(Outcome::Removed, dirstate_node)?
821 821 } else {
822 822 self.push_outcome(Outcome::Deleted, dirstate_node)?
823 823 }
824 824 }
825 825 }
826 826 Ok(())
827 827 }
828 828
829 829 /// Something in the filesystem has no corresponding dirstate node
830 830 ///
831 831 /// Returns whether that path is ignored
832 832 fn traverse_fs_only(
833 833 &self,
834 834 has_ignored_ancestor: bool,
835 835 directory_hg_path: &HgPath,
836 836 fs_entry: &DirEntry,
837 837 ) -> bool {
838 838 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
839 839 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
840 840 if fs_entry.is_dir() {
841 841 let is_ignored =
842 842 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
843 843 let traverse_children = if is_ignored {
844 844 // Descendants of an ignored directory are all ignored
845 845 self.options.list_ignored
846 846 } else {
847 847 // Descendants of an unknown directory may be either unknown or
848 848 // ignored
849 849 self.options.list_unknown || self.options.list_ignored
850 850 };
851 851 if traverse_children {
852 852 let is_at_repo_root = false;
853 853 if let Ok(children_fs_entries) =
854 854 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
855 855 {
856 856 children_fs_entries.par_iter().for_each(|child_fs_entry| {
857 857 self.traverse_fs_only(
858 858 is_ignored,
859 859 &hg_path,
860 860 child_fs_entry,
861 861 );
862 862 })
863 863 }
864 864 if self.options.collect_traversed_dirs {
865 865 self.outcome.lock().unwrap().traversed.push(hg_path.into())
866 866 }
867 867 }
868 868 is_ignored
869 869 } else if file_or_symlink {
870 870 if self.matcher.matches(&hg_path) {
871 871 self.mark_unknown_or_ignored(
872 872 has_ignored_ancestor,
873 873 &BorrowedPath::InMemory(&hg_path),
874 874 )
875 875 } else {
876 876 // We haven’t computed whether this path is ignored. It
877 877 // might not be, and a future run of status might have a
878 878 // different matcher that matches it. So treat it as not
879 879 // ignored. That is, inhibit readdir caching of the parent
880 880 // directory.
881 881 false
882 882 }
883 883 } else {
884 884 // This is neither a directory, a plain file, nor a symlink.
885 885 // Treat it like an ignored file.
886 886 true
887 887 }
888 888 }
889 889
890 890 /// Returns whether that path is ignored
891 891 fn mark_unknown_or_ignored(
892 892 &self,
893 893 has_ignored_ancestor: bool,
894 894 hg_path: &BorrowedPath<'_, 'on_disk>,
895 895 ) -> bool {
896 896 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
897 897 if is_ignored {
898 898 if self.options.list_ignored {
899 899 self.push_outcome_without_copy_source(
900 900 Outcome::Ignored,
901 901 hg_path,
902 902 )
903 903 }
904 904 } else if self.options.list_unknown {
905 905 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
906 906 }
907 907 is_ignored
908 908 }
909 909 }
910 910
911 911 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
912 912 /// care about.
913 913 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
914 914 enum FakeFileType {
915 915 File,
916 916 Directory,
917 917 Symlink,
918 918 BadType(BadType),
919 919 }
920 920
921 921 impl From<std::fs::FileType> for FakeFileType {
922 922 fn from(f: std::fs::FileType) -> Self {
923 923 if f.is_dir() {
924 924 Self::Directory
925 925 } else if f.is_file() {
926 926 Self::File
927 927 } else if f.is_symlink() {
928 928 Self::Symlink
929 929 } else if f.is_fifo() {
930 930 Self::BadType(BadType::FIFO)
931 931 } else if f.is_block_device() {
932 932 Self::BadType(BadType::BlockDevice)
933 933 } else if f.is_char_device() {
934 934 Self::BadType(BadType::CharacterDevice)
935 935 } else if f.is_socket() {
936 936 Self::BadType(BadType::Socket)
937 937 } else {
938 938 Self::BadType(BadType::Unknown)
939 939 }
940 940 }
941 941 }
942 942
943 943 struct DirEntry<'a> {
944 944 /// Path as stored in the dirstate, or just the filename for optimization.
945 945 hg_path: HgPathCow<'a>,
946 946 /// Filesystem path
947 947 fs_path: Cow<'a, Path>,
948 948 /// Lazily computed
949 949 symlink_metadata: Option<std::fs::Metadata>,
950 950 /// Already computed for ergonomics.
951 951 file_type: FakeFileType,
952 952 }
953 953
954 954 impl<'a> DirEntry<'a> {
955 955 /// Returns **unsorted** entries in the given directory, with name,
956 956 /// metadata and file type.
957 957 ///
958 958 /// If a `.hg` sub-directory is encountered:
959 959 ///
960 960 /// * At the repository root, ignore that sub-directory
961 961 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
962 962 /// list instead.
963 963 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
964 964 // `read_dir` returns a "not found" error for the empty path
965 965 let at_cwd = path == Path::new("");
966 966 let read_dir_path = if at_cwd { Path::new(".") } else { path };
967 967 let mut results = Vec::new();
968 968 for entry in read_dir_path.read_dir()? {
969 969 let entry = entry?;
970 970 let file_type = match entry.file_type() {
971 971 Ok(v) => v,
972 972 Err(e) => {
973 973 // race with file deletion?
974 974 if e.kind() == std::io::ErrorKind::NotFound {
975 975 continue;
976 976 } else {
977 977 return Err(e);
978 978 }
979 979 }
980 980 };
981 981 let file_name = entry.file_name();
982 982 // FIXME don't do this when cached
983 983 if file_name == ".hg" {
984 984 if is_at_repo_root {
985 985 // Skip the repo’s own .hg (might be a symlink)
986 986 continue;
987 987 } else if file_type.is_dir() {
988 988 // A .hg sub-directory at another location means a subrepo,
989 989 // skip it entirely.
990 990 return Ok(Vec::new());
991 991 }
992 992 }
993 993 let full_path = if at_cwd {
994 994 file_name.clone().into()
995 995 } else {
996 996 entry.path()
997 997 };
998 998 let filename =
999 999 Cow::Owned(get_bytes_from_os_string(file_name).into());
1000 1000 let file_type = FakeFileType::from(file_type);
1001 1001 results.push(DirEntry {
1002 1002 hg_path: filename,
1003 1003 fs_path: Cow::Owned(full_path.to_path_buf()),
1004 1004 symlink_metadata: None,
1005 1005 file_type,
1006 1006 })
1007 1007 }
1008 1008 Ok(results)
1009 1009 }
1010 1010
1011 1011 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
1012 1012 match &self.symlink_metadata {
1013 1013 Some(meta) => Ok(meta.clone()),
1014 1014 None => std::fs::symlink_metadata(&self.fs_path),
1015 1015 }
1016 1016 }
1017 1017
1018 1018 fn is_dir(&self) -> bool {
1019 1019 self.file_type == FakeFileType::Directory
1020 1020 }
1021 1021
1022 1022 fn is_file(&self) -> bool {
1023 1023 self.file_type == FakeFileType::File
1024 1024 }
1025 1025
1026 1026 fn is_symlink(&self) -> bool {
1027 1027 self.file_type == FakeFileType::Symlink
1028 1028 }
1029 1029
1030 1030 fn is_bad(&self) -> Option<BadType> {
1031 1031 match self.file_type {
1032 1032 FakeFileType::BadType(ty) => Some(ty),
1033 1033 _ => None,
1034 1034 }
1035 1035 }
1036 1036 }
1037 1037
1038 1038 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
1039 1039 /// of the given repository.
1040 1040 ///
1041 1041 /// This is similar to `SystemTime::now()`, with the result truncated to the
1042 1042 /// same time resolution as other files’ modification times. Using `.hg`
1043 1043 /// instead of the system’s default temporary directory (such as `/tmp`) makes
1044 1044 /// it more likely the temporary file is in the same disk partition as contents
1045 1045 /// of the working directory, which can matter since different filesystems may
1046 1046 /// store timestamps with different resolutions.
1047 1047 ///
1048 1048 /// This may fail, typically if we lack write permissions. In that case we
1049 1049 /// should continue the `status()` algorithm anyway and consider the current
1050 1050 /// date/time to be unknown.
1051 1051 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
1052 1052 tempfile::tempfile_in(repo_root.join(".hg"))?
1053 1053 .metadata()?
1054 1054 .modified()
1055 1055 }
@@ -1,711 +1,711 b''
1 1 // discovery.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Discovery operations
9 9 //!
10 10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 11 //! `mercurial.setdiscovery`
12 12
13 13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 15 use rand::seq::SliceRandom;
16 16 use rand::{thread_rng, RngCore, SeedableRng};
17 17 use std::cmp::{max, min};
18 18 use std::collections::{HashSet, VecDeque};
19 19
20 20 type Rng = rand_pcg::Pcg32;
21 21 type Seed = [u8; 16];
22 22
23 23 pub struct PartialDiscovery<G: Graph + Clone> {
24 24 target_heads: Option<Vec<Revision>>,
25 25 graph: G, // plays the role of self._repo
26 26 common: MissingAncestors<G>,
27 27 undecided: Option<HashSet<Revision>>,
28 28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 29 missing: HashSet<Revision>,
30 30 rng: Rng,
31 31 respect_size: bool,
32 32 randomize: bool,
33 33 }
34 34
35 35 pub struct DiscoveryStats {
36 36 pub undecided: Option<usize>,
37 37 }
38 38
39 39 /// Update an existing sample to match the expected size
40 40 ///
41 41 /// The sample is updated with revisions exponentially distant from each
42 42 /// element of `heads`.
43 43 ///
44 44 /// If a target size is specified, the sampling will stop once this size is
45 45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 46 /// reached.
47 47 ///
48 48 /// - `revs`: set of revs we want to discover (if `None`, assume the whole DAG
49 49 /// represented by `parentsfn`)
50 50 /// - `heads`: set of DAG head revs
51 51 /// - `sample`: a sample to update
52 52 /// - `parentsfn`: a callable to resolve parents for a revision
53 53 /// - `quicksamplesize`: optional target size of the sample
54 54 fn update_sample<I>(
55 55 revs: Option<&HashSet<Revision>>,
56 56 heads: impl IntoIterator<Item = Revision>,
57 57 sample: &mut HashSet<Revision>,
58 58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 59 quicksamplesize: Option<usize>,
60 60 ) -> Result<(), GraphError>
61 61 where
62 62 I: Iterator<Item = Revision>,
63 63 {
64 64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 66 let mut factor: u32 = 1;
67 67 let mut seen: HashSet<Revision> = HashSet::new();
68 68 while let Some(current) = visit.pop_front() {
69 69 if !seen.insert(current) {
70 70 continue;
71 71 }
72 72
73 73 let d = *distances.entry(current).or_insert(1);
74 74 if d > factor {
75 75 factor *= 2;
76 76 }
77 77 if d == factor {
78 78 sample.insert(current);
79 79 if let Some(sz) = quicksamplesize {
80 80 if sample.len() >= sz {
81 81 return Ok(());
82 82 }
83 83 }
84 84 }
85 85 for p in parentsfn(current)? {
86 86 if let Some(revs) = revs {
87 87 if !revs.contains(&p) {
88 88 continue;
89 89 }
90 90 }
91 91 distances.entry(p).or_insert(d + 1);
92 92 visit.push_back(p);
93 93 }
94 94 }
95 95 Ok(())
96 96 }
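// A standalone sketch of the exponential-distance idea implemented by
// `update_sample`: walking back from a head, only revisions whose distance
// from that head is a power of two end up in the sample. On a linear chain
// `0..=head` (a simplification of a real DAG) that keeps the revisions at
// distances 1, 2, 4, 8, ... from the head.
fn exponential_sample_on_chain(head: u32) -> Vec<u32> {
    let mut sample = Vec::new();
    let mut distance = 1;
    while distance <= head + 1 {
        // On the chain, the revision at distance `d` from `head` is
        // `head - d + 1` (the head itself counts as distance 1).
        sample.push(head + 1 - distance);
        distance *= 2;
    }
    sample
}
// exponential_sample_on_chain(12) == vec![12, 11, 9, 5]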
97 97
98 98 struct ParentsIterator {
99 99 parents: [Revision; 2],
100 100 cur: usize,
101 101 }
102 102
103 103 impl ParentsIterator {
104 104 fn graph_parents(
105 105 graph: &impl Graph,
106 106 r: Revision,
107 107 ) -> Result<ParentsIterator, GraphError> {
108 108 Ok(ParentsIterator {
109 109 parents: graph.parents(r)?,
110 110 cur: 0,
111 111 })
112 112 }
113 113 }
114 114
115 115 impl Iterator for ParentsIterator {
116 116 type Item = Revision;
117 117
118 118 fn next(&mut self) -> Option<Revision> {
119 119 if self.cur > 1 {
120 120 return None;
121 121 }
122 122 let rev = self.parents[self.cur];
123 123 self.cur += 1;
124 124 if rev == NULL_REVISION {
125 125 return self.next();
126 126 }
127 127 Some(rev)
128 128 }
129 129 }
130 130
131 131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 132 /// Create a PartialDiscovery object, with the intent
133 133 /// of comparing our `::<target_heads>` revset to the contents of another
134 134 /// repo.
135 135 ///
136 136 /// For now `target_heads` is passed as a vector, and will be used
137 137 /// at the first call to `ensure_undecided()`.
138 138 ///
139 139 /// If we want to make the signature more flexible,
140 140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 141 /// object, since we'll keep it around in the meantime
142 142 ///
143 143 /// The `respect_size` boolean controls how the sampling methods
144 144 /// will interpret the size argument requested by the caller. If it's
145 145 /// `false`, they are allowed to produce a sample whose size is more
146 146 /// appropriate to the situation (typically bigger).
147 147 ///
148 148 /// The `randomize` boolean affects sampling, and specifically how
149 149 /// limiting or last-minute expanding is being done:
150 150 ///
151 151 /// If `true`, both will perform random picking from `self.undecided`.
152 152 /// This is currently the best for actual discoveries.
153 153 ///
154 154 /// If `false`, a reproducible picking strategy is performed. This is
155 155 /// useful for integration tests.
156 156 pub fn new(
157 157 graph: G,
158 158 target_heads: Vec<Revision>,
159 159 respect_size: bool,
160 160 randomize: bool,
161 161 ) -> Self {
162 162 let mut seed = [0; 16];
163 163 if randomize {
164 164 thread_rng().fill_bytes(&mut seed);
165 165 }
166 166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 167 }
168 168
169 169 pub fn new_with_seed(
170 170 graph: G,
171 171 target_heads: Vec<Revision>,
172 172 seed: Seed,
173 173 respect_size: bool,
174 174 randomize: bool,
175 175 ) -> Self {
176 176 PartialDiscovery {
177 177 undecided: None,
178 178 children_cache: None,
179 179 target_heads: Some(target_heads),
180 180 graph: graph.clone(),
181 181 common: MissingAncestors::new(graph, vec![]),
182 182 missing: HashSet::new(),
183 183 rng: Rng::from_seed(seed),
184 184 respect_size,
185 185 randomize,
186 186 }
187 187 }
188 188
189 189 /// Extract at most `size` random elements from sample and return them
190 190 /// as a vector
191 191 fn limit_sample(
192 192 &mut self,
193 193 mut sample: Vec<Revision>,
194 194 size: usize,
195 195 ) -> Vec<Revision> {
196 196 if !self.randomize {
197 197 sample.sort_unstable();
198 198 sample.truncate(size);
199 199 return sample;
200 200 }
201 201 let sample_len = sample.len();
202 202 if sample_len <= size {
203 203 return sample;
204 204 }
205 205 let rng = &mut self.rng;
206 206 let dropped_size = sample_len - size;
207 207 let limited_slice = if size < dropped_size {
208 208 sample.partial_shuffle(rng, size).0
209 209 } else {
210 210 sample.partial_shuffle(rng, dropped_size).1
211 211 };
212 212 limited_slice.to_owned()
213 213 }
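// Usage sketch for `rand::seq::SliceRandom::partial_shuffle`, the primitive
// behind `limit_sample`: it randomly selects `amount` elements, moves them to
// the front of the slice, and returns `(randomly_picked, remainder)`.
// `limit_sample` above only shuffles whichever of the kept or dropped parts
// is smaller and reads its answer from the matching half of that tuple.
fn partial_shuffle_sketch() {
    use rand::seq::SliceRandom;
    use rand::SeedableRng;

    let mut rng = rand_pcg::Pcg32::from_seed([0; 16]);
    let mut revs: Vec<u32> = (0..20).collect();
    let (picked, remainder) = revs.partial_shuffle(&mut rng, 5);
    assert_eq!(picked.len(), 5);
    assert_eq!(remainder.len(), 15);
}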
214 214
215 215 /// Register revisions known as being common
216 216 pub fn add_common_revisions(
217 217 &mut self,
218 218 common: impl IntoIterator<Item = Revision>,
219 219 ) -> Result<(), GraphError> {
220 220 let before_len = self.common.get_bases().len();
221 221 self.common.add_bases(common);
222 222 if self.common.get_bases().len() == before_len {
223 223 return Ok(());
224 224 }
225 225 if let Some(ref mut undecided) = self.undecided {
226 226 self.common.remove_ancestors_from(undecided)?;
227 227 }
228 228 Ok(())
229 229 }
230 230
231 231 /// Register revisions known as being missing
232 232 ///
233 233 /// # Performance note
234 234 ///
235 235 /// Except in the most trivial case, the first call of this method has
236 236 /// the side effect of computing the `self.undecided` set for the first time,
237 237 /// and the related caches it might need for efficiency of its internal
238 238 /// computation. This is typically faster if more information is
239 239 /// available in `self.common`. Therefore, for good performance, the
240 240 /// caller should avoid calling this too early.
241 241 pub fn add_missing_revisions(
242 242 &mut self,
243 243 missing: impl IntoIterator<Item = Revision>,
244 244 ) -> Result<(), GraphError> {
245 245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 246 if tovisit.is_empty() {
247 247 return Ok(());
248 248 }
249 249 self.ensure_children_cache()?;
250 250 self.ensure_undecided()?; // for safety of possible future refactors
251 251 let children = self.children_cache.as_ref().unwrap();
252 252 let mut seen: HashSet<Revision> = HashSet::new();
253 253 let undecided_mut = self.undecided.as_mut().unwrap();
254 254 while let Some(rev) = tovisit.pop_front() {
255 255 if !self.missing.insert(rev) {
256 256 // either it's known to be missing from a previous
257 257 // invocation, and there's no need to iterate on its
258 258 // children (we know they are all missing)
259 259 // or it's from a previous iteration of this loop
260 260 // and its children have already been queued
261 261 continue;
262 262 }
263 263 undecided_mut.remove(&rev);
264 264 match children.get(&rev) {
265 265 None => {
266 266 continue;
267 267 }
268 268 Some(this_children) => {
269 269 for child in this_children.iter().cloned() {
270 270 if seen.insert(child) {
271 271 tovisit.push_back(child);
272 272 }
273 273 }
274 274 }
275 275 }
276 276 }
277 277 Ok(())
278 278 }
279 279
280 280 /// Do we have any information about the peer?
281 281 pub fn has_info(&self) -> bool {
282 282 self.common.has_bases()
283 283 }
284 284
285 285 /// Did we acquire full knowledge of which of our Revisions the peer has?
286 286 pub fn is_complete(&self) -> bool {
287 287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 288 }
289 289
290 290 /// Return the heads of the currently known common set of revisions.
291 291 ///
292 292 /// If the discovery process is not complete (see `is_complete()`), the
293 293 /// caller must be aware that this is an intermediate state.
294 294 ///
295 295 /// On the other hand, if it is complete, then this is currently
296 296 /// the only way to retrieve the end results of the discovery process.
297 297 ///
298 298 /// We may introduce in the future an `into_common_heads` call that
299 299 /// would be more appropriate for normal Rust callers, dropping `self`
300 300 /// if it is complete.
301 301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 302 self.common.bases_heads()
303 303 }
304 304
305 305 /// Force first computation of `self.undecided`
306 306 ///
307 307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 308 /// unwrapped to get workable immutable or mutable references without
309 309 /// any panic.
310 310 ///
311 311 /// This is an imperative call instead of an access with added laziness
312 312 /// to easily reduce the scope of the mutable borrow for the caller,
313 313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 314 /// as long as the resulting immutable one.
315 315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 316 if self.undecided.is_some() {
317 317 return Ok(());
318 318 }
319 319 let tgt = self.target_heads.take().unwrap();
320 320 self.undecided =
321 321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 322 Ok(())
323 323 }
324 324
325 325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 326 if self.children_cache.is_some() {
327 327 return Ok(());
328 328 }
329 329 self.ensure_undecided()?;
330 330
331 331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 332 FastHashMap::default();
333 333 for &rev in self.undecided.as_ref().unwrap() {
334 334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(Vec::new).push(rev);
335 children.entry(p).or_default().push(rev);
336 336 }
337 337 }
338 338 self.children_cache = Some(children);
339 339 Ok(())
340 340 }
341 341
342 342 /// Provide statistics about the current state of the discovery process
343 343 pub fn stats(&self) -> DiscoveryStats {
344 344 DiscoveryStats {
345 345 undecided: self.undecided.as_ref().map(HashSet::len),
346 346 }
347 347 }
348 348
349 349 pub fn take_quick_sample(
350 350 &mut self,
351 351 headrevs: impl IntoIterator<Item = Revision>,
352 352 size: usize,
353 353 ) -> Result<Vec<Revision>, GraphError> {
354 354 self.ensure_undecided()?;
355 355 let mut sample = {
356 356 let undecided = self.undecided.as_ref().unwrap();
357 357 if undecided.len() <= size {
358 358 return Ok(undecided.iter().cloned().collect());
359 359 }
360 360 dagops::heads(&self.graph, undecided.iter())?
361 361 };
362 362 if sample.len() >= size {
363 363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 364 }
365 365 update_sample(
366 366 None,
367 367 headrevs,
368 368 &mut sample,
369 369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 370 Some(size),
371 371 )?;
372 372 Ok(sample.into_iter().collect())
373 373 }
374 374
375 375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 376 ///
377 377 /// The `size` parameter is used to avoid useless computations if
378 378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 379 ///
380 380 /// The sample is taken by using `update_sample` from the heads, then
381 381 /// from the roots, working on the reverse DAG,
382 382 /// expressed by `self.children_cache`.
383 383 ///
384 384 /// No effort is being made to complete or limit the sample to `size`
385 385 /// but this method returns another interesting size that it derives
386 386 /// from its knowledge of the structure of the various sets, leaving
387 387 /// to the caller the decision to use it or not.
388 388 fn bidirectional_sample(
389 389 &mut self,
390 390 size: usize,
391 391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 392 self.ensure_undecided()?;
393 393 {
394 394 // we don't want to compute children_cache before this
395 395 // but doing it after extracting self.undecided takes a mutable
396 396 // ref to self while a shareable one is still active.
397 397 let undecided = self.undecided.as_ref().unwrap();
398 398 if undecided.len() <= size {
399 399 return Ok((undecided.clone(), size));
400 400 }
401 401 }
402 402
403 403 self.ensure_children_cache()?;
404 404 let revs = self.undecided.as_ref().unwrap();
405 405 let mut sample: HashSet<Revision> = revs.clone();
406 406
407 407 // it's possible that leveraging the children cache would be more
408 408 // efficient here
409 409 dagops::retain_heads(&self.graph, &mut sample)?;
410 410 let revsheads = sample.clone(); // was again heads(revs) in python
411 411
412 412 // update from heads
413 413 update_sample(
414 414 Some(revs),
415 415 revsheads.iter().cloned(),
416 416 &mut sample,
417 417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 418 None,
419 419 )?;
420 420
421 421 // update from roots
422 422 let revroots: HashSet<Revision> =
423 423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425 425
426 426 let children = self.children_cache.as_ref().unwrap();
427 427 let empty_vec: Vec<Revision> = Vec::new();
428 428 update_sample(
429 429 Some(revs),
430 430 revroots,
431 431 &mut sample,
432 432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 433 None,
434 434 )?;
435 435 Ok((sample, prescribed_size))
436 436 }
437 437
438 438 /// Fill up the sample to the wished size with random undecided Revisions.
439 439 ///
440 440 /// This is intended to be used as a last resort completion if the
441 441 /// regular sampling algorithm returns too few elements.
442 442 fn random_complete_sample(
443 443 &mut self,
444 444 sample: &mut Vec<Revision>,
445 445 size: usize,
446 446 ) {
447 447 let sample_len = sample.len();
448 448 if size <= sample_len {
449 449 return;
450 450 }
451 451 let take_from: Vec<Revision> = self
452 452 .undecided
453 453 .as_ref()
454 454 .unwrap()
455 455 .iter()
456 456 .filter(|&r| !sample.contains(r))
457 457 .cloned()
458 458 .collect();
459 459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 460 }
461 461
462 462 pub fn take_full_sample(
463 463 &mut self,
464 464 size: usize,
465 465 ) -> Result<Vec<Revision>, GraphError> {
466 466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 467 let size = if self.respect_size {
468 468 size
469 469 } else {
470 470 prescribed_size
471 471 };
472 472 let mut sample =
473 473 self.limit_sample(sample_set.into_iter().collect(), size);
474 474 self.random_complete_sample(&mut sample, size);
475 475 Ok(sample)
476 476 }
477 477 }
478 478
479 479 #[cfg(test)]
480 480 mod tests {
481 481 use super::*;
482 482 use crate::testing::SampleGraph;
483 483
484 484 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
485 485 macro_rules! R {
486 486 ($revision:literal) => {
487 487 Revision($revision)
488 488 };
489 489 }
490 490
491 491 /// A `PartialDiscovery` set up as if pushing all the heads of `SampleGraph`
492 492 ///
493 493 /// To avoid actual randomness in these tests, we give it a fixed
494 494 /// random seed, but by default we'll test the random version.
495 495 fn full_disco() -> PartialDiscovery<SampleGraph> {
496 496 PartialDiscovery::new_with_seed(
497 497 SampleGraph,
498 498 vec![R!(10), R!(11), R!(12), R!(13)],
499 499 [0; 16],
500 500 true,
501 501 true,
502 502 )
503 503 }
504 504
505 505 /// A `PartialDiscovery` set up as if pushing only head 12 of `SampleGraph`
506 506 ///
507 507 /// To avoid actual randomness in tests, we give it a fixed random seed.
508 508 fn disco12() -> PartialDiscovery<SampleGraph> {
509 509 PartialDiscovery::new_with_seed(
510 510 SampleGraph,
511 511 vec![R!(12)],
512 512 [0; 16],
513 513 true,
514 514 true,
515 515 )
516 516 }
517 517
518 518 fn sorted_undecided(
519 519 disco: &PartialDiscovery<SampleGraph>,
520 520 ) -> Vec<Revision> {
521 521 let mut as_vec: Vec<Revision> =
522 522 disco.undecided.as_ref().unwrap().iter().cloned().collect();
523 523 as_vec.sort_unstable();
524 524 as_vec
525 525 }
526 526
527 527 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
528 528 let mut as_vec: Vec<Revision> =
529 529 disco.missing.iter().cloned().collect();
530 530 as_vec.sort_unstable();
531 531 as_vec
532 532 }
533 533
534 534 fn sorted_common_heads(
535 535 disco: &PartialDiscovery<SampleGraph>,
536 536 ) -> Result<Vec<Revision>, GraphError> {
537 537 let mut as_vec: Vec<Revision> =
538 538 disco.common_heads()?.iter().cloned().collect();
539 539 as_vec.sort_unstable();
540 540 Ok(as_vec)
541 541 }
542 542
543 543 #[test]
544 544 fn test_add_common_get_undecided() -> Result<(), GraphError> {
545 545 let mut disco = full_disco();
546 546 assert_eq!(disco.undecided, None);
547 547 assert!(!disco.has_info());
548 548 assert_eq!(disco.stats().undecided, None);
549 549
550 550 disco.add_common_revisions(vec![R!(11), R!(12)])?;
551 551 assert!(disco.has_info());
552 552 assert!(!disco.is_complete());
553 553 assert!(disco.missing.is_empty());
554 554
555 555 // add_common_revisions did not trigger a premature computation
556 556 // of `undecided`, let's check that and ask for them
557 557 assert_eq!(disco.undecided, None);
558 558 disco.ensure_undecided()?;
559 559 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
560 560 assert_eq!(disco.stats().undecided, Some(4));
561 561 Ok(())
562 562 }
563 563
564 564 /// in this test, we pretend that our peer misses exactly (8+10)::
565 565 /// and we're comparing all our repo to it (as in a bare push)
566 566 #[test]
567 567 fn test_discovery() -> Result<(), GraphError> {
568 568 let mut disco = full_disco();
569 569 disco.add_common_revisions(vec![R!(11), R!(12)])?;
570 570 disco.add_missing_revisions(vec![R!(8), R!(10)])?;
571 571 assert_eq!(sorted_undecided(&disco), vec![5]);
572 572 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
573 573 assert!(!disco.is_complete());
574 574
575 575 disco.add_common_revisions(vec![R!(5)])?;
576 576 assert_eq!(sorted_undecided(&disco), Vec::<Revision>::new());
577 577 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
578 578 assert!(disco.is_complete());
579 579 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
580 580 Ok(())
581 581 }
582 582
583 583 #[test]
584 584 fn test_add_missing_early_continue() -> Result<(), GraphError> {
585 585 eprintln!("test_add_missing_early_continue");
586 586 let mut disco = full_disco();
587 587 disco.add_common_revisions(vec![R!(13), R!(3), R!(4)])?;
588 588 disco.ensure_children_cache()?;
589 589 // 12 is grand-child of 6 through 9
590 590 // passing them in this order maximizes the chances of the
591 591 // early continue doing the wrong thing
592 592 disco.add_missing_revisions(vec![R!(6), R!(9), R!(12)])?;
593 593 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
594 594 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
595 595 assert!(!disco.is_complete());
596 596 Ok(())
597 597 }
598 598
599 599 #[test]
600 600 fn test_limit_sample_no_need_to() {
601 601 let sample = vec![R!(1), R!(2), R!(3), R!(4)];
602 602 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
603 603 }
604 604
605 605 #[test]
606 606 fn test_limit_sample_less_than_half() {
607 607 assert_eq!(
608 608 full_disco().limit_sample((1..6).map(Revision).collect(), 2),
609 609 vec![2, 5]
610 610 );
611 611 }
612 612
613 613 #[test]
614 614 fn test_limit_sample_more_than_half() {
615 615 assert_eq!(
616 616 full_disco().limit_sample((1..4).map(Revision).collect(), 2),
617 617 vec![1, 2]
618 618 );
619 619 }
620 620
621 621 #[test]
622 622 fn test_limit_sample_no_random() {
623 623 let mut disco = full_disco();
624 624 disco.randomize = false;
625 625 assert_eq!(
626 626 disco.limit_sample(
627 627 vec![R!(1), R!(8), R!(13), R!(5), R!(7), R!(3)],
628 628 4
629 629 ),
630 630 vec![1, 3, 5, 7]
631 631 );
632 632 }
633 633
634 634 #[test]
635 635 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
636 636 let mut disco = full_disco();
637 637 disco.undecided = Some((1..=13).map(Revision).collect());
638 638
639 639 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
640 640 sample_vec.sort_unstable();
641 641 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
642 642 Ok(())
643 643 }
644 644
645 645 #[test]
646 646 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
647 647 let mut disco = disco12();
648 648 disco.ensure_undecided()?;
649 649
650 650 let mut sample_vec = disco.take_quick_sample(vec![R!(12)], 4)?;
651 651 sample_vec.sort_unstable();
652 652 // r12's only parent is r9, whose unique grand-parent through the
653 653 // diamond shape is r4. This ends there because the distance from r4
654 654 // to the root is only 3.
655 655 assert_eq!(sample_vec, vec![4, 9, 12]);
656 656 Ok(())
657 657 }
658 658
659 659 #[test]
660 660 fn test_children_cache() -> Result<(), GraphError> {
661 661 let mut disco = full_disco();
662 662 disco.ensure_children_cache()?;
663 663
664 664 let cache = disco.children_cache.unwrap();
665 665 assert_eq!(cache.get(&R!(2)).cloned(), Some(vec![R!(4)]));
666 666 assert_eq!(cache.get(&R!(10)).cloned(), None);
667 667
668 668 let mut children_4 = cache.get(&R!(4)).cloned().unwrap();
669 669 children_4.sort_unstable();
670 670 assert_eq!(children_4, vec![R!(5), R!(6), R!(7)]);
671 671
672 672 let mut children_7 = cache.get(&R!(7)).cloned().unwrap();
673 673 children_7.sort_unstable();
674 674 assert_eq!(children_7, vec![R!(9), R!(11)]);
675 675
676 676 Ok(())
677 677 }
678 678
679 679 #[test]
680 680 fn test_complete_sample() {
681 681 let mut disco = full_disco();
682 682 let undecided: HashSet<Revision> =
683 683 [4, 7, 9, 2, 3].iter().cloned().map(Revision).collect();
684 684 disco.undecided = Some(undecided);
685 685
686 686 let mut sample = vec![R!(0)];
687 687 disco.random_complete_sample(&mut sample, 3);
688 688 assert_eq!(sample.len(), 3);
689 689
690 690 let mut sample = vec![R!(2), R!(4), R!(7)];
691 691 disco.random_complete_sample(&mut sample, 1);
692 692 assert_eq!(sample.len(), 3);
693 693 }
694 694
695 695 #[test]
696 696 fn test_bidirectional_sample() -> Result<(), GraphError> {
697 697 let mut disco = full_disco();
698 disco.undecided = Some((0..=13).into_iter().map(Revision).collect());
698 disco.undecided = Some((0..=13).map(Revision).collect());
699 699
700 700 let (sample_set, size) = disco.bidirectional_sample(7)?;
701 701 assert_eq!(size, 7);
702 702 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
703 703 sample.sort_unstable();
704 704 // our DAG is a bit too small for the results to be really interesting,
705 705 // but at least it shows that:
706 706 // - we went both ways
707 707 // - we didn't take all Revisions (6 is not in the sample)
708 708 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
709 709 Ok(())
710 710 }
711 711 }
@@ -1,876 +1,874 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::ops::Deref;
21 21 use std::path::{Path, PathBuf};
22 22 use std::vec::Vec;
23 23
24 24 lazy_static! {
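/// Byte-indexed escape table: `RE_ESCAPE[b]` is byte `b`, prefixed with a
/// `\` when `b` is one of the regex special characters listed in `to_escape`.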
25 25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
28 28 for byte in to_escape {
29 29 v[*byte as usize].insert(0, b'\\');
30 30 }
31 31 v
32 32 };
33 33 }
34 34
35 35 /// These are matched in order
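/// by `glob_to_re` once a leading `*` has been consumed: `**/` becomes
/// `(?:.*/)?`, `**` becomes `.*`, and a lone `*` becomes `[^/]*`.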
36 36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
37 37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38 38
39 39 #[derive(Debug, Clone, PartialEq, Eq)]
40 40 pub enum PatternSyntax {
41 41 /// A regular expression
42 42 Regexp,
43 43 /// Glob that matches at the front of the path
44 44 RootGlob,
45 45 /// Glob that matches at any suffix of the path (still anchored at
46 46 /// slashes)
47 47 Glob,
48 48 /// A path relative to repository root, which is matched recursively
49 49 Path,
50 50 /// A single exact path relative to repository root
51 51 FilePath,
52 52 /// A path relative to cwd
53 53 RelPath,
54 54 /// An unrooted glob (`*.rs` matches Rust files in all dirs)
55 55 RelGlob,
56 56 /// A regexp that needn't match the start of a name
57 57 RelRegexp,
58 58 /// A path relative to repository root, which is matched non-recursively
59 59 /// (will not match subdirectories)
60 60 RootFiles,
61 61 /// A file of patterns to read and include
62 62 Include,
63 63 /// A file of patterns to match against files under the same directory
64 64 SubInclude,
65 65 /// SubInclude with the result of parsing the included file
66 66 ///
67 67 /// Note: there is no ExpandedInclude because that expansion can be done
68 68 /// in place by replacing the Include pattern by the included patterns.
69 69 /// SubInclude requires more handling.
70 70 ///
71 71 /// Note: `Box` is used to minimize size impact on other enum variants
72 72 ExpandedSubInclude(Box<SubInclude>),
73 73 }
74 74
75 75 /// Transforms a glob pattern into a regex
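/// (for example `?` becomes `.`, `**` becomes `.*` and `{a,b}` becomes
/// `(?:a|b)`; see `glob_test` below for more)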
76 76 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 77 let mut input = pat;
78 78 let mut res: Vec<u8> = vec![];
79 79 let mut group_depth = 0;
80 80
81 81 while let Some((c, rest)) = input.split_first() {
82 82 input = rest;
83 83
84 84 match c {
85 85 b'*' => {
86 86 for (source, repl) in GLOB_REPLACEMENTS {
87 87 if let Some(rest) = input.drop_prefix(source) {
88 88 input = rest;
89 89 res.extend(*repl);
90 90 break;
91 91 }
92 92 }
93 93 }
94 94 b'?' => res.extend(b"."),
95 95 b'[' => {
96 96 match input.iter().skip(1).position(|b| *b == b']') {
97 97 None => res.extend(b"\\["),
98 98 Some(end) => {
99 99 // Account for the one we skipped
100 100 let end = end + 1;
101 101
102 102 res.extend(b"[");
103 103
104 104 for (i, b) in input[..end].iter().enumerate() {
105 105 if *b == b'!' && i == 0 {
106 106 res.extend(b"^")
107 107 } else if *b == b'^' && i == 0 {
108 108 res.extend(b"\\^")
109 109 } else if *b == b'\\' {
110 110 res.extend(b"\\\\")
111 111 } else {
112 112 res.push(*b)
113 113 }
114 114 }
115 115 res.extend(b"]");
116 116 input = &input[end + 1..];
117 117 }
118 118 }
119 119 }
120 120 b'{' => {
121 121 group_depth += 1;
122 122 res.extend(b"(?:")
123 123 }
124 124 b'}' if group_depth > 0 => {
125 125 group_depth -= 1;
126 126 res.extend(b")");
127 127 }
128 128 b',' if group_depth > 0 => res.extend(b"|"),
129 129 b'\\' => {
130 130 let c = {
131 131 if let Some((c, rest)) = input.split_first() {
132 132 input = rest;
133 133 c
134 134 } else {
135 135 c
136 136 }
137 137 };
138 138 res.extend(&RE_ESCAPE[*c as usize])
139 139 }
140 140 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 141 }
142 142 }
143 143 res
144 144 }
145 145
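/// Escape every regex special character in `pattern` (using `RE_ESCAPE`).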
146 146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 147 pattern
148 148 .iter()
149 149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 150 .collect()
151 151 }
152 152
153 153 pub fn parse_pattern_syntax(
154 154 kind: &[u8],
155 155 ) -> Result<PatternSyntax, PatternError> {
156 156 match kind {
157 157 b"re:" => Ok(PatternSyntax::Regexp),
158 158 b"path:" => Ok(PatternSyntax::Path),
159 159 b"filepath:" => Ok(PatternSyntax::FilePath),
160 160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 164 b"glob:" => Ok(PatternSyntax::Glob),
165 165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 166 b"include:" => Ok(PatternSyntax::Include),
167 167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 168 _ => Err(PatternError::UnsupportedSyntax(
169 169 String::from_utf8_lossy(kind).to_string(),
170 170 )),
171 171 }
172 172 }
173 173
174 174 lazy_static! {
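/// Matches an inline flags group such as `(?i)` or `(?ia)` at the start of
/// a regexp pattern, as used by the `RelRegexp` handling in
/// `_build_single_regex` below.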
175 175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 176 }
177 177
178 178 /// Builds the regex that corresponds to the given pattern.
179 179 /// If within a `syntax: regexp` context, returns the pattern,
180 180 /// otherwise, returns the corresponding regex.
181 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
182 182 let IgnorePattern {
183 183 syntax, pattern, ..
184 184 } = entry;
185 185 if pattern.is_empty() {
186 186 return vec![];
187 187 }
188 188 match syntax {
189 189 PatternSyntax::Regexp => pattern.to_owned(),
190 190 PatternSyntax::RelRegexp => {
191 191 // The `regex` crate accepts `**` while `re2` and Python's `re`
192 192 // do not. Checking for `*` correctly triggers the same error in
193 193 // all engines.
194 194 if pattern[0] == b'^'
195 195 || pattern[0] == b'*'
196 196 || pattern.starts_with(b".*")
197 197 {
198 198 return pattern.to_owned();
199 199 }
200 200 match FLAG_RE.find(pattern) {
201 201 Some(mat) => {
202 202 let s = mat.start();
203 203 let e = mat.end();
204 204 [
205 205 &b"(?"[..],
206 206 &pattern[s + 2..e - 1],
207 207 &b":"[..],
208 208 if pattern[e] == b'^'
209 209 || pattern[e] == b'*'
210 210 || pattern[e..].starts_with(b".*")
211 211 {
212 212 &b""[..]
213 213 } else {
214 214 &b".*"[..]
215 215 },
216 216 &pattern[e..],
217 217 &b")"[..],
218 218 ]
219 219 .concat()
220 220 }
221 221 None => [&b".*"[..], pattern].concat(),
222 222 }
223 223 }
224 224 PatternSyntax::Path | PatternSyntax::RelPath => {
225 225 if pattern == b"." {
226 226 return vec![];
227 227 }
228 228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
229 229 }
230 230 PatternSyntax::RootFiles => {
231 231 let mut res = if pattern == b"." {
232 232 vec![]
233 233 } else {
234 234 // Pattern is a directory name.
235 235 [escape_pattern(pattern).as_slice(), b"/"].concat()
236 236 };
237 237
238 238 // Anything after the pattern must be a non-directory.
239 239 res.extend(b"[^/]+$");
240 240 res
241 241 }
242 242 PatternSyntax::RelGlob => {
243 243 let glob_re = glob_to_re(pattern);
244 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
245 245 [b".*", rest, glob_suffix].concat()
246 246 } else {
247 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
248 248 }
249 249 }
250 250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
251 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
252 252 }
253 253 PatternSyntax::Include
254 254 | PatternSyntax::SubInclude
255 255 | PatternSyntax::ExpandedSubInclude(_)
256 256 | PatternSyntax::FilePath => unreachable!(),
257 257 }
258 258 }
259 259
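/// Characters that make a glob pattern non-trivial: a `rootglob:` pattern
/// containing none of these can be matched as a literal path
/// (see `build_single_regex` below).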
260 260 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
261 261 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
262 262
263 263 /// TODO support other platforms
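/// (Illustrative: `a//./b/../c` normalizes to `a/c`, `///a` to `/a`,
/// and an empty path to `.`.)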
264 264 #[cfg(unix)]
265 265 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
266 266 if bytes.is_empty() {
267 267 return b".".to_vec();
268 268 }
269 269 let sep = b'/';
270 270
271 271 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
272 272 if initial_slashes > 2 {
273 273 // POSIX allows one or two initial slashes, but treats three or more
274 274 // as a single slash.
275 275 initial_slashes = 1;
276 276 }
277 277 let components = bytes
278 278 .split(|b| *b == sep)
279 279 .filter(|c| !(c.is_empty() || c == b"."))
280 280 .fold(vec![], |mut acc, component| {
281 281 if component != b".."
282 282 || (initial_slashes == 0 && acc.is_empty())
283 283 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
284 284 {
285 285 acc.push(component)
286 286 } else if !acc.is_empty() {
287 287 acc.pop();
288 288 }
289 289 acc
290 290 });
291 291 let mut new_bytes = components.join(&sep);
292 292
293 293 if initial_slashes > 0 {
294 294 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
295 295 buf.extend(new_bytes);
296 296 new_bytes = buf;
297 297 }
298 298 if new_bytes.is_empty() {
299 299 b".".to_vec()
300 300 } else {
301 301 new_bytes
302 302 }
303 303 }
304 304
305 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
306 306 /// that don't need to be transformed into a regex.
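/// For instance `rootglob:` patterns without any glob special character and
/// `filepath:` patterns yield `Ok(None)`; see
/// `test_build_single_regex_shortcut` below.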
307 307 pub fn build_single_regex(
308 308 entry: &IgnorePattern,
309 309 glob_suffix: &[u8],
310 310 ) -> Result<Option<Vec<u8>>, PatternError> {
311 311 let IgnorePattern {
312 312 pattern, syntax, ..
313 313 } = entry;
314 314 let pattern = match syntax {
315 315 PatternSyntax::RootGlob
316 316 | PatternSyntax::Path
317 317 | PatternSyntax::RelGlob
318 318 | PatternSyntax::RelPath
319 319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
320 320 PatternSyntax::Include | PatternSyntax::SubInclude => {
321 321 return Err(PatternError::NonRegexPattern(entry.clone()))
322 322 }
323 323 _ => pattern.to_owned(),
324 324 };
325 325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
326 326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
327 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
328 328 Ok(None)
329 329 } else {
330 330 let mut entry = entry.clone();
331 331 entry.pattern = pattern;
332 332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
333 333 }
334 334 }
335 335
336 336 lazy_static! {
337 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
338 338 let mut m = FastHashMap::default();
339 339
340 340 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
341 341 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
342 342 m.insert(b"path:".as_ref(), PatternSyntax::Path);
343 343 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
344 344 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
345 345 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
346 346 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
347 347 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
348 348 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
349 349 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
350 350 m.insert(b"include:".as_ref(), PatternSyntax::Include);
351 351 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
352 352
353 353 m
354 354 };
355 355 }
356 356
357 357 #[derive(Debug)]
358 358 pub enum PatternFileWarning {
359 359 /// (file path, syntax bytes)
360 360 InvalidSyntax(PathBuf, Vec<u8>),
361 361 /// File path
362 362 NoSuchFile(PathBuf),
363 363 }
364 364
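/// Parse a single pattern, honoring an explicit `kind:` prefix (e.g.
/// `glob:`) and falling back to `default` otherwise; path-like patterns are
/// normalized when `normalize` is true.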
365 365 pub fn parse_one_pattern(
366 366 pattern: &[u8],
367 367 source: &Path,
368 368 default: PatternSyntax,
369 369 normalize: bool,
370 370 ) -> IgnorePattern {
371 371 let mut pattern_bytes: &[u8] = pattern;
372 372 let mut syntax = default;
373 373
374 374 for (s, val) in SYNTAXES.iter() {
375 375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 376 syntax = val.clone();
377 377 pattern_bytes = rest;
378 378 break;
379 379 }
380 380 }
381 381
382 382 let pattern = match syntax {
383 383 PatternSyntax::RootGlob
384 384 | PatternSyntax::Path
385 385 | PatternSyntax::Glob
386 386 | PatternSyntax::RelGlob
387 387 | PatternSyntax::RelPath
388 388 | PatternSyntax::RootFiles
389 389 if normalize =>
390 390 {
391 391 normalize_path_bytes(pattern_bytes)
392 392 }
393 393 _ => pattern_bytes.to_vec(),
394 394 };
395 395
396 396 IgnorePattern {
397 397 syntax,
398 398 pattern,
399 399 source: source.to_owned(),
400 400 }
401 401 }
402 402
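/// Parse the contents of a pattern file (e.g. an ignore file): `syntax:`
/// lines switch the current default syntax, `#` comments are stripped
/// (a literal `#` can be escaped as `\#`), and the parsed patterns are
/// returned along with any warnings.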
403 403 pub fn parse_pattern_file_contents(
404 404 lines: &[u8],
405 405 file_path: &Path,
406 406 default_syntax_override: Option<PatternSyntax>,
407 407 warn: bool,
408 408 relativize: bool,
409 409 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
410 410 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
411 411
412 412 #[allow(clippy::trivial_regex)]
413 413 let comment_escape_regex = Regex::new(r"\\#").unwrap();
414 414 let mut inputs: Vec<IgnorePattern> = vec![];
415 415 let mut warnings: Vec<PatternFileWarning> = vec![];
416 416
417 417 let mut current_syntax =
418 418 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
419 419
420 420 for mut line in lines.split(|c| *c == b'\n') {
421 421 let line_buf;
422 422 if line.contains(&b'#') {
423 423 if let Some(cap) = comment_regex.captures(line) {
424 424 line = &line[..cap.get(1).unwrap().end()]
425 425 }
426 426 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
427 427 line = &line_buf;
428 428 }
429 429
430 430 let line = line.trim_end();
431 431
432 432 if line.is_empty() {
433 433 continue;
434 434 }
435 435
436 436 if let Some(syntax) = line.drop_prefix(b"syntax:") {
437 437 let syntax = syntax.trim();
438 438
439 439 if let Some(parsed) =
440 440 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
441 441 {
442 442 current_syntax = parsed.clone();
443 443 } else if warn {
444 444 warnings.push(PatternFileWarning::InvalidSyntax(
445 445 file_path.to_owned(),
446 446 syntax.to_owned(),
447 447 ));
448 448 }
449 449 } else {
450 450 let pattern = parse_one_pattern(
451 451 line,
452 452 file_path,
453 453 current_syntax.clone(),
454 454 false,
455 455 );
456 456 inputs.push(if relativize {
457 457 pattern.to_relative()
458 458 } else {
459 459 pattern
460 460 })
461 461 }
462 462 }
463 463 Ok((inputs, warnings))
464 464 }
465 465
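/// Parse patterns given on the command line, resolving relative paths and
/// globs against `cwd` within `root` (the recorded source is `<args>`).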
466 466 pub fn parse_pattern_args(
467 467 patterns: Vec<Vec<u8>>,
468 468 cwd: &Path,
469 469 root: &Path,
470 470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
471 471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
472 472 for pattern in patterns {
473 473 let pattern = parse_one_pattern(
474 474 &pattern,
475 475 Path::new("<args>"),
476 476 PatternSyntax::RelPath,
477 477 true,
478 478 );
479 479 match pattern.syntax {
480 480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
481 481 let name = get_path_from_bytes(&pattern.pattern);
482 482 let canon = canonical_path(root, cwd, name)?;
483 483 ignore_patterns.push(IgnorePattern {
484 484 syntax: pattern.syntax,
485 485 pattern: get_bytes_from_path(canon),
486 486 source: pattern.source,
487 487 })
488 488 }
489 489 _ => ignore_patterns.push(pattern.to_owned()),
490 490 };
491 491 }
492 492 Ok(ignore_patterns)
493 493 }
494 494
495 495 pub fn read_pattern_file(
496 496 file_path: &Path,
497 497 warn: bool,
498 498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
499 499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
500 500 match std::fs::read(file_path) {
501 501 Ok(contents) => {
502 502 inspect_pattern_bytes(file_path, &contents);
503 503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
504 504 }
505 505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
506 506 vec![],
507 507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
508 508 )),
509 509 Err(e) => Err(e.into()),
510 510 }
511 511 }
512 512
513 513 /// Represents an entry in an "ignore" file.
514 514 #[derive(Debug, Eq, PartialEq, Clone)]
515 515 pub struct IgnorePattern {
516 516 pub syntax: PatternSyntax,
517 517 pub pattern: Vec<u8>,
518 518 pub source: PathBuf,
519 519 }
520 520
521 521 impl IgnorePattern {
522 522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
523 523 Self {
524 524 syntax,
525 525 pattern: pattern.to_owned(),
526 526 source: source.to_owned(),
527 527 }
528 528 }
529 529
530 530 pub fn to_relative(self) -> Self {
531 531 let Self {
532 532 syntax,
533 533 pattern,
534 534 source,
535 535 } = self;
536 536 Self {
537 537 syntax: match syntax {
538 538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
539 539 PatternSyntax::Glob => PatternSyntax::RelGlob,
540 540 x => x,
541 541 },
542 542 pattern,
543 543 source,
544 544 }
545 545 }
546 546 }
547 547
548 548 pub type PatternResult<T> = Result<T, PatternError>;
549 549
550 550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
551 551 /// and `subinclude:` patterns.
552 552 ///
553 553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
554 554 /// is used for the latter to form a tree of patterns.
555 555 pub fn get_patterns_from_file(
556 556 pattern_file: &Path,
557 557 root_dir: &Path,
558 558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
559 559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
560 560 let (patterns, mut warnings) =
561 561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
562 562 let patterns = patterns
563 563 .into_iter()
564 564 .flat_map(|entry| -> PatternResult<_> {
565 565 Ok(match &entry.syntax {
566 566 PatternSyntax::Include => {
567 567 let inner_include =
568 568 root_dir.join(get_path_from_bytes(&entry.pattern));
569 569 let (inner_pats, inner_warnings) = get_patterns_from_file(
570 570 &inner_include,
571 571 root_dir,
572 572 inspect_pattern_bytes,
573 573 )?;
574 574 warnings.extend(inner_warnings);
575 575 inner_pats
576 576 }
577 577 PatternSyntax::SubInclude => {
578 578 let mut sub_include = SubInclude::new(
579 579 root_dir,
580 580 &entry.pattern,
581 581 &entry.source,
582 582 )?;
583 583 let (inner_patterns, inner_warnings) =
584 584 get_patterns_from_file(
585 585 &sub_include.path,
586 586 &sub_include.root,
587 587 inspect_pattern_bytes,
588 588 )?;
589 589 sub_include.included_patterns = inner_patterns;
590 590 warnings.extend(inner_warnings);
591 591 vec![IgnorePattern {
592 592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
593 593 sub_include,
594 594 )),
595 595 ..entry
596 596 }]
597 597 }
598 598 _ => vec![entry],
599 599 })
600 600 })
601 601 .flatten()
602 602 .collect();
603 603
604 604 Ok((patterns, warnings))
605 605 }
606 606
607 607 /// Holds all the information needed to handle a `subinclude:` pattern.
608 608 #[derive(Debug, PartialEq, Eq, Clone)]
609 609 pub struct SubInclude {
610 610 /// Will be used for repository (hg) paths that start with this prefix.
611 611 /// It is relative to the current working directory, so comparing against
612 612 /// repository paths is painless.
613 613 pub prefix: HgPathBuf,
614 614 /// The file itself, containing the patterns
615 615 pub path: PathBuf,
616 616 /// Folder in the filesystem where this applies
617 617 pub root: PathBuf,
618 618
619 619 pub included_patterns: Vec<IgnorePattern>,
620 620 }
621 621
622 622 impl SubInclude {
623 623 pub fn new(
624 624 root_dir: &Path,
625 625 pattern: &[u8],
626 626 source: &Path,
627 627 ) -> Result<SubInclude, HgPathError> {
628 628 let normalized_source =
629 629 normalize_path_bytes(&get_bytes_from_path(source));
630 630
631 631 let source_root = get_path_from_bytes(&normalized_source);
632 let source_root =
633 source_root.parent().unwrap_or_else(|| source_root.deref());
632 let source_root = source_root.parent().unwrap_or(source_root);
634 633
635 634 let path = source_root.join(get_path_from_bytes(pattern));
636 635 let new_root = path.parent().unwrap_or_else(|| path.deref());
637 636
638 637 let prefix = canonical_path(root_dir, root_dir, new_root)?;
639 638
640 639 Ok(Self {
641 640 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
642 641 if !p.is_empty() {
643 642 p.push_byte(b'/');
644 643 }
645 644 p
646 645 })?,
647 646 path: path.to_owned(),
648 647 root: new_root.to_owned(),
649 648 included_patterns: Vec::new(),
650 649 })
651 650 }
652 651 }
653 652
654 653 /// Separate and pre-process subincludes from other patterns for the "ignore"
655 654 /// phase.
656 655 pub fn filter_subincludes(
657 656 ignore_patterns: Vec<IgnorePattern>,
658 657 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
659 658 let mut subincludes = vec![];
660 659 let mut others = vec![];
661 660
662 661 for pattern in ignore_patterns {
663 662 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
664 663 {
665 664 subincludes.push(*sub_include);
666 665 } else {
667 666 others.push(pattern)
668 667 }
669 668 }
670 669 Ok((subincludes, others))
671 670 }
672 671
673 672 #[cfg(test)]
674 673 mod tests {
675 674 use super::*;
676 675 use pretty_assertions::assert_eq;
677 676
678 677 #[test]
679 678 fn escape_pattern_test() {
680 679 let untouched =
681 680 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
682 681 assert_eq!(escape_pattern(untouched), untouched.to_vec());
683 682 // All escape codes
684 683 assert_eq!(
685 escape_pattern(br#"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"#),
686 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f"#
687 .to_vec()
684 escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
685 br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
688 686 );
689 687 }
690 688
691 689 #[test]
692 690 fn glob_test() {
693 assert_eq!(glob_to_re(br#"?"#), br#"."#);
694 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
695 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
696 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
697 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
698 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
699 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
700 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
691 assert_eq!(glob_to_re(br"?"), br".");
692 assert_eq!(glob_to_re(br"*"), br"[^/]*");
693 assert_eq!(glob_to_re(br"**"), br".*");
694 assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
695 assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
696 assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
697 assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
698 assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
701 699 }
702 700
703 701 #[test]
704 702 fn test_parse_pattern_file_contents() {
705 703 let lines = b"syntax: glob\n*.elc";
706 704
707 705 assert_eq!(
708 706 parse_pattern_file_contents(
709 707 lines,
710 708 Path::new("file_path"),
711 709 None,
712 710 false,
713 711 true,
714 712 )
715 713 .unwrap()
716 714 .0,
717 715 vec![IgnorePattern::new(
718 716 PatternSyntax::RelGlob,
719 717 b"*.elc",
720 718 Path::new("file_path")
721 719 )],
722 720 );
723 721
724 722 let lines = b"syntax: include\nsyntax: glob";
725 723
726 724 assert_eq!(
727 725 parse_pattern_file_contents(
728 726 lines,
729 727 Path::new("file_path"),
730 728 None,
731 729 false,
732 730 true,
733 731 )
734 732 .unwrap()
735 733 .0,
736 734 vec![]
737 735 );
738 736 let lines = b"glob:**.o";
739 737 assert_eq!(
740 738 parse_pattern_file_contents(
741 739 lines,
742 740 Path::new("file_path"),
743 741 None,
744 742 false,
745 743 true,
746 744 )
747 745 .unwrap()
748 746 .0,
749 747 vec![IgnorePattern::new(
750 748 PatternSyntax::RelGlob,
751 749 b"**.o",
752 750 Path::new("file_path")
753 751 )]
754 752 );
755 753 }
756 754
757 755 #[test]
758 756 fn test_build_single_regex() {
759 757 assert_eq!(
760 758 build_single_regex(
761 759 &IgnorePattern::new(
762 760 PatternSyntax::RelGlob,
763 761 b"rust/target/",
764 762 Path::new("")
765 763 ),
766 764 b"(?:/|$)"
767 765 )
768 766 .unwrap(),
769 767 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
770 768 );
771 769 assert_eq!(
772 770 build_single_regex(
773 771 &IgnorePattern::new(
774 772 PatternSyntax::Regexp,
775 773 br"rust/target/\d+",
776 774 Path::new("")
777 775 ),
778 776 b"(?:/|$)"
779 777 )
780 778 .unwrap(),
781 779 Some(br"rust/target/\d+".to_vec()),
782 780 );
783 781 }
784 782
785 783 #[test]
786 784 fn test_build_single_regex_shortcut() {
787 785 assert_eq!(
788 786 build_single_regex(
789 787 &IgnorePattern::new(
790 788 PatternSyntax::RootGlob,
791 789 b"",
792 790 Path::new("")
793 791 ),
794 792 b"(?:/|$)"
795 793 )
796 794 .unwrap(),
797 795 None,
798 796 );
799 797 assert_eq!(
800 798 build_single_regex(
801 799 &IgnorePattern::new(
802 800 PatternSyntax::RootGlob,
803 801 b"whatever",
804 802 Path::new("")
805 803 ),
806 804 b"(?:/|$)"
807 805 )
808 806 .unwrap(),
809 807 None,
810 808 );
811 809 assert_eq!(
812 810 build_single_regex(
813 811 &IgnorePattern::new(
814 812 PatternSyntax::RootGlob,
815 813 b"*.o",
816 814 Path::new("")
817 815 ),
818 816 b"(?:/|$)"
819 817 )
820 818 .unwrap(),
821 819 Some(br"[^/]*\.o(?:/|$)".to_vec()),
822 820 );
823 821 }
824 822
825 823 #[test]
826 824 fn test_build_single_relregex() {
827 825 assert_eq!(
828 826 build_single_regex(
829 827 &IgnorePattern::new(
830 828 PatternSyntax::RelRegexp,
831 829 b"^ba{2}r",
832 830 Path::new("")
833 831 ),
834 832 b"(?:/|$)"
835 833 )
836 834 .unwrap(),
837 835 Some(b"^ba{2}r".to_vec()),
838 836 );
839 837 assert_eq!(
840 838 build_single_regex(
841 839 &IgnorePattern::new(
842 840 PatternSyntax::RelRegexp,
843 841 b"ba{2}r",
844 842 Path::new("")
845 843 ),
846 844 b"(?:/|$)"
847 845 )
848 846 .unwrap(),
849 847 Some(b".*ba{2}r".to_vec()),
850 848 );
851 849 assert_eq!(
852 850 build_single_regex(
853 851 &IgnorePattern::new(
854 852 PatternSyntax::RelRegexp,
855 853 b"(?ia)ba{2}r",
856 854 Path::new("")
857 855 ),
858 856 b"(?:/|$)"
859 857 )
860 858 .unwrap(),
861 859 Some(b"(?ia:.*ba{2}r)".to_vec()),
862 860 );
863 861 assert_eq!(
864 862 build_single_regex(
865 863 &IgnorePattern::new(
866 864 PatternSyntax::RelRegexp,
867 865 b"(?ia)^ba{2}r",
868 866 Path::new("")
869 867 ),
870 868 b"(?:/|$)"
871 869 )
872 870 .unwrap(),
873 871 Some(b"(?ia:^ba{2}r)".to_vec()),
874 872 );
875 873 }
876 874 }
@@ -1,2110 +1,2109 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use format_bytes::format_bytes;
11 11 use once_cell::sync::OnceCell;
12 12
13 13 use crate::{
14 14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 15 filepatterns::{
16 16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 17 PatternFileWarning, PatternResult,
18 18 },
19 19 utils::{
20 20 files::find_dirs,
21 21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 22 Escaped,
23 23 },
24 24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 25 };
26 26
27 27 use crate::dirstate::status::IgnoreFnType;
28 28 use crate::filepatterns::normalize_path_bytes;
29 29 use std::collections::HashSet;
30 30 use std::fmt::{Display, Error, Formatter};
31 use std::ops::Deref;
32 31 use std::path::{Path, PathBuf};
33 32 use std::{borrow::ToOwned, collections::BTreeSet};
34 33
35 34 #[derive(Debug, PartialEq)]
36 35 pub enum VisitChildrenSet {
37 36 /// Don't visit anything
38 37 Empty,
39 38 /// Only visit this directory
40 39 This,
41 40 /// Visit this directory and these subdirectories
42 41 /// TODO Should we implement a `NonEmptyHashSet`?
43 42 Set(HashSet<HgPathBuf>),
44 43 /// Visit this directory and all subdirectories
45 44 Recursive,
46 45 }
47 46
48 47 pub trait Matcher: core::fmt::Debug {
49 48 /// Explicitly listed files
50 49 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
51 50 /// Returns whether `filename` is in `file_set`
52 51 fn exact_match(&self, filename: &HgPath) -> bool;
53 52 /// Returns whether `filename` is matched by this matcher
54 53 fn matches(&self, filename: &HgPath) -> bool;
55 54 /// Decides whether a directory should be visited based on whether it
56 55 /// has potential matches in it or one of its subdirectories, and
57 56 /// potentially lists which subdirectories of that directory should be
58 57 /// visited. This is based on the match's primary, included, and excluded
59 58 /// patterns.
60 59 ///
61 60 /// # Example
62 61 ///
63 62 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 63 /// return the following values (assuming the implementation of
65 64 /// visit_children_set is capable of recognizing this; some implementations
66 65 /// are not).
67 66 ///
68 67 /// ```text
70 69 /// '' -> {'foo', 'qux'}
71 70 /// 'baz' -> set()
72 71 /// 'foo' -> {'bar'}
73 72 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 73 /// // matchers is applied to the entire matcher, we have to downgrade this
75 74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 75 /// // `RootFilesIn`-kind matcher being mixed in.
77 76 /// 'foo/bar' -> 'this'
78 77 /// 'qux' -> 'this'
79 78 /// ```
80 79 /// # Important
81 80 ///
82 81 /// Most matchers do not know if they're representing files or
83 82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 83 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 86 /// it may return `VisitChildrenSet::This`.
88 87 /// Do not rely on the return being a `HashSet` indicating that there are
89 88 /// no files in this dir to investigate (or equivalently that if there are
90 89 /// files to investigate in 'dir' that it will always return
91 90 /// `VisitChildrenSet::This`).
92 91 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
93 92 /// Matcher will match everything and `files_set()` will be empty:
94 93 /// optimization might be possible.
95 94 fn matches_everything(&self) -> bool;
96 95 /// Matcher will match exactly the files in `files_set()`: optimization
97 96 /// might be possible.
98 97 fn is_exact(&self) -> bool;
99 98 }
100 99
101 100 /// Matches everything.
102 101 ///```
103 102 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
104 103 ///
105 104 /// let matcher = AlwaysMatcher;
106 105 ///
107 106 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
108 107 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
109 108 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
110 109 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
111 110 /// ```
112 111 #[derive(Debug)]
113 112 pub struct AlwaysMatcher;
114 113
115 114 impl Matcher for AlwaysMatcher {
116 115 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
117 116 None
118 117 }
119 118 fn exact_match(&self, _filename: &HgPath) -> bool {
120 119 false
121 120 }
122 121 fn matches(&self, _filename: &HgPath) -> bool {
123 122 true
124 123 }
125 124 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
126 125 VisitChildrenSet::Recursive
127 126 }
128 127 fn matches_everything(&self) -> bool {
129 128 true
130 129 }
131 130 fn is_exact(&self) -> bool {
132 131 false
133 132 }
134 133 }
135 134
136 135 /// Matches nothing.
137 136 #[derive(Debug)]
138 137 pub struct NeverMatcher;
139 138
140 139 impl Matcher for NeverMatcher {
141 140 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
142 141 None
143 142 }
144 143 fn exact_match(&self, _filename: &HgPath) -> bool {
145 144 false
146 145 }
147 146 fn matches(&self, _filename: &HgPath) -> bool {
148 147 false
149 148 }
150 149 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
151 150 VisitChildrenSet::Empty
152 151 }
153 152 fn matches_everything(&self) -> bool {
154 153 false
155 154 }
156 155 fn is_exact(&self) -> bool {
157 156 true
158 157 }
159 158 }
160 159
161 160 /// Matches the input files exactly. They are interpreted as paths, not
162 161 /// patterns.
163 162 ///
164 163 ///```
165 164 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
166 165 ///
167 166 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
168 167 /// let matcher = FileMatcher::new(files).unwrap();
169 168 ///
170 169 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
171 170 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
172 171 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
173 172 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
174 173 /// ```
175 174 #[derive(Debug)]
176 175 pub struct FileMatcher {
177 176 files: HashSet<HgPathBuf>,
178 177 dirs: DirsMultiset,
179 178 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
180 179 }
181 180
182 181 impl FileMatcher {
183 182 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
184 183 let dirs = DirsMultiset::from_manifest(&files)?;
185 184 Ok(Self {
186 files: HashSet::from_iter(files.into_iter()),
185 files: HashSet::from_iter(files),
187 186 dirs,
188 187 sorted_visitchildrenset_candidates: OnceCell::new(),
189 188 })
190 189 }
191 190 fn inner_matches(&self, filename: &HgPath) -> bool {
192 191 self.files.contains(filename.as_ref())
193 192 }
194 193 }
195 194
196 195 impl Matcher for FileMatcher {
197 196 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
198 197 Some(&self.files)
199 198 }
200 199 fn exact_match(&self, filename: &HgPath) -> bool {
201 200 self.inner_matches(filename)
202 201 }
203 202 fn matches(&self, filename: &HgPath) -> bool {
204 203 self.inner_matches(filename)
205 204 }
206 205 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
207 206 if self.files.is_empty() || !self.dirs.contains(directory) {
208 207 return VisitChildrenSet::Empty;
209 208 }
210 209
211 210 let compute_candidates = || -> BTreeSet<HgPathBuf> {
212 211 let mut candidates: BTreeSet<HgPathBuf> =
213 212 self.dirs.iter().cloned().collect();
214 213 candidates.extend(self.files.iter().cloned());
215 214 candidates.remove(HgPath::new(b""));
216 215 candidates
217 216 };
218 217 let candidates =
219 218 if directory.as_ref().is_empty() {
220 219 compute_candidates()
221 220 } else {
222 221 let sorted_candidates = self
223 222 .sorted_visitchildrenset_candidates
224 223 .get_or_init(compute_candidates);
225 224 let directory_bytes = directory.as_ref().as_bytes();
226 225 let start: HgPathBuf =
227 226 format_bytes!(b"{}/", directory_bytes).into();
228 227 let start_len = start.len();
229 228 // `0` sorts after `/`
230 229 let end = format_bytes!(b"{}0", directory_bytes).into();
231 230 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
232 231 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
233 232 ))
234 233 };
235 234
236 235 // `self.dirs` includes all of the directories, recursively, so if
237 236 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
238 237 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
239 238 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
240 239 // subdir will be in there without a slash.
241 240 VisitChildrenSet::Set(
242 241 candidates
243 242 .into_iter()
244 243 .filter_map(|c| {
245 244 if c.bytes().all(|b| *b != b'/') {
246 245 Some(c)
247 246 } else {
248 247 None
249 248 }
250 249 })
251 250 .collect(),
252 251 )
253 252 }
254 253 fn matches_everything(&self) -> bool {
255 254 false
256 255 }
257 256 fn is_exact(&self) -> bool {
258 257 true
259 258 }
260 259 }
261 260
262 261 /// Matches a set of (kind, pat, source) against a 'root' directory.
263 262 /// (Currently the 'root' directory is effectively always empty)
264 263 /// ```
265 264 /// use hg::{
266 265 /// matchers::{PatternMatcher, Matcher},
267 266 /// IgnorePattern,
268 267 /// PatternSyntax,
269 268 /// utils::hg_path::{HgPath, HgPathBuf}
270 269 /// };
271 270 /// use std::collections::HashSet;
272 271 /// use std::path::Path;
273 272 /// ///
274 273 /// let ignore_patterns : Vec<IgnorePattern> =
275 274 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
276 275 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
277 276 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
278 277 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
279 278 /// ];
280 279 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
281 280 /// ///
282 281 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
283 282 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
284 283 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
285 284 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
286 285 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
287 286 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
288 287 /// assert_eq!(matcher.file_set().unwrap(),
289 288 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
290 289 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
291 290 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
292 291 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
293 292 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
294 293 /// ```
295 294 pub struct PatternMatcher<'a> {
296 295 patterns: Vec<u8>,
297 296 match_fn: IgnoreFnType<'a>,
298 297 /// Whether all the patterns match a prefix (i.e. recursively)
299 298 prefix: bool,
300 299 files: HashSet<HgPathBuf>,
301 300 dirs: DirsMultiset,
302 301 }
303 302
304 303 impl core::fmt::Debug for PatternMatcher<'_> {
305 304 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
306 305 f.debug_struct("PatternMatcher")
307 306 .field("patterns", &String::from_utf8_lossy(&self.patterns))
308 307 .field("prefix", &self.prefix)
309 308 .field("files", &self.files)
310 309 .field("dirs", &self.dirs)
311 310 .finish()
312 311 }
313 312 }
314 313
315 314 impl<'a> PatternMatcher<'a> {
316 315 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
317 316 let (files, _) = roots_and_dirs(&ignore_patterns);
318 317 let dirs = DirsMultiset::from_manifest(&files)?;
319 let files: HashSet<HgPathBuf> = HashSet::from_iter(files.into_iter());
318 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
320 319
321 320 let prefix = ignore_patterns.iter().all(|k| {
322 321 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
323 322 });
324 323 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
325 324
326 325 Ok(Self {
327 326 patterns,
328 327 match_fn,
329 328 prefix,
330 329 files,
331 330 dirs,
332 331 })
333 332 }
334 333 }
335 334
336 335 impl<'a> Matcher for PatternMatcher<'a> {
337 336 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
338 337 Some(&self.files)
339 338 }
340 339
341 340 fn exact_match(&self, filename: &HgPath) -> bool {
342 341 self.files.contains(filename)
343 342 }
344 343
345 344 fn matches(&self, filename: &HgPath) -> bool {
346 345 if self.files.contains(filename) {
347 346 return true;
348 347 }
349 348 (self.match_fn)(filename)
350 349 }
351 350
352 351 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
353 352 if self.prefix && self.files.contains(directory) {
354 353 return VisitChildrenSet::Recursive;
355 354 }
356 355 let path_or_parents_in_set = find_dirs(directory)
357 356 .any(|parent_dir| self.files.contains(parent_dir));
358 357 if self.dirs.contains(directory) || path_or_parents_in_set {
359 358 VisitChildrenSet::This
360 359 } else {
361 360 VisitChildrenSet::Empty
362 361 }
363 362 }
364 363
365 364 fn matches_everything(&self) -> bool {
366 365 false
367 366 }
368 367
369 368 fn is_exact(&self) -> bool {
370 369 false
371 370 }
372 371 }
373 372
374 373 /// Matches files that are included in the ignore rules.
375 374 /// ```
376 375 /// use hg::{
377 376 /// matchers::{IncludeMatcher, Matcher},
378 377 /// IgnorePattern,
379 378 /// PatternSyntax,
380 379 /// utils::hg_path::HgPath
381 380 /// };
382 381 /// use std::path::Path;
383 382 /// ///
384 383 /// let ignore_patterns =
385 384 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
386 385 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
387 386 /// ///
388 387 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
389 388 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
390 389 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
391 390 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
392 391 /// ```
393 392 pub struct IncludeMatcher<'a> {
394 393 patterns: Vec<u8>,
395 394 match_fn: IgnoreFnType<'a>,
396 395 /// Whether all the patterns match a prefix (i.e. recursively)
397 396 prefix: bool,
398 397 roots: HashSet<HgPathBuf>,
399 398 dirs: HashSet<HgPathBuf>,
400 399 parents: HashSet<HgPathBuf>,
401 400 }
402 401
403 402 impl core::fmt::Debug for IncludeMatcher<'_> {
404 403 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
405 404 f.debug_struct("IncludeMatcher")
406 405 .field("patterns", &String::from_utf8_lossy(&self.patterns))
407 406 .field("prefix", &self.prefix)
408 407 .field("roots", &self.roots)
409 408 .field("dirs", &self.dirs)
410 409 .field("parents", &self.parents)
411 410 .finish()
412 411 }
413 412 }
414 413
415 414 impl<'a> Matcher for IncludeMatcher<'a> {
416 415 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
417 416 None
418 417 }
419 418
420 419 fn exact_match(&self, _filename: &HgPath) -> bool {
421 420 false
422 421 }
423 422
424 423 fn matches(&self, filename: &HgPath) -> bool {
425 424 (self.match_fn)(filename)
426 425 }
427 426
428 427 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
429 428 let dir = directory;
430 429 if self.prefix && self.roots.contains(dir) {
431 430 return VisitChildrenSet::Recursive;
432 431 }
433 432 if self.roots.contains(HgPath::new(b""))
434 433 || self.roots.contains(dir)
435 434 || self.dirs.contains(dir)
436 435 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
437 436 {
438 437 return VisitChildrenSet::This;
439 438 }
440 439
441 440 if self.parents.contains(dir.as_ref()) {
442 441 let multiset = self.get_all_parents_children();
443 442 if let Some(children) = multiset.get(dir) {
444 443 return VisitChildrenSet::Set(
445 444 children.iter().map(HgPathBuf::from).collect(),
446 445 );
447 446 }
448 447 }
449 448 VisitChildrenSet::Empty
450 449 }
451 450
452 451 fn matches_everything(&self) -> bool {
453 452 false
454 453 }
455 454
456 455 fn is_exact(&self) -> bool {
457 456 false
458 457 }
459 458 }
460 459
461 460 /// The union of multiple matchers. Will match if any of the matchers match.
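///
/// A minimal illustrative sketch, combining the `AlwaysMatcher` and
/// `NeverMatcher` defined above:
/// ```
/// use hg::matchers::{Matcher, UnionMatcher, AlwaysMatcher, NeverMatcher};
/// use hg::utils::hg_path::HgPath;
///
/// let matchers: Vec<Box<dyn Matcher + Sync>> =
///     vec![Box::new(NeverMatcher), Box::new(AlwaysMatcher)];
/// let matcher = UnionMatcher::new(matchers);
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// ```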
462 461 #[derive(Debug)]
463 462 pub struct UnionMatcher {
464 463 matchers: Vec<Box<dyn Matcher + Sync>>,
465 464 }
466 465
467 466 impl Matcher for UnionMatcher {
468 467 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
469 468 None
470 469 }
471 470
472 471 fn exact_match(&self, _filename: &HgPath) -> bool {
473 472 false
474 473 }
475 474
476 475 fn matches(&self, filename: &HgPath) -> bool {
477 476 self.matchers.iter().any(|m| m.matches(filename))
478 477 }
479 478
480 479 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
481 480 let mut result = HashSet::new();
482 481 let mut this = false;
483 482 for matcher in self.matchers.iter() {
484 483 let visit = matcher.visit_children_set(directory);
485 484 match visit {
486 485 VisitChildrenSet::Empty => continue,
487 486 VisitChildrenSet::This => {
488 487 this = true;
489 488 // Don't break, we might have an 'all' in here.
490 489 continue;
491 490 }
492 491 VisitChildrenSet::Set(set) => {
493 492 result.extend(set);
494 493 }
495 494 VisitChildrenSet::Recursive => {
496 495 return visit;
497 496 }
498 497 }
499 498 }
500 499 if this {
501 500 return VisitChildrenSet::This;
502 501 }
503 502 if result.is_empty() {
504 503 VisitChildrenSet::Empty
505 504 } else {
506 505 VisitChildrenSet::Set(result)
507 506 }
508 507 }
509 508
510 509 fn matches_everything(&self) -> bool {
511 510 // TODO Maybe if all are AlwaysMatcher?
512 511 false
513 512 }
514 513
515 514 fn is_exact(&self) -> bool {
516 515 false
517 516 }
518 517 }
519 518
520 519 impl UnionMatcher {
521 520 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
522 521 Self { matchers }
523 522 }
524 523 }
525 524
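/// The intersection of two matchers: matches only the files that both
/// `m1` and `m2` match.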
526 525 #[derive(Debug)]
527 526 pub struct IntersectionMatcher {
528 527 m1: Box<dyn Matcher + Sync>,
529 528 m2: Box<dyn Matcher + Sync>,
530 529 files: Option<HashSet<HgPathBuf>>,
531 530 }
532 531
533 532 impl Matcher for IntersectionMatcher {
534 533 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
535 534 self.files.as_ref()
536 535 }
537 536
538 537 fn exact_match(&self, filename: &HgPath) -> bool {
539 538 self.files.as_ref().map_or(false, |f| f.contains(filename))
540 539 }
541 540
542 541 fn matches(&self, filename: &HgPath) -> bool {
543 542 self.m1.matches(filename) && self.m2.matches(filename)
544 543 }
545 544
546 545 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
547 546 let m1_set = self.m1.visit_children_set(directory);
548 547 if m1_set == VisitChildrenSet::Empty {
549 548 return VisitChildrenSet::Empty;
550 549 }
551 550 let m2_set = self.m2.visit_children_set(directory);
552 551 if m2_set == VisitChildrenSet::Empty {
553 552 return VisitChildrenSet::Empty;
554 553 }
555 554
556 555 if m1_set == VisitChildrenSet::Recursive {
557 556 return m2_set;
558 557 } else if m2_set == VisitChildrenSet::Recursive {
559 558 return m1_set;
560 559 }
561 560
562 561 match (&m1_set, &m2_set) {
563 562 (VisitChildrenSet::Recursive, _) => m2_set,
564 563 (_, VisitChildrenSet::Recursive) => m1_set,
565 564 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
566 565 VisitChildrenSet::This
567 566 }
568 567 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
569 568 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
570 569 if set.is_empty() {
571 570 VisitChildrenSet::Empty
572 571 } else {
573 572 VisitChildrenSet::Set(set)
574 573 }
575 574 }
576 575 _ => unreachable!(),
577 576 }
578 577 }
579 578
580 579 fn matches_everything(&self) -> bool {
581 580 self.m1.matches_everything() && self.m2.matches_everything()
582 581 }
583 582
584 583 fn is_exact(&self) -> bool {
585 584 self.m1.is_exact() || self.m2.is_exact()
586 585 }
587 586 }
588 587
589 588 impl IntersectionMatcher {
590 589 pub fn new(
591 590 mut m1: Box<dyn Matcher + Sync>,
592 591 mut m2: Box<dyn Matcher + Sync>,
593 592 ) -> Self {
594 593 let files = if m1.is_exact() || m2.is_exact() {
595 594 if !m1.is_exact() {
596 595 std::mem::swap(&mut m1, &mut m2);
597 596 }
598 597 m1.file_set().map(|m1_files| {
599 598 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
600 599 })
601 600 } else {
602 601 // without exact input file sets, we can't do an exact
603 602 // intersection, so we must over-approximate by
604 603 // unioning instead
605 604 m1.file_set().map(|m1_files| match m2.file_set() {
606 605 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
607 606 None => m1_files.iter().cloned().collect(),
608 607 })
609 608 };
610 609 Self { m1, m2, files }
611 610 }
612 611 }
613 612
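/// Matches everything that `base` matches, except what `excluded` matches.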
614 613 #[derive(Debug)]
615 614 pub struct DifferenceMatcher {
616 615 base: Box<dyn Matcher + Sync>,
617 616 excluded: Box<dyn Matcher + Sync>,
618 617 files: Option<HashSet<HgPathBuf>>,
619 618 }
620 619
621 620 impl Matcher for DifferenceMatcher {
622 621 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
623 622 self.files.as_ref()
624 623 }
625 624
626 625 fn exact_match(&self, filename: &HgPath) -> bool {
627 626 self.files.as_ref().map_or(false, |f| f.contains(filename))
628 627 }
629 628
630 629 fn matches(&self, filename: &HgPath) -> bool {
631 630 self.base.matches(filename) && !self.excluded.matches(filename)
632 631 }
633 632
634 633 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
635 634 let excluded_set = self.excluded.visit_children_set(directory);
636 635 if excluded_set == VisitChildrenSet::Recursive {
637 636 return VisitChildrenSet::Empty;
638 637 }
639 638 let base_set = self.base.visit_children_set(directory);
640 639 // Possible values for base: 'recursive', 'this', set(...), set()
641 640 // Possible values for excluded: 'this', set(...), set()
642 641 // If excluded has nothing under here that we care about, return base,
643 642 // even if it's 'recursive'.
644 643 if excluded_set == VisitChildrenSet::Empty {
645 644 return base_set;
646 645 }
647 646 match base_set {
648 647 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
649 648 // Never return 'recursive' here if excluded_set is any kind of
650 649 // non-empty (either 'this' or set(foo)), since excluded might
651 650 // return set() for a subdirectory.
652 651 VisitChildrenSet::This
653 652 }
654 653 set => {
655 654 // Possible values for base: set(...), set()
656 655 // Possible values for excluded: 'this', set(...)
657 656 // We ignore excluded set results. They're possibly incorrect:
658 657 // base = path:dir/subdir
659 658 // excluded=rootfilesin:dir,
660 659 // visit_children_set(''):
661 660 // base returns {'dir'}, excluded returns {'dir'}, if we
662 661 // subtracted we'd return set(), which is *not* correct, we
663 662 // still need to visit 'dir'!
664 663 set
665 664 }
666 665 }
667 666 }
668 667
669 668 fn matches_everything(&self) -> bool {
670 669 false
671 670 }
672 671
673 672 fn is_exact(&self) -> bool {
674 673 self.base.is_exact()
675 674 }
676 675 }
677 676
678 677 impl DifferenceMatcher {
679 678 pub fn new(
680 679 base: Box<dyn Matcher + Sync>,
681 680 excluded: Box<dyn Matcher + Sync>,
682 681 ) -> Self {
683 682 let base_is_exact = base.is_exact();
684 683 let base_files = base.file_set().map(ToOwned::to_owned);
685 684 let mut new = Self {
686 685 base,
687 686 excluded,
688 687 files: None,
689 688 };
690 689 if base_is_exact {
691 690 new.files = base_files.map(|files| {
692 691 files.iter().cloned().filter(|f| new.matches(f)).collect()
693 692 });
694 693 }
695 694 new
696 695 }
697 696 }
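// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example for `DifferenceMatcher`: it keeps what `base` matches minus
// what `excluded` matches, as implemented in `matches` above. The helper
// function and sample paths are hypothetical.
#[cfg(test)]
fn _difference_matcher_sketch() {
    let base = Box::new(AlwaysMatcher);
    let excluded = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            std::path::Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = DifferenceMatcher::new(base, excluded);
    // Everything matches except the excluded subtree.
    assert!(matcher.matches(HgPath::new(b"other/file.txt")));
    assert!(!matcher.matches(HgPath::new(b"dir/subdir/file.txt")));
}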
698 697
699 698 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
700 699 /// contexts.
701 700 ///
702 701 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
703 702 /// from many threads at once is prone to contention, probably within the
704 703 /// scratch space needed as the regex DFA is built lazily.
705 704 ///
706 705 /// We are in the process of raising the issue upstream, but for now
707 706 /// the workaround used here is to store the `Regex` in a lazily populated
708 707 /// thread-local variable, sharing the initial read-only compilation, but
709 708 /// not the lazy DFA scratch space mentioned above.
710 709 ///
711 710 /// This reduces the contention observed with 16+ threads, but does not
712 711 /// completely remove it. Hopefully this can be addressed upstream.
713 712 struct RegexMatcher {
714 713 /// Compiled at the start of the status algorithm, used as a base for
715 714 /// cloning in each thread-local `self.local`, thus sharing the expensive
716 715 /// first compilation.
717 716 base: regex::bytes::Regex,
718 717 /// Thread-local variable that holds the `Regex` that is actually queried
719 718 /// from each thread.
720 719 local: thread_local::ThreadLocal<regex::bytes::Regex>,
721 720 }
722 721
723 722 impl RegexMatcher {
724 723 /// Returns whether the path matches the stored `Regex`.
725 724 pub fn is_match(&self, path: &HgPath) -> bool {
726 725 self.local
727 726 .get_or(|| self.base.clone())
728 727 .is_match(path.as_bytes())
729 728 }
730 729 }
731 730
732 731 /// Returns a function that matches an `HgPath` against the given regex
733 732 /// pattern.
734 733 ///
735 734 /// This can fail when the pattern is invalid or not supported by the
736 735 /// underlying engine (the `regex` crate), for instance anything with
737 736 /// back-references.
738 737 #[logging_timer::time("trace")]
739 738 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
740 739 use std::io::Write;
741 740
742 741 // The `regex` crate adds `.*` to the start and end of expressions if there
743 742 // are no anchors, so add the start anchor.
744 743 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
745 744 for byte in pattern {
746 745 if *byte > 127 {
747 746 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
748 747 } else {
749 748 escaped_bytes.push(*byte);
750 749 }
751 750 }
752 751 escaped_bytes.push(b')');
753 752
754 753 // Avoid the cost of UTF8 checking
755 754 //
756 755 // # Safety
757 756 // This is safe because we escaped all non-ASCII bytes.
758 757 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
759 758 let re = regex::bytes::RegexBuilder::new(&pattern_string)
760 759 .unicode(false)
761 760 // Big repos with big `.hgignore` will hit the default limit and
762 761 // incur a significant performance hit. One repo's `hg status` took
763 762 // multiple *minutes*.
764 763 .dfa_size_limit(50 * (1 << 20))
765 764 .build()
766 765 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
767 766
768 767 Ok(RegexMatcher {
769 768 base: re,
770 769 local: Default::default(),
771 770 })
772 771 }
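// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example for `re_matcher`: the returned `RegexMatcher` clones the
// pre-compiled regex into a thread-local on first use in each thread (see
// `RegexMatcher::is_match` above), so the parallel `status` code can query it
// without sharing one scratch space. The pattern and paths are hypothetical;
// note that `re_matcher` only anchors the start of the pattern.
#[cfg(test)]
fn _re_matcher_sketch() -> PatternResult<()> {
    let matcher = re_matcher(br"dir/[^/]+\.rs")?;
    assert!(matcher.is_match(HgPath::new(b"dir/lib.rs")));
    assert!(!matcher.is_match(HgPath::new(b"other/lib.rs")));
    Ok(())
}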
773 772
774 773 /// Returns the regex pattern and a function that matches an `HgPath` against
775 774 /// said regex formed by the given ignore patterns.
776 fn build_regex_match<'a, 'b>(
777 ignore_patterns: &'a [IgnorePattern],
775 fn build_regex_match<'a>(
776 ignore_patterns: &[IgnorePattern],
778 777 glob_suffix: &[u8],
779 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
778 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
780 779 let mut regexps = vec![];
781 780 let mut exact_set = HashSet::new();
782 781
783 782 for pattern in ignore_patterns {
784 783 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
785 784 regexps.push(re);
786 785 } else {
787 786 let exact = normalize_path_bytes(&pattern.pattern);
788 787 exact_set.insert(HgPathBuf::from_bytes(&exact));
789 788 }
790 789 }
791 790
792 791 let full_regex = regexps.join(&b'|');
793 792
794 793 // An empty pattern would cause the regex engine to incorrectly match the
795 794 // (empty) root directory
796 795 let func = if !(regexps.is_empty()) {
797 796 let matcher = re_matcher(&full_regex)?;
798 797 let func = move |filename: &HgPath| {
799 798 exact_set.contains(filename) || matcher.is_match(filename)
800 799 };
801 800 Box::new(func) as IgnoreFnType
802 801 } else {
803 802 let func = move |filename: &HgPath| exact_set.contains(filename);
804 803 Box::new(func) as IgnoreFnType
805 804 };
806 805
807 806 Ok((full_regex, func))
808 807 }
809 808
810 809 /// Returns roots and directories corresponding to each pattern.
811 810 ///
812 811 /// This calculates the roots and directories exactly matching the patterns and
813 812 /// returns a tuple of (roots, dirs). It does not return other directories
814 813 /// which may also need to be considered, like the parent directories.
815 814 fn roots_and_dirs(
816 815 ignore_patterns: &[IgnorePattern],
817 816 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
818 817 let mut roots = Vec::new();
819 818 let mut dirs = Vec::new();
820 819
821 820 for ignore_pattern in ignore_patterns {
822 821 let IgnorePattern {
823 822 syntax, pattern, ..
824 823 } = ignore_pattern;
825 824 match syntax {
826 825 PatternSyntax::RootGlob | PatternSyntax::Glob => {
827 826 let mut root = HgPathBuf::new();
828 827 for p in pattern.split(|c| *c == b'/') {
829 828 if p.iter()
830 829 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
831 830 {
832 831 break;
833 832 }
834 833 root.push(HgPathBuf::from_bytes(p).as_ref());
835 834 }
836 835 roots.push(root);
837 836 }
838 837 PatternSyntax::Path
839 838 | PatternSyntax::RelPath
840 839 | PatternSyntax::FilePath => {
841 840 let pat = HgPath::new(if pattern == b"." {
842 841 &[] as &[u8]
843 842 } else {
844 843 pattern
845 844 });
846 845 roots.push(pat.to_owned());
847 846 }
848 847 PatternSyntax::RootFiles => {
849 848 let pat = if pattern == b"." {
850 849 &[] as &[u8]
851 850 } else {
852 851 pattern
853 852 };
854 853 dirs.push(HgPathBuf::from_bytes(pat));
855 854 }
856 855 _ => {
857 856 roots.push(HgPathBuf::new());
858 857 }
859 858 }
860 859 }
861 860 (roots, dirs)
862 861 }
863 862
864 863 /// Paths extracted from patterns
865 864 #[derive(Debug, PartialEq)]
866 865 struct RootsDirsAndParents {
867 866 /// Directories to match recursively
868 867 pub roots: HashSet<HgPathBuf>,
869 868 /// Directories to match non-recursively
870 869 pub dirs: HashSet<HgPathBuf>,
871 870 /// Implicitly required directories to go to items in either roots or dirs
872 871 pub parents: HashSet<HgPathBuf>,
873 872 }
874 873
875 874 /// Extract roots, dirs and parents from patterns.
876 875 fn roots_dirs_and_parents(
877 876 ignore_patterns: &[IgnorePattern],
878 877 ) -> PatternResult<RootsDirsAndParents> {
879 878 let (roots, dirs) = roots_and_dirs(ignore_patterns);
880 879
881 880 let mut parents = HashSet::new();
882 881
883 882 parents.extend(
884 883 DirsMultiset::from_manifest(&dirs)?
885 884 .iter()
886 885 .map(ToOwned::to_owned),
887 886 );
888 887 parents.extend(
889 888 DirsMultiset::from_manifest(&roots)?
890 889 .iter()
891 890 .map(ToOwned::to_owned),
892 891 );
893 892
894 893 Ok(RootsDirsAndParents {
895 894 roots: HashSet::from_iter(roots),
896 895 dirs: HashSet::from_iter(dirs),
897 896 parents,
898 897 })
899 898 }
900 899
901 900 /// Returns a function that checks whether a given file (in the general sense)
902 901 /// should be matched.
903 902 fn build_match<'a>(
904 903 ignore_patterns: Vec<IgnorePattern>,
905 904 glob_suffix: &[u8],
906 905 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
907 906 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
908 907 // For debugging and printing
909 908 let mut patterns = vec![];
910 909
911 910 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
912 911
913 912 if !subincludes.is_empty() {
914 913 // Build prefix-based matcher functions for subincludes
915 914 let mut submatchers = FastHashMap::default();
916 915 let mut prefixes = vec![];
917 916
918 917 for sub_include in subincludes {
919 918 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
920 919 let match_fn =
921 920 Box::new(move |path: &HgPath| matcher.matches(path));
922 921 prefixes.push(sub_include.prefix.clone());
923 922 submatchers.insert(sub_include.prefix.clone(), match_fn);
924 923 }
925 924
926 925 let match_subinclude = move |filename: &HgPath| {
927 926 for prefix in prefixes.iter() {
928 927 if let Some(rel) = filename.relative_to(prefix) {
929 928 if (submatchers[prefix])(rel) {
930 929 return true;
931 930 }
932 931 }
933 932 }
934 933 false
935 934 };
936 935
937 936 match_funcs.push(Box::new(match_subinclude));
938 937 }
939 938
940 939 if !ignore_patterns.is_empty() {
941 940 // Either do dumb matching if all patterns are rootfiles, or match
942 941 // with a regex.
943 942 if ignore_patterns
944 943 .iter()
945 944 .all(|k| k.syntax == PatternSyntax::RootFiles)
946 945 {
947 946 let dirs: HashSet<_> = ignore_patterns
948 947 .iter()
949 948 .map(|k| k.pattern.to_owned())
950 949 .collect();
951 950 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
952 951
953 952 let match_func = move |path: &HgPath| -> bool {
954 953 let path = path.as_bytes();
955 954 let i = path.iter().rposition(|a| *a == b'/');
956 955 let dir = if let Some(i) = i {
957 956 &path[..i]
958 957 } else {
959 958 b"."
960 959 };
961 dirs.contains(dir.deref())
960 dirs.contains(dir)
962 961 };
963 962 match_funcs.push(Box::new(match_func));
964 963
965 964 patterns.extend(b"rootfilesin: ");
966 965 dirs_vec.sort();
967 966 patterns.extend(dirs_vec.escaped_bytes());
968 967 } else {
969 968 let (new_re, match_func) =
970 969 build_regex_match(&ignore_patterns, glob_suffix)?;
971 970 patterns = new_re;
972 971 match_funcs.push(match_func)
973 972 }
974 973 }
975 974
976 975 Ok(if match_funcs.len() == 1 {
977 976 (patterns, match_funcs.remove(0))
978 977 } else {
979 978 (
980 979 patterns,
981 980 Box::new(move |f: &HgPath| -> bool {
982 981 match_funcs.iter().any(|match_func| match_func(f))
983 982 }),
984 983 )
985 984 })
986 985 }
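// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example of the `rootfilesin` fast path described above: when every
// pattern uses `PatternSyntax::RootFiles`, matching is a plain lookup of the
// file's immediate parent directory and no regex is built. The helper
// function, glob suffix and sample paths are hypothetical; the expected
// results follow the intended `rootfilesin` (non-recursive) semantics.
#[cfg(test)]
fn _build_match_rootfilesin_sketch() -> PatternResult<()> {
    let patterns = vec![IgnorePattern::new(
        PatternSyntax::RootFiles,
        b"dir/subdir",
        std::path::Path::new(""),
    )];
    let (_debug_patterns, match_fn) = build_match(patterns, b"(?:/|$)")?;
    // Files directly inside `dir/subdir` match...
    assert!(match_fn(HgPath::new(b"dir/subdir/file.txt")));
    // ...but files in nested sub-directories do not.
    assert!(!match_fn(HgPath::new(b"dir/subdir/nested/file.txt")));
    Ok(())
}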
987 986
988 987 /// Parses all "ignore" files with their recursive includes and returns a
989 988 /// matcher that can tell whether a given file (in the general sense) should
990 989 /// be ignored.
991 990 pub fn get_ignore_matcher<'a>(
992 991 mut all_pattern_files: Vec<PathBuf>,
993 992 root_dir: &Path,
994 993 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
995 994 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
996 995 let mut all_patterns = vec![];
997 996 let mut all_warnings = vec![];
998 997
999 998 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1000 999 // deterministic even if the ordering of `all_pattern_files` is not (such
1001 1000 // as when the iteration order of a Python dict or Rust HashMap is involved).
1002 1001 // Sort by "string" representation instead of the default component-wise
1003 1002 // ordering (with a Rust-specific definition of a component).
1004 1003 all_pattern_files
1005 1004 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1006 1005
1007 1006 for pattern_file in &all_pattern_files {
1008 1007 let (patterns, warnings) = get_patterns_from_file(
1009 1008 pattern_file,
1010 1009 root_dir,
1011 1010 inspect_pattern_bytes,
1012 1011 )?;
1013 1012
1014 1013 all_patterns.extend(patterns.to_owned());
1015 1014 all_warnings.extend(warnings);
1016 1015 }
1017 1016 let matcher = IncludeMatcher::new(all_patterns)?;
1018 1017 Ok((matcher, all_warnings))
1019 1018 }
1020 1019
1021 1020 /// Parses all "ignore" files with their recursive includes and returns a
1022 1021 /// function that checks whether a given file (in the general sense) should be
1023 1022 /// ignored.
1024 1023 pub fn get_ignore_function<'a>(
1025 1024 all_pattern_files: Vec<PathBuf>,
1026 1025 root_dir: &Path,
1027 1026 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1028 1027 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1029 1028 let res =
1030 1029 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1031 1030 res.map(|(matcher, all_warnings)| {
1032 1031 let res: IgnoreFnType<'a> =
1033 1032 Box::new(move |path: &HgPath| matcher.matches(path));
1034 1033
1035 1034 (res, all_warnings)
1036 1035 })
1037 1036 }
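// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example of how a caller might obtain an ignore predicate via
// `get_ignore_function` above. The ignore-file path is hypothetical; the
// closure argument only observes raw pattern bytes and may be a no-op.
#[cfg(test)]
fn _get_ignore_function_sketch() -> PatternResult<()> {
    // A real caller would pass the paths of the repository's ignore files.
    let ignore_files = vec![std::path::PathBuf::from("/repo/.hgignore")];
    let root_dir = std::path::Path::new("/repo");
    let (ignore_fn, _warnings) = get_ignore_function(
        ignore_files,
        root_dir,
        &mut |_path: &Path, _bytes: &[u8]| {},
    )?;
    // The returned boxed closure answers "should this file be ignored?".
    let _is_ignored: bool = ignore_fn(HgPath::new(b"target/debug/build.log"));
    Ok(())
}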
1038 1037
1039 1038 impl<'a> IncludeMatcher<'a> {
1040 1039 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1041 1040 let RootsDirsAndParents {
1042 1041 roots,
1043 1042 dirs,
1044 1043 parents,
1045 1044 } = roots_dirs_and_parents(&ignore_patterns)?;
1046 1045 let prefix = ignore_patterns.iter().all(|k| {
1047 1046 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1048 1047 });
1049 1048 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1050 1049
1051 1050 Ok(Self {
1052 1051 patterns,
1053 1052 match_fn,
1054 1053 prefix,
1055 1054 roots,
1056 1055 dirs,
1057 1056 parents,
1058 1057 })
1059 1058 }
1060 1059
1061 1060 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1062 1061 // TODO cache
1063 1062 let thing = self
1064 1063 .dirs
1065 1064 .iter()
1066 1065 .chain(self.roots.iter())
1067 1066 .chain(self.parents.iter());
1068 1067 DirsChildrenMultiset::new(thing, Some(&self.parents))
1069 1068 }
1070 1069
1071 1070 pub fn debug_get_patterns(&self) -> &[u8] {
1072 1071 self.patterns.as_ref()
1073 1072 }
1074 1073 }
1075 1074
1076 1075 impl<'a> Display for IncludeMatcher<'a> {
1077 1076 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1078 1077 // XXX What about exact matches?
1079 1078 // I'm not sure it's worth it to clone the HashSet and keep it
1080 1079 // around just in case someone wants to display the matcher, plus
1081 1080 // it's going to be unreadable after a few entries, but we need to
1082 1081 // inform in this display that exact matches are being used and are
1083 1082 // (on purpose) missing from the `includes`.
1084 1083 write!(
1085 1084 f,
1086 1085 "IncludeMatcher(includes='{}')",
1087 1086 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1088 1087 )
1089 1088 }
1090 1089 }
1091 1090
1092 1091 #[cfg(test)]
1093 1092 mod tests {
1094 1093 use super::*;
1095 1094 use pretty_assertions::assert_eq;
1096 1095 use std::path::Path;
1097 1096
1098 1097 #[test]
1099 1098 fn test_roots_and_dirs() {
1100 1099 let pats = vec![
1101 1100 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1102 1101 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1103 1102 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1104 1103 ];
1105 1104 let (roots, dirs) = roots_and_dirs(&pats);
1106 1105
1107 1106 assert_eq!(
1108 1107 roots,
1109 1108 vec!(
1110 1109 HgPathBuf::from_bytes(b"g/h"),
1111 1110 HgPathBuf::from_bytes(b"g/h"),
1112 1111 HgPathBuf::new()
1113 1112 ),
1114 1113 );
1115 1114 assert_eq!(dirs, vec!());
1116 1115 }
1117 1116
1118 1117 #[test]
1119 1118 fn test_roots_dirs_and_parents() {
1120 1119 let pats = vec![
1121 1120 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1122 1121 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1123 1122 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1124 1123 ];
1125 1124
1126 1125 let mut roots = HashSet::new();
1127 1126 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1128 1127 roots.insert(HgPathBuf::new());
1129 1128
1130 1129 let dirs = HashSet::new();
1131 1130
1132 1131 let mut parents = HashSet::new();
1133 1132 parents.insert(HgPathBuf::new());
1134 1133 parents.insert(HgPathBuf::from_bytes(b"g"));
1135 1134
1136 1135 assert_eq!(
1137 1136 roots_dirs_and_parents(&pats).unwrap(),
1138 1137 RootsDirsAndParents {
1139 1138 roots,
1140 1139 dirs,
1141 1140 parents
1142 1141 }
1143 1142 );
1144 1143 }
1145 1144
1146 1145 #[test]
1147 1146 fn test_filematcher_visit_children_set() {
1148 1147 // Visitchildrenset
1149 1148 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1150 1149 let matcher = FileMatcher::new(files).unwrap();
1151 1150
1152 1151 let mut set = HashSet::new();
1153 1152 set.insert(HgPathBuf::from_bytes(b"dir"));
1154 1153 assert_eq!(
1155 1154 matcher.visit_children_set(HgPath::new(b"")),
1156 1155 VisitChildrenSet::Set(set)
1157 1156 );
1158 1157
1159 1158 let mut set = HashSet::new();
1160 1159 set.insert(HgPathBuf::from_bytes(b"subdir"));
1161 1160 assert_eq!(
1162 1161 matcher.visit_children_set(HgPath::new(b"dir")),
1163 1162 VisitChildrenSet::Set(set)
1164 1163 );
1165 1164
1166 1165 let mut set = HashSet::new();
1167 1166 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1168 1167 assert_eq!(
1169 1168 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1170 1169 VisitChildrenSet::Set(set)
1171 1170 );
1172 1171
1173 1172 assert_eq!(
1174 1173 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1175 1174 VisitChildrenSet::Empty
1176 1175 );
1177 1176 assert_eq!(
1178 1177 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1179 1178 VisitChildrenSet::Empty
1180 1179 );
1181 1180 assert_eq!(
1182 1181 matcher.visit_children_set(HgPath::new(b"folder")),
1183 1182 VisitChildrenSet::Empty
1184 1183 );
1185 1184 }
1186 1185
1187 1186 #[test]
1188 1187 fn test_filematcher_visit_children_set_files_and_dirs() {
1189 1188 let files = vec![
1190 1189 HgPathBuf::from_bytes(b"rootfile.txt"),
1191 1190 HgPathBuf::from_bytes(b"a/file1.txt"),
1192 1191 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1193 1192 // No file in a/b/c
1194 1193 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1195 1194 ];
1196 1195 let matcher = FileMatcher::new(files).unwrap();
1197 1196
1198 1197 let mut set = HashSet::new();
1199 1198 set.insert(HgPathBuf::from_bytes(b"a"));
1200 1199 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1201 1200 assert_eq!(
1202 1201 matcher.visit_children_set(HgPath::new(b"")),
1203 1202 VisitChildrenSet::Set(set)
1204 1203 );
1205 1204
1206 1205 let mut set = HashSet::new();
1207 1206 set.insert(HgPathBuf::from_bytes(b"b"));
1208 1207 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1209 1208 assert_eq!(
1210 1209 matcher.visit_children_set(HgPath::new(b"a")),
1211 1210 VisitChildrenSet::Set(set)
1212 1211 );
1213 1212
1214 1213 let mut set = HashSet::new();
1215 1214 set.insert(HgPathBuf::from_bytes(b"c"));
1216 1215 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1217 1216 assert_eq!(
1218 1217 matcher.visit_children_set(HgPath::new(b"a/b")),
1219 1218 VisitChildrenSet::Set(set)
1220 1219 );
1221 1220
1222 1221 let mut set = HashSet::new();
1223 1222 set.insert(HgPathBuf::from_bytes(b"d"));
1224 1223 assert_eq!(
1225 1224 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1226 1225 VisitChildrenSet::Set(set)
1227 1226 );
1228 1227 let mut set = HashSet::new();
1229 1228 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1230 1229 assert_eq!(
1231 1230 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1232 1231 VisitChildrenSet::Set(set)
1233 1232 );
1234 1233
1235 1234 assert_eq!(
1236 1235 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1237 1236 VisitChildrenSet::Empty
1238 1237 );
1239 1238 assert_eq!(
1240 1239 matcher.visit_children_set(HgPath::new(b"folder")),
1241 1240 VisitChildrenSet::Empty
1242 1241 );
1243 1242 }
1244 1243
1245 1244 #[test]
1246 1245 fn test_patternmatcher() {
1247 1246 // VisitdirPrefix
1248 1247 let m = PatternMatcher::new(vec![IgnorePattern::new(
1249 1248 PatternSyntax::Path,
1250 1249 b"dir/subdir",
1251 1250 Path::new(""),
1252 1251 )])
1253 1252 .unwrap();
1254 1253 assert_eq!(
1255 1254 m.visit_children_set(HgPath::new(b"")),
1256 1255 VisitChildrenSet::This
1257 1256 );
1258 1257 assert_eq!(
1259 1258 m.visit_children_set(HgPath::new(b"dir")),
1260 1259 VisitChildrenSet::This
1261 1260 );
1262 1261 assert_eq!(
1263 1262 m.visit_children_set(HgPath::new(b"dir/subdir")),
1264 1263 VisitChildrenSet::Recursive
1265 1264 );
1266 1265 // OPT: This should probably be Recursive if its parent is?
1267 1266 assert_eq!(
1268 1267 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1269 1268 VisitChildrenSet::This
1270 1269 );
1271 1270 assert_eq!(
1272 1271 m.visit_children_set(HgPath::new(b"folder")),
1273 1272 VisitChildrenSet::Empty
1274 1273 );
1275 1274
1276 1275 // VisitchildrensetPrefix
1277 1276 let m = PatternMatcher::new(vec![IgnorePattern::new(
1278 1277 PatternSyntax::Path,
1279 1278 b"dir/subdir",
1280 1279 Path::new(""),
1281 1280 )])
1282 1281 .unwrap();
1283 1282 assert_eq!(
1284 1283 m.visit_children_set(HgPath::new(b"")),
1285 1284 VisitChildrenSet::This
1286 1285 );
1287 1286 assert_eq!(
1288 1287 m.visit_children_set(HgPath::new(b"dir")),
1289 1288 VisitChildrenSet::This
1290 1289 );
1291 1290 assert_eq!(
1292 1291 m.visit_children_set(HgPath::new(b"dir/subdir")),
1293 1292 VisitChildrenSet::Recursive
1294 1293 );
1295 1294 // OPT: This should probably be Recursive if its parent is?
1296 1295 assert_eq!(
1297 1296 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1298 1297 VisitChildrenSet::This
1299 1298 );
1300 1299 assert_eq!(
1301 1300 m.visit_children_set(HgPath::new(b"folder")),
1302 1301 VisitChildrenSet::Empty
1303 1302 );
1304 1303
1305 1304 // VisitdirRootfilesin
1306 1305 let m = PatternMatcher::new(vec![IgnorePattern::new(
1307 1306 PatternSyntax::RootFiles,
1308 1307 b"dir/subdir",
1309 1308 Path::new(""),
1310 1309 )])
1311 1310 .unwrap();
1312 1311 assert_eq!(
1313 1312 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1314 1313 VisitChildrenSet::Empty
1315 1314 );
1316 1315 assert_eq!(
1317 1316 m.visit_children_set(HgPath::new(b"folder")),
1318 1317 VisitChildrenSet::Empty
1319 1318 );
1320 1319 // FIXME: These should probably be This.
1321 1320 assert_eq!(
1322 1321 m.visit_children_set(HgPath::new(b"")),
1323 1322 VisitChildrenSet::Empty
1324 1323 );
1325 1324 assert_eq!(
1326 1325 m.visit_children_set(HgPath::new(b"dir")),
1327 1326 VisitChildrenSet::Empty
1328 1327 );
1329 1328 assert_eq!(
1330 1329 m.visit_children_set(HgPath::new(b"dir/subdir")),
1331 1330 VisitChildrenSet::Empty
1332 1331 );
1333 1332
1334 1333 // VisitchildrensetRootfilesin
1335 1334 let m = PatternMatcher::new(vec![IgnorePattern::new(
1336 1335 PatternSyntax::RootFiles,
1337 1336 b"dir/subdir",
1338 1337 Path::new(""),
1339 1338 )])
1340 1339 .unwrap();
1341 1340 assert_eq!(
1342 1341 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1343 1342 VisitChildrenSet::Empty
1344 1343 );
1345 1344 assert_eq!(
1346 1345 m.visit_children_set(HgPath::new(b"folder")),
1347 1346 VisitChildrenSet::Empty
1348 1347 );
1349 1348 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1350 1349 // respectively, or at least This for all three.
1351 1350 assert_eq!(
1352 1351 m.visit_children_set(HgPath::new(b"")),
1353 1352 VisitChildrenSet::Empty
1354 1353 );
1355 1354 assert_eq!(
1356 1355 m.visit_children_set(HgPath::new(b"dir")),
1357 1356 VisitChildrenSet::Empty
1358 1357 );
1359 1358 assert_eq!(
1360 1359 m.visit_children_set(HgPath::new(b"dir/subdir")),
1361 1360 VisitChildrenSet::Empty
1362 1361 );
1363 1362
1364 1363 // VisitdirGlob
1365 1364 let m = PatternMatcher::new(vec![IgnorePattern::new(
1366 1365 PatternSyntax::Glob,
1367 1366 b"dir/z*",
1368 1367 Path::new(""),
1369 1368 )])
1370 1369 .unwrap();
1371 1370 assert_eq!(
1372 1371 m.visit_children_set(HgPath::new(b"")),
1373 1372 VisitChildrenSet::This
1374 1373 );
1375 1374 // FIXME: This probably should be This
1376 1375 assert_eq!(
1377 1376 m.visit_children_set(HgPath::new(b"dir")),
1378 1377 VisitChildrenSet::Empty
1379 1378 );
1380 1379 assert_eq!(
1381 1380 m.visit_children_set(HgPath::new(b"folder")),
1382 1381 VisitChildrenSet::Empty
1383 1382 );
1384 1383 // OPT: these should probably be Empty.
1385 1384 assert_eq!(
1386 1385 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 1386 VisitChildrenSet::This
1388 1387 );
1389 1388 assert_eq!(
1390 1389 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1391 1390 VisitChildrenSet::This
1392 1391 );
1393 1392
1394 1393 // VisitchildrensetGlob
1395 1394 let m = PatternMatcher::new(vec![IgnorePattern::new(
1396 1395 PatternSyntax::Glob,
1397 1396 b"dir/z*",
1398 1397 Path::new(""),
1399 1398 )])
1400 1399 .unwrap();
1401 1400 assert_eq!(
1402 1401 m.visit_children_set(HgPath::new(b"")),
1403 1402 VisitChildrenSet::This
1404 1403 );
1405 1404 assert_eq!(
1406 1405 m.visit_children_set(HgPath::new(b"folder")),
1407 1406 VisitChildrenSet::Empty
1408 1407 );
1409 1408 // FIXME: This probably should be This
1410 1409 assert_eq!(
1411 1410 m.visit_children_set(HgPath::new(b"dir")),
1412 1411 VisitChildrenSet::Empty
1413 1412 );
1414 1413 // OPT: these should probably be Empty
1415 1414 assert_eq!(
1416 1415 m.visit_children_set(HgPath::new(b"dir/subdir")),
1417 1416 VisitChildrenSet::This
1418 1417 );
1419 1418 assert_eq!(
1420 1419 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1421 1420 VisitChildrenSet::This
1422 1421 );
1423 1422
1424 1423 // VisitdirFilepath
1425 1424 let m = PatternMatcher::new(vec![IgnorePattern::new(
1426 1425 PatternSyntax::FilePath,
1427 1426 b"dir/z",
1428 1427 Path::new(""),
1429 1428 )])
1430 1429 .unwrap();
1431 1430 assert_eq!(
1432 1431 m.visit_children_set(HgPath::new(b"")),
1433 1432 VisitChildrenSet::This
1434 1433 );
1435 1434 assert_eq!(
1436 1435 m.visit_children_set(HgPath::new(b"dir")),
1437 1436 VisitChildrenSet::This
1438 1437 );
1439 1438 assert_eq!(
1440 1439 m.visit_children_set(HgPath::new(b"folder")),
1441 1440 VisitChildrenSet::Empty
1442 1441 );
1443 1442 assert_eq!(
1444 1443 m.visit_children_set(HgPath::new(b"dir/subdir")),
1445 1444 VisitChildrenSet::Empty
1446 1445 );
1447 1446 assert_eq!(
1448 1447 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1449 1448 VisitChildrenSet::Empty
1450 1449 );
1451 1450
1452 1451 // VisitchildrensetFilepath
1453 1452 let m = PatternMatcher::new(vec![IgnorePattern::new(
1454 1453 PatternSyntax::FilePath,
1455 1454 b"dir/z",
1456 1455 Path::new(""),
1457 1456 )])
1458 1457 .unwrap();
1459 1458 assert_eq!(
1460 1459 m.visit_children_set(HgPath::new(b"")),
1461 1460 VisitChildrenSet::This
1462 1461 );
1463 1462 assert_eq!(
1464 1463 m.visit_children_set(HgPath::new(b"folder")),
1465 1464 VisitChildrenSet::Empty
1466 1465 );
1467 1466 assert_eq!(
1468 1467 m.visit_children_set(HgPath::new(b"dir")),
1469 1468 VisitChildrenSet::This
1470 1469 );
1471 1470 assert_eq!(
1472 1471 m.visit_children_set(HgPath::new(b"dir/subdir")),
1473 1472 VisitChildrenSet::Empty
1474 1473 );
1475 1474 assert_eq!(
1476 1475 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1477 1476 VisitChildrenSet::Empty
1478 1477 );
1479 1478 }
1480 1479
1481 1480 #[test]
1482 1481 fn test_includematcher() {
1483 1482 // VisitchildrensetPrefix
1484 1483 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1485 1484 PatternSyntax::RelPath,
1486 1485 b"dir/subdir",
1487 1486 Path::new(""),
1488 1487 )])
1489 1488 .unwrap();
1490 1489
1491 1490 let mut set = HashSet::new();
1492 1491 set.insert(HgPathBuf::from_bytes(b"dir"));
1493 1492 assert_eq!(
1494 1493 matcher.visit_children_set(HgPath::new(b"")),
1495 1494 VisitChildrenSet::Set(set)
1496 1495 );
1497 1496
1498 1497 let mut set = HashSet::new();
1499 1498 set.insert(HgPathBuf::from_bytes(b"subdir"));
1500 1499 assert_eq!(
1501 1500 matcher.visit_children_set(HgPath::new(b"dir")),
1502 1501 VisitChildrenSet::Set(set)
1503 1502 );
1504 1503 assert_eq!(
1505 1504 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1506 1505 VisitChildrenSet::Recursive
1507 1506 );
1508 1507 // OPT: This should probably be 'all' if its parent is?
1509 1508 assert_eq!(
1510 1509 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1511 1510 VisitChildrenSet::This
1512 1511 );
1513 1512 assert_eq!(
1514 1513 matcher.visit_children_set(HgPath::new(b"folder")),
1515 1514 VisitChildrenSet::Empty
1516 1515 );
1517 1516
1518 1517 // VisitchildrensetRootfilesin
1519 1518 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1520 1519 PatternSyntax::RootFiles,
1521 1520 b"dir/subdir",
1522 1521 Path::new(""),
1523 1522 )])
1524 1523 .unwrap();
1525 1524
1526 1525 let mut set = HashSet::new();
1527 1526 set.insert(HgPathBuf::from_bytes(b"dir"));
1528 1527 assert_eq!(
1529 1528 matcher.visit_children_set(HgPath::new(b"")),
1530 1529 VisitChildrenSet::Set(set)
1531 1530 );
1532 1531
1533 1532 let mut set = HashSet::new();
1534 1533 set.insert(HgPathBuf::from_bytes(b"subdir"));
1535 1534 assert_eq!(
1536 1535 matcher.visit_children_set(HgPath::new(b"dir")),
1537 1536 VisitChildrenSet::Set(set)
1538 1537 );
1539 1538
1540 1539 assert_eq!(
1541 1540 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1542 1541 VisitChildrenSet::This
1543 1542 );
1544 1543 assert_eq!(
1545 1544 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1546 1545 VisitChildrenSet::Empty
1547 1546 );
1548 1547 assert_eq!(
1549 1548 matcher.visit_children_set(HgPath::new(b"folder")),
1550 1549 VisitChildrenSet::Empty
1551 1550 );
1552 1551
1553 1552 // VisitchildrensetGlob
1554 1553 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1555 1554 PatternSyntax::Glob,
1556 1555 b"dir/z*",
1557 1556 Path::new(""),
1558 1557 )])
1559 1558 .unwrap();
1560 1559
1561 1560 let mut set = HashSet::new();
1562 1561 set.insert(HgPathBuf::from_bytes(b"dir"));
1563 1562 assert_eq!(
1564 1563 matcher.visit_children_set(HgPath::new(b"")),
1565 1564 VisitChildrenSet::Set(set)
1566 1565 );
1567 1566 assert_eq!(
1568 1567 matcher.visit_children_set(HgPath::new(b"folder")),
1569 1568 VisitChildrenSet::Empty
1570 1569 );
1571 1570 assert_eq!(
1572 1571 matcher.visit_children_set(HgPath::new(b"dir")),
1573 1572 VisitChildrenSet::This
1574 1573 );
1575 1574 // OPT: these should probably be set().
1576 1575 assert_eq!(
1577 1576 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1578 1577 VisitChildrenSet::This
1579 1578 );
1580 1579 assert_eq!(
1581 1580 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1582 1581 VisitChildrenSet::This
1583 1582 );
1584 1583
1585 1584 // VisitchildrensetFilePath
1586 1585 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1587 1586 PatternSyntax::FilePath,
1588 1587 b"dir/z",
1589 1588 Path::new(""),
1590 1589 )])
1591 1590 .unwrap();
1592 1591
1593 1592 let mut set = HashSet::new();
1594 1593 set.insert(HgPathBuf::from_bytes(b"dir"));
1595 1594 assert_eq!(
1596 1595 matcher.visit_children_set(HgPath::new(b"")),
1597 1596 VisitChildrenSet::Set(set)
1598 1597 );
1599 1598 assert_eq!(
1600 1599 matcher.visit_children_set(HgPath::new(b"folder")),
1601 1600 VisitChildrenSet::Empty
1602 1601 );
1603 1602 let mut set = HashSet::new();
1604 1603 set.insert(HgPathBuf::from_bytes(b"z"));
1605 1604 assert_eq!(
1606 1605 matcher.visit_children_set(HgPath::new(b"dir")),
1607 1606 VisitChildrenSet::Set(set)
1608 1607 );
1609 1608 // OPT: these should probably be set().
1610 1609 assert_eq!(
1611 1610 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1612 1611 VisitChildrenSet::Empty
1613 1612 );
1614 1613 assert_eq!(
1615 1614 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1616 1615 VisitChildrenSet::Empty
1617 1616 );
1618 1617
1619 1618 // Test multiple patterns
1620 1619 let matcher = IncludeMatcher::new(vec![
1621 1620 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1622 1621 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1623 1622 ])
1624 1623 .unwrap();
1625 1624
1626 1625 assert_eq!(
1627 1626 matcher.visit_children_set(HgPath::new(b"")),
1628 1627 VisitChildrenSet::This
1629 1628 );
1630 1629
1631 1630 // Test multiple patterns
1632 1631 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1633 1632 PatternSyntax::Glob,
1634 1633 b"**/*.exe",
1635 1634 Path::new(""),
1636 1635 )])
1637 1636 .unwrap();
1638 1637
1639 1638 assert_eq!(
1640 1639 matcher.visit_children_set(HgPath::new(b"")),
1641 1640 VisitChildrenSet::This
1642 1641 );
1643 1642 }
1644 1643
1645 1644 #[test]
1646 1645 fn test_unionmatcher() {
1647 1646 // Path + Rootfiles
1648 1647 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1649 1648 PatternSyntax::RelPath,
1650 1649 b"dir/subdir",
1651 1650 Path::new(""),
1652 1651 )])
1653 1652 .unwrap();
1654 1653 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1655 1654 PatternSyntax::RootFiles,
1656 1655 b"dir",
1657 1656 Path::new(""),
1658 1657 )])
1659 1658 .unwrap();
1660 1659 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1661 1660
1662 1661 let mut set = HashSet::new();
1663 1662 set.insert(HgPathBuf::from_bytes(b"dir"));
1664 1663 assert_eq!(
1665 1664 matcher.visit_children_set(HgPath::new(b"")),
1666 1665 VisitChildrenSet::Set(set)
1667 1666 );
1668 1667 assert_eq!(
1669 1668 matcher.visit_children_set(HgPath::new(b"dir")),
1670 1669 VisitChildrenSet::This
1671 1670 );
1672 1671 assert_eq!(
1673 1672 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1674 1673 VisitChildrenSet::Recursive
1675 1674 );
1676 1675 assert_eq!(
1677 1676 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1678 1677 VisitChildrenSet::Empty
1679 1678 );
1680 1679 assert_eq!(
1681 1680 matcher.visit_children_set(HgPath::new(b"folder")),
1682 1681 VisitChildrenSet::Empty
1683 1682 );
1684 1683 assert_eq!(
1685 1684 matcher.visit_children_set(HgPath::new(b"folder")),
1686 1685 VisitChildrenSet::Empty
1687 1686 );
1688 1687
1689 1688 // OPT: These next two could be 'all' instead of 'this'.
1690 1689 assert_eq!(
1691 1690 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1692 1691 VisitChildrenSet::This
1693 1692 );
1694 1693 assert_eq!(
1695 1694 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1696 1695 VisitChildrenSet::This
1697 1696 );
1698 1697
1699 1698 // Path + unrelated Path
1700 1699 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1701 1700 PatternSyntax::RelPath,
1702 1701 b"dir/subdir",
1703 1702 Path::new(""),
1704 1703 )])
1705 1704 .unwrap();
1706 1705 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1707 1706 PatternSyntax::RelPath,
1708 1707 b"folder",
1709 1708 Path::new(""),
1710 1709 )])
1711 1710 .unwrap();
1712 1711 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1713 1712
1714 1713 let mut set = HashSet::new();
1715 1714 set.insert(HgPathBuf::from_bytes(b"folder"));
1716 1715 set.insert(HgPathBuf::from_bytes(b"dir"));
1717 1716 assert_eq!(
1718 1717 matcher.visit_children_set(HgPath::new(b"")),
1719 1718 VisitChildrenSet::Set(set)
1720 1719 );
1721 1720 let mut set = HashSet::new();
1722 1721 set.insert(HgPathBuf::from_bytes(b"subdir"));
1723 1722 assert_eq!(
1724 1723 matcher.visit_children_set(HgPath::new(b"dir")),
1725 1724 VisitChildrenSet::Set(set)
1726 1725 );
1727 1726
1728 1727 assert_eq!(
1729 1728 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1730 1729 VisitChildrenSet::Recursive
1731 1730 );
1732 1731 assert_eq!(
1733 1732 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1734 1733 VisitChildrenSet::Empty
1735 1734 );
1736 1735
1737 1736 assert_eq!(
1738 1737 matcher.visit_children_set(HgPath::new(b"folder")),
1739 1738 VisitChildrenSet::Recursive
1740 1739 );
1741 1740 // OPT: These next two could be 'all' instead of 'this'.
1742 1741 assert_eq!(
1743 1742 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1744 1743 VisitChildrenSet::This
1745 1744 );
1746 1745 assert_eq!(
1747 1746 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1748 1747 VisitChildrenSet::This
1749 1748 );
1750 1749
1751 1750 // Path + subpath
1752 1751 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1753 1752 PatternSyntax::RelPath,
1754 1753 b"dir/subdir/x",
1755 1754 Path::new(""),
1756 1755 )])
1757 1756 .unwrap();
1758 1757 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1759 1758 PatternSyntax::RelPath,
1760 1759 b"dir/subdir",
1761 1760 Path::new(""),
1762 1761 )])
1763 1762 .unwrap();
1764 1763 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1765 1764
1766 1765 let mut set = HashSet::new();
1767 1766 set.insert(HgPathBuf::from_bytes(b"dir"));
1768 1767 assert_eq!(
1769 1768 matcher.visit_children_set(HgPath::new(b"")),
1770 1769 VisitChildrenSet::Set(set)
1771 1770 );
1772 1771 let mut set = HashSet::new();
1773 1772 set.insert(HgPathBuf::from_bytes(b"subdir"));
1774 1773 assert_eq!(
1775 1774 matcher.visit_children_set(HgPath::new(b"dir")),
1776 1775 VisitChildrenSet::Set(set)
1777 1776 );
1778 1777
1779 1778 assert_eq!(
1780 1779 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1781 1780 VisitChildrenSet::Recursive
1782 1781 );
1783 1782 assert_eq!(
1784 1783 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1785 1784 VisitChildrenSet::Empty
1786 1785 );
1787 1786
1788 1787 assert_eq!(
1789 1788 matcher.visit_children_set(HgPath::new(b"folder")),
1790 1789 VisitChildrenSet::Empty
1791 1790 );
1792 1791 assert_eq!(
1793 1792 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1794 1793 VisitChildrenSet::Recursive
1795 1794 );
1796 1795 // OPT: this should probably be 'all' not 'this'.
1797 1796 assert_eq!(
1798 1797 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1799 1798 VisitChildrenSet::This
1800 1799 );
1801 1800 }
1802 1801
1803 1802 #[test]
1804 1803 fn test_intersectionmatcher() {
1805 1804 // Include path + Include rootfiles
1806 1805 let m1 = Box::new(
1807 1806 IncludeMatcher::new(vec![IgnorePattern::new(
1808 1807 PatternSyntax::RelPath,
1809 1808 b"dir/subdir",
1810 1809 Path::new(""),
1811 1810 )])
1812 1811 .unwrap(),
1813 1812 );
1814 1813 let m2 = Box::new(
1815 1814 IncludeMatcher::new(vec![IgnorePattern::new(
1816 1815 PatternSyntax::RootFiles,
1817 1816 b"dir",
1818 1817 Path::new(""),
1819 1818 )])
1820 1819 .unwrap(),
1821 1820 );
1822 1821 let matcher = IntersectionMatcher::new(m1, m2);
1823 1822
1824 1823 let mut set = HashSet::new();
1825 1824 set.insert(HgPathBuf::from_bytes(b"dir"));
1826 1825 assert_eq!(
1827 1826 matcher.visit_children_set(HgPath::new(b"")),
1828 1827 VisitChildrenSet::Set(set)
1829 1828 );
1830 1829 assert_eq!(
1831 1830 matcher.visit_children_set(HgPath::new(b"dir")),
1832 1831 VisitChildrenSet::This
1833 1832 );
1834 1833 assert_eq!(
1835 1834 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1836 1835 VisitChildrenSet::Empty
1837 1836 );
1838 1837 assert_eq!(
1839 1838 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1840 1839 VisitChildrenSet::Empty
1841 1840 );
1842 1841 assert_eq!(
1843 1842 matcher.visit_children_set(HgPath::new(b"folder")),
1844 1843 VisitChildrenSet::Empty
1845 1844 );
1846 1845 assert_eq!(
1847 1846 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1848 1847 VisitChildrenSet::Empty
1849 1848 );
1850 1849 assert_eq!(
1851 1850 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1852 1851 VisitChildrenSet::Empty
1853 1852 );
1854 1853
1855 1854 // Non intersecting paths
1856 1855 let m1 = Box::new(
1857 1856 IncludeMatcher::new(vec![IgnorePattern::new(
1858 1857 PatternSyntax::RelPath,
1859 1858 b"dir/subdir",
1860 1859 Path::new(""),
1861 1860 )])
1862 1861 .unwrap(),
1863 1862 );
1864 1863 let m2 = Box::new(
1865 1864 IncludeMatcher::new(vec![IgnorePattern::new(
1866 1865 PatternSyntax::RelPath,
1867 1866 b"folder",
1868 1867 Path::new(""),
1869 1868 )])
1870 1869 .unwrap(),
1871 1870 );
1872 1871 let matcher = IntersectionMatcher::new(m1, m2);
1873 1872
1874 1873 assert_eq!(
1875 1874 matcher.visit_children_set(HgPath::new(b"")),
1876 1875 VisitChildrenSet::Empty
1877 1876 );
1878 1877 assert_eq!(
1879 1878 matcher.visit_children_set(HgPath::new(b"dir")),
1880 1879 VisitChildrenSet::Empty
1881 1880 );
1882 1881 assert_eq!(
1883 1882 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1884 1883 VisitChildrenSet::Empty
1885 1884 );
1886 1885 assert_eq!(
1887 1886 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1888 1887 VisitChildrenSet::Empty
1889 1888 );
1890 1889 assert_eq!(
1891 1890 matcher.visit_children_set(HgPath::new(b"folder")),
1892 1891 VisitChildrenSet::Empty
1893 1892 );
1894 1893 assert_eq!(
1895 1894 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1896 1895 VisitChildrenSet::Empty
1897 1896 );
1898 1897 assert_eq!(
1899 1898 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1900 1899 VisitChildrenSet::Empty
1901 1900 );
1902 1901
1903 1902 // Nested paths
1904 1903 let m1 = Box::new(
1905 1904 IncludeMatcher::new(vec![IgnorePattern::new(
1906 1905 PatternSyntax::RelPath,
1907 1906 b"dir/subdir/x",
1908 1907 Path::new(""),
1909 1908 )])
1910 1909 .unwrap(),
1911 1910 );
1912 1911 let m2 = Box::new(
1913 1912 IncludeMatcher::new(vec![IgnorePattern::new(
1914 1913 PatternSyntax::RelPath,
1915 1914 b"dir/subdir",
1916 1915 Path::new(""),
1917 1916 )])
1918 1917 .unwrap(),
1919 1918 );
1920 1919 let matcher = IntersectionMatcher::new(m1, m2);
1921 1920
1922 1921 let mut set = HashSet::new();
1923 1922 set.insert(HgPathBuf::from_bytes(b"dir"));
1924 1923 assert_eq!(
1925 1924 matcher.visit_children_set(HgPath::new(b"")),
1926 1925 VisitChildrenSet::Set(set)
1927 1926 );
1928 1927
1929 1928 let mut set = HashSet::new();
1930 1929 set.insert(HgPathBuf::from_bytes(b"subdir"));
1931 1930 assert_eq!(
1932 1931 matcher.visit_children_set(HgPath::new(b"dir")),
1933 1932 VisitChildrenSet::Set(set)
1934 1933 );
1935 1934 let mut set = HashSet::new();
1936 1935 set.insert(HgPathBuf::from_bytes(b"x"));
1937 1936 assert_eq!(
1938 1937 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1939 1938 VisitChildrenSet::Set(set)
1940 1939 );
1941 1940 assert_eq!(
1942 1941 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1943 1942 VisitChildrenSet::Empty
1944 1943 );
1945 1944 assert_eq!(
1946 1945 matcher.visit_children_set(HgPath::new(b"folder")),
1947 1946 VisitChildrenSet::Empty
1948 1947 );
1949 1948 assert_eq!(
1950 1949 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1951 1950 VisitChildrenSet::Empty
1952 1951 );
1953 1952 // OPT: this should probably be 'all' not 'this'.
1954 1953 assert_eq!(
1955 1954 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1956 1955 VisitChildrenSet::This
1957 1956 );
1958 1957
1959 1958 // Diverging paths
1960 1959 let m1 = Box::new(
1961 1960 IncludeMatcher::new(vec![IgnorePattern::new(
1962 1961 PatternSyntax::RelPath,
1963 1962 b"dir/subdir/x",
1964 1963 Path::new(""),
1965 1964 )])
1966 1965 .unwrap(),
1967 1966 );
1968 1967 let m2 = Box::new(
1969 1968 IncludeMatcher::new(vec![IgnorePattern::new(
1970 1969 PatternSyntax::RelPath,
1971 1970 b"dir/subdir/z",
1972 1971 Path::new(""),
1973 1972 )])
1974 1973 .unwrap(),
1975 1974 );
1976 1975 let matcher = IntersectionMatcher::new(m1, m2);
1977 1976
1978 1977 // OPT: these next two could probably be Empty as well.
1979 1978 let mut set = HashSet::new();
1980 1979 set.insert(HgPathBuf::from_bytes(b"dir"));
1981 1980 assert_eq!(
1982 1981 matcher.visit_children_set(HgPath::new(b"")),
1983 1982 VisitChildrenSet::Set(set)
1984 1983 );
1985 1984 // OPT: these next two could probably be Empty as well.
1986 1985 let mut set = HashSet::new();
1987 1986 set.insert(HgPathBuf::from_bytes(b"subdir"));
1988 1987 assert_eq!(
1989 1988 matcher.visit_children_set(HgPath::new(b"dir")),
1990 1989 VisitChildrenSet::Set(set)
1991 1990 );
1992 1991 assert_eq!(
1993 1992 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1994 1993 VisitChildrenSet::Empty
1995 1994 );
1996 1995 assert_eq!(
1997 1996 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1998 1997 VisitChildrenSet::Empty
1999 1998 );
2000 1999 assert_eq!(
2001 2000 matcher.visit_children_set(HgPath::new(b"folder")),
2002 2001 VisitChildrenSet::Empty
2003 2002 );
2004 2003 assert_eq!(
2005 2004 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2006 2005 VisitChildrenSet::Empty
2007 2006 );
2008 2007 assert_eq!(
2009 2008 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2010 2009 VisitChildrenSet::Empty
2011 2010 );
2012 2011 }
2013 2012
2014 2013 #[test]
2015 2014 fn test_differencematcher() {
2016 2015 // Two alwaysmatchers should function like a nevermatcher
2017 2016 let m1 = AlwaysMatcher;
2018 2017 let m2 = AlwaysMatcher;
2019 2018 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2020 2019
2021 2020 for case in &[
2022 2021 &b""[..],
2023 2022 b"dir",
2024 2023 b"dir/subdir",
2025 2024 b"dir/subdir/z",
2026 2025 b"dir/foo",
2027 2026 b"dir/subdir/x",
2028 2027 b"folder",
2029 2028 ] {
2030 2029 assert_eq!(
2031 2030 matcher.visit_children_set(HgPath::new(case)),
2032 2031 VisitChildrenSet::Empty
2033 2032 );
2034 2033 }
2035 2034
2036 2035 // One always and one never should behave the same as an always
2037 2036 let m1 = AlwaysMatcher;
2038 2037 let m2 = NeverMatcher;
2039 2038 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2040 2039
2041 2040 for case in &[
2042 2041 &b""[..],
2043 2042 b"dir",
2044 2043 b"dir/subdir",
2045 2044 b"dir/subdir/z",
2046 2045 b"dir/foo",
2047 2046 b"dir/subdir/x",
2048 2047 b"folder",
2049 2048 ] {
2050 2049 assert_eq!(
2051 2050 matcher.visit_children_set(HgPath::new(case)),
2052 2051 VisitChildrenSet::Recursive
2053 2052 );
2054 2053 }
2055 2054
2056 2055 // Two include matchers
2057 2056 let m1 = Box::new(
2058 2057 IncludeMatcher::new(vec![IgnorePattern::new(
2059 2058 PatternSyntax::RelPath,
2060 2059 b"dir/subdir",
2061 2060 Path::new("/repo"),
2062 2061 )])
2063 2062 .unwrap(),
2064 2063 );
2065 2064 let m2 = Box::new(
2066 2065 IncludeMatcher::new(vec![IgnorePattern::new(
2067 2066 PatternSyntax::RootFiles,
2068 2067 b"dir",
2069 2068 Path::new("/repo"),
2070 2069 )])
2071 2070 .unwrap(),
2072 2071 );
2073 2072
2074 2073 let matcher = DifferenceMatcher::new(m1, m2);
2075 2074
2076 2075 let mut set = HashSet::new();
2077 2076 set.insert(HgPathBuf::from_bytes(b"dir"));
2078 2077 assert_eq!(
2079 2078 matcher.visit_children_set(HgPath::new(b"")),
2080 2079 VisitChildrenSet::Set(set)
2081 2080 );
2082 2081
2083 2082 let mut set = HashSet::new();
2084 2083 set.insert(HgPathBuf::from_bytes(b"subdir"));
2085 2084 assert_eq!(
2086 2085 matcher.visit_children_set(HgPath::new(b"dir")),
2087 2086 VisitChildrenSet::Set(set)
2088 2087 );
2089 2088 assert_eq!(
2090 2089 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2091 2090 VisitChildrenSet::Recursive
2092 2091 );
2093 2092 assert_eq!(
2094 2093 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2095 2094 VisitChildrenSet::Empty
2096 2095 );
2097 2096 assert_eq!(
2098 2097 matcher.visit_children_set(HgPath::new(b"folder")),
2099 2098 VisitChildrenSet::Empty
2100 2099 );
2101 2100 assert_eq!(
2102 2101 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2103 2102 VisitChildrenSet::This
2104 2103 );
2105 2104 assert_eq!(
2106 2105 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2107 2106 VisitChildrenSet::This
2108 2107 );
2109 2108 }
2110 2109 }
@@ -1,782 +1,782 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 7 use crate::errors::HgResultExt;
8 8 use crate::errors::{HgError, IoResultExt};
9 9 use crate::lock::{try_with_lock_no_wait, LockError};
10 10 use crate::manifest::{Manifest, Manifestlog};
11 11 use crate::revlog::filelog::Filelog;
12 12 use crate::revlog::RevlogError;
13 13 use crate::utils::debug::debug_wait_for_file_or_print;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::utils::SliceExt;
17 17 use crate::vfs::{is_dir, is_file, Vfs};
18 18 use crate::DirstateError;
19 19 use crate::{requirements, NodePrefix, UncheckedRevision};
20 20 use std::cell::{Ref, RefCell, RefMut};
21 21 use std::collections::HashSet;
22 22 use std::io::Seek;
23 23 use std::io::SeekFrom;
24 24 use std::io::Write as IoWrite;
25 25 use std::path::{Path, PathBuf};
26 26
27 27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28 28
29 29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30 30
31 31 /// A repository on disk
32 32 pub struct Repo {
33 33 working_directory: PathBuf,
34 34 dot_hg: PathBuf,
35 35 store: PathBuf,
36 36 requirements: HashSet<String>,
37 37 config: Config,
38 38 dirstate_parents: LazyCell<DirstateParents>,
39 39 dirstate_map: LazyCell<OwningDirstateMap>,
40 40 changelog: LazyCell<Changelog>,
41 41 manifestlog: LazyCell<Manifestlog>,
42 42 }
43 43
44 44 #[derive(Debug, derive_more::From)]
45 45 pub enum RepoError {
46 46 NotFound {
47 47 at: PathBuf,
48 48 },
49 49 #[from]
50 50 ConfigParseError(ConfigParseError),
51 51 #[from]
52 52 Other(HgError),
53 53 }
54 54
55 55 impl From<ConfigError> for RepoError {
56 56 fn from(error: ConfigError) -> Self {
57 57 match error {
58 58 ConfigError::Parse(error) => error.into(),
59 59 ConfigError::Other(error) => error.into(),
60 60 }
61 61 }
62 62 }
63 63
64 64 impl Repo {
65 65 /// Tries to find the nearest repository root in the current working
66 66 /// directory or its ancestors.
67 67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 68 let current_directory = crate::utils::current_dir()?;
69 69 // ancestors() is inclusive: it first yields `current_directory`
70 70 // as-is.
71 71 for ancestor in current_directory.ancestors() {
72 72 if is_dir(ancestor.join(".hg"))? {
73 73 return Ok(ancestor.to_path_buf());
74 74 }
75 75 }
76 76 Err(RepoError::NotFound {
77 77 at: current_directory,
78 78 })
79 79 }
80 80
81 81 /// Find a repository, either at the given path (which must contain a `.hg`
82 82 /// sub-directory) or by searching the current directory and its
83 83 /// ancestors.
84 84 ///
85 85 /// A method with two very different "modes" like this is usually a code
86 86 /// smell that calls for two methods, but in this case an `Option` is what rhg
87 87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 88 /// Having two methods would just move that `if` to almost all callers.
89 89 pub fn find(
90 90 config: &Config,
91 91 explicit_path: Option<PathBuf>,
92 92 ) -> Result<Self, RepoError> {
93 93 if let Some(root) = explicit_path {
94 94 if is_dir(root.join(".hg"))? {
95 95 Self::new_at_path(root, config)
96 96 } else if is_file(&root)? {
97 97 Err(HgError::unsupported("bundle repository").into())
98 98 } else {
99 99 Err(RepoError::NotFound { at: root })
100 100 }
101 101 } else {
102 102 let root = Self::find_repo_root()?;
103 103 Self::new_at_path(root, config)
104 104 }
105 105 }
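    // --- Editor's illustrative sketch (not part of the original file) ---
    // Hedged restatement of the `find` contract above: callers pass
    // `Some(path)` only when the user gave `-R`/`--repository`, otherwise
    // `None` walks up from the current directory via `find_repo_root`.
    // This helper is hypothetical and only shows the call shape.
    #[cfg(test)]
    fn _find_sketch(
        config: &Config,
        cli_repo_arg: Option<PathBuf>,
    ) -> Result<Repo, RepoError> {
        Repo::find(config, cli_repo_arg)
    }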
106 106
107 107 /// To be called after checking that `.hg` is a sub-directory
108 108 fn new_at_path(
109 109 working_directory: PathBuf,
110 110 config: &Config,
111 111 ) -> Result<Self, RepoError> {
112 112 let dot_hg = working_directory.join(".hg");
113 113
114 114 let mut repo_config_files =
115 115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116 116
117 117 let hg_vfs = Vfs { base: &dot_hg };
118 118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 119 let relative =
120 120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 121 let shared =
122 122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123 123
124 124 // From `mercurial/localrepo.py`:
125 125 //
126 126 // if .hg/requires contains the sharesafe requirement, it means
127 127 // there exists a `.hg/store/requires` too and we should read it
128 128 // NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
129 129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if the store
130 130 // is not present; refer to checkrequirementscompat() for that
131 131 //
132 132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 133 // repository was shared the old way. We check the share source
134 134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 135 // current repository needs to be reshared
136 136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137 137
138 138 let store_path;
139 139 if !shared {
140 140 store_path = dot_hg.join("store");
141 141 } else {
142 142 let bytes = hg_vfs.read("sharedpath")?;
143 143 let mut shared_path =
144 144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 145 .to_owned();
146 146 if relative {
147 147 shared_path = dot_hg.join(shared_path)
148 148 }
149 149 if !is_dir(&shared_path)? {
150 150 return Err(HgError::corrupted(format!(
151 151 ".hg/sharedpath points to nonexistent directory {}",
152 152 shared_path.display()
153 153 ))
154 154 .into());
155 155 }
156 156
157 157 store_path = shared_path.join("store");
158 158
159 159 let source_is_share_safe =
160 160 requirements::load(Vfs { base: &shared_path })?
161 161 .contains(requirements::SHARESAFE_REQUIREMENT);
162 162
163 163 if share_safe != source_is_share_safe {
164 164 return Err(HgError::unsupported("share-safe mismatch").into());
165 165 }
166 166
167 167 if share_safe {
168 168 repo_config_files.insert(0, shared_path.join("hgrc"))
169 169 }
170 170 }
171 171 if share_safe {
172 172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 173 }
174 174
175 175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 176 config.combine_with_repo(&repo_config_files)?
177 177 } else {
178 178 config.clone()
179 179 };
180 180
181 181 let repo = Self {
182 182 requirements: reqs,
183 183 working_directory,
184 184 store: store_path,
185 185 dot_hg,
186 186 config: repo_config,
187 187 dirstate_parents: LazyCell::new(),
188 188 dirstate_map: LazyCell::new(),
189 189 changelog: LazyCell::new(),
190 190 manifestlog: LazyCell::new(),
191 191 };
192 192
193 193 requirements::check(&repo)?;
194 194
195 195 Ok(repo)
196 196 }
197 197
198 198 pub fn working_directory_path(&self) -> &Path {
199 199 &self.working_directory
200 200 }
201 201
202 202 pub fn requirements(&self) -> &HashSet<String> {
203 203 &self.requirements
204 204 }
205 205
206 206 pub fn config(&self) -> &Config {
207 207 &self.config
208 208 }
209 209
210 210 /// For accessing repository files (in `.hg`), except for the store
211 211 /// (`.hg/store`).
212 212 pub fn hg_vfs(&self) -> Vfs<'_> {
213 213 Vfs { base: &self.dot_hg }
214 214 }
215 215
216 216 /// For accessing repository store files (in `.hg/store`)
217 217 pub fn store_vfs(&self) -> Vfs<'_> {
218 218 Vfs { base: &self.store }
219 219 }
220 220
221 221 /// For accessing the working copy
222 222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 223 Vfs {
224 224 base: &self.working_directory,
225 225 }
226 226 }
227 227
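    /// Run `f` while holding the working-copy lock ("wlock"), without
    /// waiting if the lock is already held by another process.
    ///
    /// An illustrative sketch (not from the original source) of how a
    /// caller might use it; `repo` is assumed to be a `Repo`:
    ///
    /// ```ignore
    /// let inner = repo.try_with_wlock_no_wait(|| {
    ///     // ... mutate the dirstate map here, then persist it:
    ///     repo.write_dirstate()
    /// })?; // the outer `Result` reports lock acquisition failures
    /// inner?; // the inner `Result` comes from the closure itself
    /// ```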
228 228 pub fn try_with_wlock_no_wait<R>(
229 229 &self,
230 230 f: impl FnOnce() -> R,
231 231 ) -> Result<R, LockError> {
232 232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 233 }
234 234
235 235 /// Whether this repo should use dirstate-v2.
236 236 /// The presence of `dirstate-v2` in the requirements does not mean that
237 237 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 239 /// upgrade mechanism being cut short), the on-disk version will be a
240 240 /// v1 file.
241 241 /// Semantically, having a requirement only means that a client cannot
242 242 /// properly understand or properly update the repo if it lacks the support
243 243 /// for the required feature, but not that that feature is actually used
244 244 /// on all occasions.
245 245 pub fn use_dirstate_v2(&self) -> bool {
246 246 self.requirements
247 247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 248 }
249 249
250 250 pub fn has_sparse(&self) -> bool {
251 251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 252 }
253 253
254 254 pub fn has_narrow(&self) -> bool {
255 255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 256 }
257 257
258 258 pub fn has_nodemap(&self) -> bool {
259 259 self.requirements
260 260 .contains(requirements::NODEMAP_REQUIREMENT)
261 261 }
262 262
263 263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 264 Ok(self
265 265 .hg_vfs()
266 266 .read("dirstate")
267 267 .io_not_found_as_none()?
268 268 .unwrap_or_default())
269 269 }
270 270
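    /// An identity token for the on-disk dirstate file, used to detect
    /// whether it was replaced since we last read it. (Note added for
    /// clarity, derived from the code below: on Unix this is the file's
    /// inode number.)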
271 271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 272 use std::os::unix::fs::MetadataExt;
273 273 Ok(self
274 274 .hg_vfs()
275 275 .symlink_metadata("dirstate")
276 276 .io_not_found_as_none()?
277 277 .map(|meta| meta.ino()))
278 278 }
279 279
280 280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 281 Ok(*self
282 282 .dirstate_parents
283 283 .get_or_init(|| self.read_dirstate_parents())?)
284 284 }
285 285
286 286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 287 let dirstate = self.dirstate_file_contents()?;
288 288 let parents = if dirstate.is_empty() {
289 289 DirstateParents::NULL
290 290 } else if self.use_dirstate_v2() {
291 291 let docket_res =
292 292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 293 match docket_res {
294 294 Ok(docket) => docket.parents(),
295 295 Err(_) => {
296 296 log::info!(
297 297 "Parsing dirstate docket failed, \
298 298 falling back to dirstate-v1"
299 299 );
300 300 *crate::dirstate::parsers::parse_dirstate_parents(
301 301 &dirstate,
302 302 )?
303 303 }
304 304 }
305 305 } else {
306 306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 307 };
308 308 self.dirstate_parents.set(parents);
309 309 Ok(parents)
310 310 }
311 311
312 312 /// Returns the information read from the dirstate docket necessary to
313 313 /// check if the data file has been updated/deleted by another process
314 314 /// since we last read the dirstate.
315 315 /// Namely, the inode, data file uuid and the data size.
316 316 fn get_dirstate_data_file_integrity(
317 317 &self,
318 318 ) -> Result<DirstateMapIdentity, HgError> {
319 319 assert!(
320 320 self.use_dirstate_v2(),
321 321 "accessing dirstate data file ID without dirstate-v2"
322 322 );
323 323 // Get the identity before the contents since we could have a race
324 324 // between the two. Having an identity that is too old is fine, but
325 325 // one that is younger than the content change is bad.
326 326 let identity = self.dirstate_identity()?;
327 327 let dirstate = self.dirstate_file_contents()?;
328 328 if dirstate.is_empty() {
329 329 self.dirstate_parents.set(DirstateParents::NULL);
330 330 Ok((identity, None, 0))
331 331 } else {
332 332 let docket_res =
333 333 crate::dirstate_tree::on_disk::read_docket(&dirstate);
334 334 match docket_res {
335 335 Ok(docket) => {
336 336 self.dirstate_parents.set(docket.parents());
337 337 Ok((
338 338 identity,
339 339 Some(docket.uuid.to_owned()),
340 340 docket.data_size(),
341 341 ))
342 342 }
343 343 Err(_) => {
344 344 log::info!(
345 345 "Parsing dirstate docket failed, \
346 346 falling back to dirstate-v1"
347 347 );
348 348 let parents =
349 349 *crate::dirstate::parsers::parse_dirstate_parents(
350 350 &dirstate,
351 351 )?;
352 352 self.dirstate_parents.set(parents);
353 353 Ok((identity, None, 0))
354 354 }
355 355 }
356 356 }
357 357 }
358 358
359 359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 360 if self.use_dirstate_v2() {
361 361 // The v2 dirstate is split into a docket and a data file.
362 362 // Since we don't always take the `wlock` to read it
363 363 // (like in `hg status`), it is susceptible to races.
364 364 // A simple retry method should be enough since full rewrites
365 365 // only happen when too much garbage data is present and
366 366 // this race is unlikely.
367 367 let mut tries = 0;
368 368
369 369 while tries < V2_MAX_READ_ATTEMPTS {
370 370 tries += 1;
371 371 match self.read_docket_and_data_file() {
372 372 Ok(m) => {
373 373 return Ok(m);
374 374 }
375 375 Err(e) => match e {
376 376 DirstateError::Common(HgError::RaceDetected(
377 377 context,
378 378 )) => {
379 379 log::info!(
380 380 "dirstate read race detected {} (retry {}/{})",
381 381 context,
382 382 tries,
383 383 V2_MAX_READ_ATTEMPTS,
384 384 );
385 385 continue;
386 386 }
387 387 _ => {
388 388 log::info!(
389 389 "Reading dirstate v2 failed, \
390 390 falling back to v1"
391 391 );
392 392 return self.new_dirstate_map_v1();
393 393 }
394 394 },
395 395 }
396 396 }
397 397 let error = HgError::abort(
398 398 format!("dirstate read race happened {tries} times in a row"),
399 399 255,
400 400 None,
401 401 );
402 402 Err(DirstateError::Common(error))
403 403 } else {
404 404 self.new_dirstate_map_v1()
405 405 }
406 406 }
407 407
408 408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 410 let identity = self.dirstate_identity()?;
411 411 let dirstate_file_contents = self.dirstate_file_contents()?;
412 412 if dirstate_file_contents.is_empty() {
413 413 self.dirstate_parents.set(DirstateParents::NULL);
414 414 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 415 } else {
416 416 let (map, parents) =
417 417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 418 self.dirstate_parents.set(parents);
419 419 Ok(map)
420 420 }
421 421 }
422 422
423 423 fn read_docket_and_data_file(
424 424 &self,
425 425 ) -> Result<OwningDirstateMap, DirstateError> {
426 426 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
427 427 let dirstate_file_contents = self.dirstate_file_contents()?;
428 428 let identity = self.dirstate_identity()?;
429 429 if dirstate_file_contents.is_empty() {
430 430 self.dirstate_parents.set(DirstateParents::NULL);
431 431 return Ok(OwningDirstateMap::new_empty(Vec::new()));
432 432 }
433 433 let docket = crate::dirstate_tree::on_disk::read_docket(
434 434 &dirstate_file_contents,
435 435 )?;
436 436 debug_wait_for_file_or_print(
437 437 self.config(),
438 438 "dirstate.post-docket-read-file",
439 439 );
440 440 self.dirstate_parents.set(docket.parents());
441 441 let uuid = docket.uuid.to_owned();
442 442 let data_size = docket.data_size();
443 443
444 444 let context = "between reading dirstate docket and data file";
445 445 let race_error = HgError::RaceDetected(context.into());
446 446 let metadata = docket.tree_metadata();
447 447
448 448 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
449 449 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
450 450 let contents = self.hg_vfs().read(docket.data_filename());
451 451 let contents = match contents {
452 452 Ok(c) => c,
453 453 Err(HgError::IoError { error, context }) => {
454 454 match error.raw_os_error().expect("real os error") {
455 455 // 2 = ENOENT, No such file or directory
456 456 // 116 = ESTALE, Stale NFS file handle
457 457 //
458 458 // TODO match on `error.kind()` when
459 459 // `ErrorKind::StaleNetworkFileHandle` is stable.
460 460 2 | 116 => {
461 461 // Race where the data file was deleted right after
462 462 // we read the docket, try again
463 463 return Err(race_error.into());
464 464 }
465 465 _ => {
466 466 return Err(
467 467 HgError::IoError { error, context }.into()
468 468 )
469 469 }
470 470 }
471 471 }
472 472 Err(e) => return Err(e.into()),
473 473 };
474 474 OwningDirstateMap::new_v2(
475 475 contents, data_size, metadata, uuid, identity,
476 476 )
477 477 } else {
478 478 match self
479 479 .hg_vfs()
480 480 .mmap_open(docket.data_filename())
481 481 .io_not_found_as_none()
482 482 {
483 483 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
484 484 data_mmap, data_size, metadata, uuid, identity,
485 485 ),
486 486 Ok(None) => {
487 487 // Race where the data file was deleted right after we
488 488 // read the docket, try again
489 489 return Err(race_error.into());
490 490 }
491 491 Err(e) => return Err(e.into()),
492 492 }
493 493 }?;
494 494
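// Illustrative note (not from the original source): the
// `devel.dirstate.v2.data_update_mode` config is a debugging knob.
// "force-new" always rewrites a fresh data file, "force-append" always
// appends to the existing one, and "auto" (the default, and the fallback
// for unknown values) lets the implementation decide.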
495 495 let write_mode_config = self
496 496 .config()
497 497 .get_str(b"devel", b"dirstate.v2.data_update_mode")
498 498 .unwrap_or(Some("auto"))
499 499 .unwrap_or("auto"); // don't bother for devel options
500 500 let write_mode = match write_mode_config {
501 501 "auto" => DirstateMapWriteMode::Auto,
502 502 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 503 "force-append" => DirstateMapWriteMode::ForceAppend,
504 504 _ => DirstateMapWriteMode::Auto,
505 505 };
506 506
507 507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508 508
509 509 Ok(map)
510 510 }
511 511
512 512 pub fn dirstate_map(
513 513 &self,
514 514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 516 }
517 517
518 518 pub fn dirstate_map_mut(
519 519 &self,
520 520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 521 self.dirstate_map
522 522 .get_mut_or_init(|| self.new_dirstate_map())
523 523 }
524 524
525 525 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 526 Changelog::open(&self.store_vfs(), self.has_nodemap())
527 527 }
528 528
529 529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 530 self.changelog.get_or_init(|| self.new_changelog())
531 531 }
532 532
533 533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 534 self.changelog.get_mut_or_init(|| self.new_changelog())
535 535 }
536 536
537 537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
539 539 }
540 540
541 541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 542 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 543 }
544 544
545 545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 547 }
548 548
549 549 /// Returns the manifest of the *changeset* with the given node ID
550 550 pub fn manifest_for_node(
551 551 &self,
552 552 node: impl Into<NodePrefix>,
553 553 ) -> Result<Manifest, RevlogError> {
554 554 self.manifestlog()?.data_for_node(
555 555 self.changelog()?
556 556 .data_for_node(node.into())?
557 557 .manifest_node()?
558 558 .into(),
559 559 )
560 560 }
561 561
562 562 /// Returns the manifest of the *changeset* with the given revision number
563 563 pub fn manifest_for_rev(
564 564 &self,
565 565 revision: UncheckedRevision,
566 566 ) -> Result<Manifest, RevlogError> {
567 567 self.manifestlog()?.data_for_node(
568 568 self.changelog()?
569 569 .data_for_rev(revision)?
570 570 .manifest_node()?
571 571 .into(),
572 572 )
573 573 }
574 574
575 575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 577 Ok(entry.tracked())
578 578 } else {
579 579 Ok(false)
580 580 }
581 581 }
582 582
583 583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 584 Filelog::open(self, path)
585 585 }
586 586
587 587 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 588 ///
589 589 /// The "wlock" must be held while calling this.
590 590 /// See for example `try_with_wlock_no_wait`.
591 591 ///
592 592 /// TODO: have a `WritableRepo` type only accessible while holding the
593 593 /// lock?
594 594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 595 let map = self.dirstate_map()?;
596 596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 597 // it’s unset
598 598 let parents = self.dirstate_parents()?;
599 599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 600 let (identity, uuid, data_size) =
601 601 self.get_dirstate_data_file_integrity()?;
602 602 let identity_changed = identity != map.old_identity();
603 603 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 604 let data_length_changed = data_size != map.old_data_size();
605 605
606 606 if identity_changed || uuid_changed || data_length_changed {
607 607 // If any of identity, uuid or length have changed since
608 608 // last disk read, don't write.
609 609 // This is fine because either we're in a command that doesn't
610 610 // write anything too important (like `hg status`), or we're in
611 611 // `hg add` and we're supposed to have taken the lock before
612 612 // reading anyway.
613 613 //
614 614 // TODO complain loudly if we've changed anything important
615 615 // without taking the lock.
616 616 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 617 log::debug!(
618 618 "dirstate has changed since last read, not updating."
619 619 );
620 620 return Ok(());
621 621 }
622 622
623 623 let uuid_opt = map.old_uuid();
624 624 let write_mode = if uuid_opt.is_some() {
625 625 DirstateMapWriteMode::Auto
626 626 } else {
627 627 DirstateMapWriteMode::ForceNewDataFile
628 628 };
629 629 let (data, tree_metadata, append, old_data_size) =
630 630 map.pack_v2(write_mode)?;
631 631
632 632 // Reuse the uuid, or generate a new one, keeping the old for
633 633 // deletion.
634 634 let (uuid, old_uuid) = match uuid_opt {
635 635 Some(uuid) => {
636 636 let as_str = std::str::from_utf8(uuid)
637 637 .map_err(|_| {
638 638 HgError::corrupted(
639 639 "non-UTF-8 dirstate data file ID",
640 640 )
641 641 })?
642 642 .to_owned();
643 643 if append {
644 644 (as_str, None)
645 645 } else {
646 646 (DirstateDocket::new_uid(), Some(as_str))
647 647 }
648 648 }
649 649 None => (DirstateDocket::new_uid(), None),
650 650 };
651 651
652 652 let data_filename = format!("dirstate.{}", uuid);
653 653 let data_filename = self.hg_vfs().join(data_filename);
654 654 let mut options = std::fs::OpenOptions::new();
655 655 options.write(true);
656 656
657 657 // Why are we not using the O_APPEND flag when appending?
658 658 //
659 659 // - O_APPEND makes it trickier to deal with garbage at the end of
660 660 // the file, left by a previous uncommitted transaction. By
661 661 // starting the write at [old_data_size] we make sure we erase
662 662 // all such garbage.
663 663 //
664 664 // - O_APPEND requires special-casing 0-byte writes, whereas we
665 665 // don't need that.
666 666 //
667 667 // - Some OSes have bugs in their implementation of O_APPEND:
668 668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 669 // bug: https://github.com/openzfs/zfs/pull/3124,
670 670 // https://github.com/openzfs/zfs/issues/13370
671 671 //
672 672 if !append {
673 673 log::trace!("creating a new dirstate data file");
674 674 options.create_new(true);
675 675 } else {
676 676 log::trace!("appending to the dirstate data file");
677 677 }
678 678
679 679 let data_size = (|| {
680 680 // TODO: loop and try another random ID if !append and this
681 681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 682 // random IDs is one in 2**32
683 683 let mut file = options.open(&data_filename)?;
684 684 if append {
685 685 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 686 }
687 687 file.write_all(&data)?;
688 688 file.flush()?;
689 file.seek(SeekFrom::Current(0))
689 file.stream_position()
690 690 })()
691 691 .when_writing_file(&data_filename)?;
692 692
693 693 let packed_dirstate = DirstateDocket::serialize(
694 694 parents,
695 695 tree_metadata,
696 696 data_size,
697 697 uuid.as_bytes(),
698 698 )
699 699 .map_err(|_: std::num::TryFromIntError| {
700 700 HgError::corrupted("overflow in dirstate docket serialization")
701 701 })?;
702 702
703 703 (packed_dirstate, old_uuid)
704 704 } else {
705 705 let identity = self.dirstate_identity()?;
706 706 if identity != map.old_identity() {
707 707 // If identity changed since last disk read, don't write.
708 708 // This is fine because either we're in a command that doesn't
709 709 // write anything too important (like `hg status`), or we're in
710 710 // `hg add` and we're supposed to have taken the lock before
711 711 // reading anyway.
712 712 //
713 713 // TODO complain loudly if we've changed anything important
714 714 // without taking the lock.
715 715 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 716 log::debug!(
717 717 "dirstate has changed since last read, not updating."
718 718 );
719 719 return Ok(());
720 720 }
721 721 (map.pack_v1(parents)?, None)
722 722 };
723 723
724 724 let vfs = self.hg_vfs();
725 725 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 726 if let Some(uuid) = old_uuid_to_remove {
727 727 // Remove the old data file after the new docket pointing to the
728 728 // new data file was written.
729 729 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 730 }
731 731 Ok(())
732 732 }
733 733 }
734 734
735 735 /// Lazily-initialized component of `Repo` with interior mutability
736 736 ///
737 737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 738 /// later by setting its inner `Option` to `None`. It also takes the
739 739 /// initialization function as an argument when the value is requested, not
740 740 /// when the instance is created.
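///
/// An illustrative usage sketch (not from the original source):
///
/// ```ignore
/// let cell: LazyCell<u32> = LazyCell::new();
/// {
///     let value = cell.get_or_init(|| Ok::<_, ()>(42)).unwrap();
///     assert_eq!(*value, 42); // initialized on first access
/// } // the `Ref` guard must be dropped before mutating again
/// cell.set(7); // the value can still be replaced later
/// ```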
741 741 struct LazyCell<T> {
742 742 value: RefCell<Option<T>>,
743 743 }
744 744
745 745 impl<T> LazyCell<T> {
746 746 fn new() -> Self {
747 747 Self {
748 748 value: RefCell::new(None),
749 749 }
750 750 }
751 751
752 752 fn set(&self, value: T) {
753 753 *self.value.borrow_mut() = Some(value)
754 754 }
755 755
756 756 fn get_or_init<E>(
757 757 &self,
758 758 init: impl Fn() -> Result<T, E>,
759 759 ) -> Result<Ref<T>, E> {
760 760 let mut borrowed = self.value.borrow();
761 761 if borrowed.is_none() {
762 762 drop(borrowed);
763 763 // Only use `borrow_mut` if it is really needed to avoid panic in
764 764 // case there is another outstanding borrow but mutation is not
765 765 // needed.
766 766 *self.value.borrow_mut() = Some(init()?);
767 767 borrowed = self.value.borrow()
768 768 }
769 769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 770 }
771 771
772 772 fn get_mut_or_init<E>(
773 773 &self,
774 774 init: impl Fn() -> Result<T, E>,
775 775 ) -> Result<RefMut<T>, E> {
776 776 let mut borrowed = self.value.borrow_mut();
777 777 if borrowed.is_none() {
778 778 *borrowed = Some(init()?);
779 779 }
780 780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 781 }
782 782 }
@@ -1,965 +1,965 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
36 36 /// As noted in revlog.c, revision numbers are actually encoded in
37 37 /// 4 bytes, and are liberally converted to ints, whence the i32
38 38 pub type BaseRevision = i32;
39 39
40 40 /// Mercurial revision numbers
41 41 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
42 42 /// in the sense that they should only be used for revisions that are
43 43 /// valid for a given index (i.e. in bounds).
44 44 #[derive(
45 45 Debug,
46 46 derive_more::Display,
47 47 Clone,
48 48 Copy,
49 49 Hash,
50 50 PartialEq,
51 51 Eq,
52 52 PartialOrd,
53 53 Ord,
54 54 )]
55 55 pub struct Revision(pub BaseRevision);
56 56
57 57 impl format_bytes::DisplayBytes for Revision {
58 58 fn display_bytes(
59 59 &self,
60 60 output: &mut dyn std::io::Write,
61 61 ) -> std::io::Result<()> {
62 62 self.0.display_bytes(output)
63 63 }
64 64 }
65 65
66 66 /// Unchecked Mercurial revision numbers.
67 67 ///
68 68 /// Values of this type have no guarantee of being a valid revision number
69 69 /// in any context. Use method `check_revision` to get a valid revision within
70 70 /// the appropriate index object.
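///
/// An illustrative sketch (not from the original source), assuming some
/// `index` implementing [`RevlogIndex`]:
///
/// ```ignore
/// let unchecked = UncheckedRevision(5);
/// if let Some(rev) = index.check_revision(unchecked) {
///     // `rev` is a `Revision` known to be in bounds for `index`
/// }
/// ```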
71 71 #[derive(
72 72 Debug,
73 73 derive_more::Display,
74 74 Clone,
75 75 Copy,
76 76 Hash,
77 77 PartialEq,
78 78 Eq,
79 79 PartialOrd,
80 80 Ord,
81 81 )]
82 82 pub struct UncheckedRevision(pub BaseRevision);
83 83
84 84 impl format_bytes::DisplayBytes for UncheckedRevision {
85 85 fn display_bytes(
86 86 &self,
87 87 output: &mut dyn std::io::Write,
88 88 ) -> std::io::Result<()> {
89 89 self.0.display_bytes(output)
90 90 }
91 91 }
92 92
93 93 impl From<Revision> for UncheckedRevision {
94 94 fn from(value: Revision) -> Self {
95 95 Self(value.0)
96 96 }
97 97 }
98 98
99 99 impl From<BaseRevision> for UncheckedRevision {
100 100 fn from(value: BaseRevision) -> Self {
101 101 Self(value)
102 102 }
103 103 }
104 104
105 105 /// Marker expressing the absence of a parent
106 106 ///
107 107 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
108 108 /// to be smaller than all existing revisions.
109 109 pub const NULL_REVISION: Revision = Revision(-1);
110 110
111 111 /// Same as `mercurial.node.wdirrev`
112 112 ///
113 113 /// This is also equal to `i32::max_value()`, but it's better to spell
114 114 /// it out explicitly, same as in `mercurial.node`
115 115 #[allow(clippy::unreadable_literal)]
116 116 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
117 117 UncheckedRevision(0x7fffffff);
118 118
119 119 pub const WORKING_DIRECTORY_HEX: &str =
120 120 "ffffffffffffffffffffffffffffffffffffffff";
121 121
122 122 /// The simplest expression of what we need of Mercurial DAGs.
123 123 pub trait Graph {
124 124 /// Return the two parents of the given `Revision`.
125 125 ///
126 126 /// Each of the parents can be independently `NULL_REVISION`
127 127 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
128 128 }
129 129
130 130 #[derive(Clone, Debug, PartialEq)]
131 131 pub enum GraphError {
132 132 ParentOutOfRange(Revision),
133 133 }
134 134
135 135 /// The Mercurial Revlog Index
136 136 ///
137 137 /// This is currently limited to the minimal interface that is needed for
138 138 /// the [`nodemap`](nodemap/index.html) module
139 139 pub trait RevlogIndex {
140 140 /// Total number of Revisions referenced in this index
141 141 fn len(&self) -> usize;
142 142
143 143 fn is_empty(&self) -> bool {
144 144 self.len() == 0
145 145 }
146 146
147 147 /// Return a reference to the Node or `None` for `NULL_REVISION`
148 148 fn node(&self, rev: Revision) -> Option<&Node>;
149 149
150 150 /// Return a [`Revision`] if `rev` is a valid revision number for this
151 151 /// index
152 152 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
153 153 let rev = rev.0;
154 154
155 155 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
156 156 {
157 157 Some(Revision(rev))
158 158 } else {
159 159 None
160 160 }
161 161 }
162 162 }
163 163
164 164 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
165 165 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
166 166 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
167 167 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
168 168
169 169 // Keep this in sync with REVIDX_KNOWN_FLAGS in
170 170 // mercurial/revlogutils/flagutil.py
171 171 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
172 172 | REVISION_FLAG_ELLIPSIS
173 173 | REVISION_FLAG_EXTSTORED
174 174 | REVISION_FLAG_HASCOPIESINFO;
175 175
176 176 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
177 177
178 178 #[derive(Debug, derive_more::From, derive_more::Display)]
179 179 pub enum RevlogError {
180 180 InvalidRevision,
181 181 /// Working directory is not supported
182 182 WDirUnsupported,
183 183 /// Found more than one entry whose ID matches the requested prefix
184 184 AmbiguousPrefix,
185 185 #[from]
186 186 Other(HgError),
187 187 }
188 188
189 189 impl From<NodeMapError> for RevlogError {
190 190 fn from(error: NodeMapError) -> Self {
191 191 match error {
192 192 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
193 193 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
194 194 format!("nodemap point to revision {} not in index", rev),
195 195 ),
196 196 }
197 197 }
198 198 }
199 199
200 200 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
201 201 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
202 202 }
203 203
204 204 impl RevlogError {
205 205 fn corrupted<S: AsRef<str>>(context: S) -> Self {
206 206 RevlogError::Other(corrupted(context))
207 207 }
208 208 }
209 209
210 210 /// Read only implementation of revlog.
211 211 pub struct Revlog {
212 212 /// When index and data are not interleaved: bytes of the revlog index.
213 213 /// When index and data are interleaved: bytes of the revlog index and
214 214 /// data.
215 215 index: Index,
216 216 /// When index and data are not interleaved: bytes of the revlog data
217 217 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
218 218 /// When present on disk: the persistent nodemap for this revlog
219 219 nodemap: Option<nodemap::NodeTree>,
220 220 }
221 221
222 222 impl Graph for Revlog {
223 223 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
224 224 self.index.parents(rev)
225 225 }
226 226 }
227 227
228 228 impl Revlog {
229 229 /// Open a revlog index file.
230 230 ///
231 231 /// It will also open the associated data file if index and data are not
232 232 /// interleaved.
233 233 pub fn open(
234 234 store_vfs: &Vfs,
235 235 index_path: impl AsRef<Path>,
236 236 data_path: Option<&Path>,
237 237 use_nodemap: bool,
238 238 ) -> Result<Self, HgError> {
239 239 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
240 240 }
241 241
242 242 fn open_gen(
243 243 store_vfs: &Vfs,
244 244 index_path: impl AsRef<Path>,
245 245 data_path: Option<&Path>,
246 246 use_nodemap: bool,
247 247 nodemap_for_test: Option<nodemap::NodeTree>,
248 248 ) -> Result<Self, HgError> {
249 249 let index_path = index_path.as_ref();
250 250 let index = {
251 match store_vfs.mmap_open_opt(&index_path)? {
252 None => Index::new(Box::new(vec![])),
251 match store_vfs.mmap_open_opt(index_path)? {
252 None => Index::new(Box::<Vec<_>>::default()),
253 253 Some(index_mmap) => {
254 254 let index = Index::new(Box::new(index_mmap))?;
255 255 Ok(index)
256 256 }
257 257 }
258 258 }?;
259 259
260 260 let default_data_path = index_path.with_extension("d");
261 261
262 262 // Type annotation required: on its own, the compiler
263 263 // won't recognize Mmap as Deref<Target = [u8]>
264 264 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
265 265 if index.is_inline() {
266 266 None
267 267 } else {
268 268 let data_path = data_path.unwrap_or(&default_data_path);
269 269 let data_mmap = store_vfs.mmap_open(data_path)?;
270 270 Some(Box::new(data_mmap))
271 271 };
272 272
273 273 let nodemap = if index.is_inline() || !use_nodemap {
274 274 None
275 275 } else {
276 276 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
277 277 |(docket, data)| {
278 278 nodemap::NodeTree::load_bytes(
279 279 Box::new(data),
280 280 docket.data_length,
281 281 )
282 282 },
283 283 )
284 284 };
285 285
286 286 let nodemap = nodemap_for_test.or(nodemap);
287 287
288 288 Ok(Revlog {
289 289 index,
290 290 data_bytes,
291 291 nodemap,
292 292 })
293 293 }
294 294
295 295 /// Return number of entries of the `Revlog`.
296 296 pub fn len(&self) -> usize {
297 297 self.index.len()
298 298 }
299 299
300 300 /// Returns `true` if the `Revlog` has zero entries.
301 301 pub fn is_empty(&self) -> bool {
302 302 self.index.is_empty()
303 303 }
304 304
305 305 /// Returns the node ID for the given revision number, if it exists in this
306 306 /// revlog
307 307 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
308 308 if rev == NULL_REVISION.into() {
309 309 return Some(&NULL_NODE);
310 310 }
311 311 let rev = self.index.check_revision(rev)?;
312 312 Some(self.index.get_entry(rev)?.hash())
313 313 }
314 314
315 315 /// Return the revision number for the given node ID, if it exists in this
316 316 /// revlog
317 317 pub fn rev_from_node(
318 318 &self,
319 319 node: NodePrefix,
320 320 ) -> Result<Revision, RevlogError> {
321 321 if let Some(nodemap) = &self.nodemap {
322 322 nodemap
323 323 .find_bin(&self.index, node)?
324 324 .ok_or(RevlogError::InvalidRevision)
325 325 } else {
326 326 self.rev_from_node_no_persistent_nodemap(node)
327 327 }
328 328 }
329 329
330 330 /// Same as `rev_from_node`, without using a persistent nodemap
331 331 ///
332 332 /// This is used as fallback when a persistent nodemap is not present.
333 333 /// This happens when the persistent-nodemap experimental feature is not
334 334 /// enabled, or for small revlogs.
335 335 fn rev_from_node_no_persistent_nodemap(
336 336 &self,
337 337 node: NodePrefix,
338 338 ) -> Result<Revision, RevlogError> {
339 339 // Linear scan of the revlog
340 340 // TODO: consider building a non-persistent nodemap in memory to
341 341 // optimize these cases.
342 342 let mut found_by_prefix = None;
343 343 for rev in (-1..self.len() as BaseRevision).rev() {
344 344 let rev = Revision(rev as BaseRevision);
345 345 let candidate_node = if rev == Revision(-1) {
346 346 NULL_NODE
347 347 } else {
348 348 let index_entry =
349 349 self.index.get_entry(rev).ok_or_else(|| {
350 350 HgError::corrupted(
351 351 "revlog references a revision not in the index",
352 352 )
353 353 })?;
354 354 *index_entry.hash()
355 355 };
356 356 if node == candidate_node {
357 357 return Ok(rev);
358 358 }
359 359 if node.is_prefix_of(&candidate_node) {
360 360 if found_by_prefix.is_some() {
361 361 return Err(RevlogError::AmbiguousPrefix);
362 362 }
363 363 found_by_prefix = Some(rev)
364 364 }
365 365 }
366 366 found_by_prefix.ok_or(RevlogError::InvalidRevision)
367 367 }
368 368
369 369 /// Returns whether the given revision exists in this revlog.
370 370 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
371 371 self.index.check_revision(rev).is_some()
372 372 }
373 373
374 374 /// Return the full data associated to a revision.
375 375 ///
376 376 /// All entries required to build the final data out of deltas will be
377 377 /// retrieved as needed, and the deltas will be applied to the initial
378 378 /// snapshot to rebuild the final data.
379 379 pub fn get_rev_data(
380 380 &self,
381 381 rev: UncheckedRevision,
382 382 ) -> Result<Cow<[u8]>, RevlogError> {
383 383 if rev == NULL_REVISION.into() {
384 384 return Ok(Cow::Borrowed(&[]));
385 385 };
386 386 self.get_entry(rev)?.data()
387 387 }
388 388
389 389 /// [`Self::get_rev_data`] for checked revisions.
390 390 pub fn get_rev_data_for_checked_rev(
391 391 &self,
392 392 rev: Revision,
393 393 ) -> Result<Cow<[u8]>, RevlogError> {
394 394 if rev == NULL_REVISION {
395 395 return Ok(Cow::Borrowed(&[]));
396 396 };
397 397 self.get_entry_for_checked_rev(rev)?.data()
398 398 }
399 399
400 400 /// Check the hash of some given data against the recorded hash.
401 401 pub fn check_hash(
402 402 &self,
403 403 p1: Revision,
404 404 p2: Revision,
405 405 expected: &[u8],
406 406 data: &[u8],
407 407 ) -> bool {
408 408 let e1 = self.index.get_entry(p1);
409 409 let h1 = match e1 {
410 410 Some(ref entry) => entry.hash(),
411 411 None => &NULL_NODE,
412 412 };
413 413 let e2 = self.index.get_entry(p2);
414 414 let h2 = match e2 {
415 415 Some(ref entry) => entry.hash(),
416 416 None => &NULL_NODE,
417 417 };
418 418
419 419 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
420 420 }
421 421
422 422 /// Build the full data of a revision out of its snapshot
423 423 /// and its deltas.
424 424 fn build_data_from_deltas(
425 425 snapshot: RevlogEntry,
426 426 deltas: &[RevlogEntry],
427 427 ) -> Result<Vec<u8>, HgError> {
428 428 let snapshot = snapshot.data_chunk()?;
429 429 let deltas = deltas
430 430 .iter()
431 431 .rev()
432 432 .map(RevlogEntry::data_chunk)
433 433 .collect::<Result<Vec<_>, _>>()?;
434 434 let patches: Vec<_> =
435 435 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
436 436 let patch = patch::fold_patch_lists(&patches);
437 437 Ok(patch.apply(&snapshot))
438 438 }
439 439
440 440 /// Return the revlog data.
441 441 fn data(&self) -> &[u8] {
442 442 match &self.data_bytes {
443 443 Some(data_bytes) => data_bytes,
444 444 None => panic!(
445 445 "forgot to load the data or trying to access inline data"
446 446 ),
447 447 }
448 448 }
449 449
450 450 pub fn make_null_entry(&self) -> RevlogEntry {
451 451 RevlogEntry {
452 452 revlog: self,
453 453 rev: NULL_REVISION,
454 454 bytes: b"",
455 455 compressed_len: 0,
456 456 uncompressed_len: 0,
457 457 base_rev_or_base_of_delta_chain: None,
458 458 p1: NULL_REVISION,
459 459 p2: NULL_REVISION,
460 460 flags: NULL_REVLOG_ENTRY_FLAGS,
461 461 hash: NULL_NODE,
462 462 }
463 463 }
464 464
465 465 fn get_entry_for_checked_rev(
466 466 &self,
467 467 rev: Revision,
468 468 ) -> Result<RevlogEntry, RevlogError> {
469 469 if rev == NULL_REVISION {
470 470 return Ok(self.make_null_entry());
471 471 }
472 472 let index_entry = self
473 473 .index
474 474 .get_entry(rev)
475 475 .ok_or(RevlogError::InvalidRevision)?;
476 476 let start = index_entry.offset();
477 477 let end = start + index_entry.compressed_len() as usize;
478 478 let data = if self.index.is_inline() {
479 479 self.index.data(start, end)
480 480 } else {
481 481 &self.data()[start..end]
482 482 };
483 483 let base_rev = self
484 484 .index
485 485 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
486 486 .ok_or_else(|| {
487 487 RevlogError::corrupted(format!(
488 488 "base revision for rev {} is invalid",
489 489 rev
490 490 ))
491 491 })?;
492 492 let p1 =
493 493 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
494 494 RevlogError::corrupted(format!(
495 495 "p1 for rev {} is invalid",
496 496 rev
497 497 ))
498 498 })?;
499 499 let p2 =
500 500 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
501 501 RevlogError::corrupted(format!(
502 502 "p2 for rev {} is invalid",
503 503 rev
504 504 ))
505 505 })?;
506 506 let entry = RevlogEntry {
507 507 revlog: self,
508 508 rev,
509 509 bytes: data,
510 510 compressed_len: index_entry.compressed_len(),
511 511 uncompressed_len: index_entry.uncompressed_len(),
512 512 base_rev_or_base_of_delta_chain: if base_rev == rev {
513 513 None
514 514 } else {
515 515 Some(base_rev)
516 516 },
517 517 p1,
518 518 p2,
519 519 flags: index_entry.flags(),
520 520 hash: *index_entry.hash(),
521 521 };
522 522 Ok(entry)
523 523 }
524 524
525 525 /// Get an entry of the revlog.
526 526 pub fn get_entry(
527 527 &self,
528 528 rev: UncheckedRevision,
529 529 ) -> Result<RevlogEntry, RevlogError> {
530 530 if rev == NULL_REVISION.into() {
531 531 return Ok(self.make_null_entry());
532 532 }
533 533 let rev = self.index.check_revision(rev).ok_or_else(|| {
534 534 RevlogError::corrupted(format!("rev {} is invalid", rev))
535 535 })?;
536 536 self.get_entry_for_checked_rev(rev)
537 537 }
538 538 }
539 539
540 540 /// The revlog entry's bytes and the necessary information to extract
541 541 /// the entry's data.
542 542 #[derive(Clone)]
543 543 pub struct RevlogEntry<'revlog> {
544 544 revlog: &'revlog Revlog,
545 545 rev: Revision,
546 546 bytes: &'revlog [u8],
547 547 compressed_len: u32,
548 548 uncompressed_len: i32,
549 549 base_rev_or_base_of_delta_chain: Option<Revision>,
550 550 p1: Revision,
551 551 p2: Revision,
552 552 flags: u16,
553 553 hash: Node,
554 554 }
555 555
556 556 thread_local! {
557 557 // seems fine to [unwrap] here: this can only fail due to memory allocation
558 558 // failing, and it's normal for that to cause panic.
559 559 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
560 560 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
561 561 }
562 562
563 563 fn zstd_decompress_to_buffer(
564 564 bytes: &[u8],
565 565 buf: &mut Vec<u8>,
566 566 ) -> Result<usize, std::io::Error> {
567 567 ZSTD_DECODER
568 568 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
569 569 }
570 570
571 571 impl<'revlog> RevlogEntry<'revlog> {
572 572 pub fn revision(&self) -> Revision {
573 573 self.rev
574 574 }
575 575
576 576 pub fn node(&self) -> &Node {
577 577 &self.hash
578 578 }
579 579
580 580 pub fn uncompressed_len(&self) -> Option<u32> {
581 581 u32::try_from(self.uncompressed_len).ok()
582 582 }
583 583
584 584 pub fn has_p1(&self) -> bool {
585 585 self.p1 != NULL_REVISION
586 586 }
587 587
588 588 pub fn p1_entry(
589 589 &self,
590 590 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
591 591 if self.p1 == NULL_REVISION {
592 592 Ok(None)
593 593 } else {
594 594 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
595 595 }
596 596 }
597 597
598 598 pub fn p2_entry(
599 599 &self,
600 600 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
601 601 if self.p2 == NULL_REVISION {
602 602 Ok(None)
603 603 } else {
604 604 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
605 605 }
606 606 }
607 607
608 608 pub fn p1(&self) -> Option<Revision> {
609 609 if self.p1 == NULL_REVISION {
610 610 None
611 611 } else {
612 612 Some(self.p1)
613 613 }
614 614 }
615 615
616 616 pub fn p2(&self) -> Option<Revision> {
617 617 if self.p2 == NULL_REVISION {
618 618 None
619 619 } else {
620 620 Some(self.p2)
621 621 }
622 622 }
623 623
624 624 pub fn is_censored(&self) -> bool {
625 625 (self.flags & REVISION_FLAG_CENSORED) != 0
626 626 }
627 627
628 628 pub fn has_length_affecting_flag_processor(&self) -> bool {
629 629 // Relevant Python code: revlog.size()
630 630 // note: ELLIPSIS is known to not change the content
631 631 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
632 632 }
633 633
634 634 /// The data for this entry, after resolving deltas if any.
635 635 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
636 636 let mut entry = self.clone();
637 637 let mut delta_chain = vec![];
638 638
639 639 // The meaning of `base_rev_or_base_of_delta_chain` depends on
640 640 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
641 641 // `mercurial/revlogutils/constants.py` and the code in
642 642 // [_chaininfo] and in [index_deltachain].
643 643 let uses_generaldelta = self.revlog.index.uses_generaldelta();
644 644 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
645 645 entry = if uses_generaldelta {
646 646 delta_chain.push(entry);
647 647 self.revlog.get_entry_for_checked_rev(base_rev)?
648 648 } else {
649 649 let base_rev = UncheckedRevision(entry.rev.0 - 1);
650 650 delta_chain.push(entry);
651 651 self.revlog.get_entry(base_rev)?
652 652 };
653 653 }
654 654
655 655 let data = if delta_chain.is_empty() {
656 656 entry.data_chunk()?
657 657 } else {
658 658 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
659 659 };
660 660
661 661 Ok(data)
662 662 }
663 663
664 664 fn check_data(
665 665 &self,
666 666 data: Cow<'revlog, [u8]>,
667 667 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
668 668 if self.revlog.check_hash(
669 669 self.p1,
670 670 self.p2,
671 671 self.hash.as_bytes(),
672 672 &data,
673 673 ) {
674 674 Ok(data)
675 675 } else {
676 676 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
677 677 return Err(HgError::unsupported(
678 678 "ellipsis revisions are not supported by rhg",
679 679 )
680 680 .into());
681 681 }
682 682 Err(corrupted(format!(
683 683 "hash check failed for revision {}",
684 684 self.rev
685 685 ))
686 686 .into())
687 687 }
688 688 }
689 689
690 690 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
691 691 let data = self.rawdata()?;
692 692 if self.rev == NULL_REVISION {
693 693 return Ok(data);
694 694 }
695 695 if self.is_censored() {
696 696 return Err(HgError::CensoredNodeError.into());
697 697 }
698 698 self.check_data(data)
699 699 }
700 700
701 701 /// Extract the data contained in the entry.
702 702 /// This may be a delta. (See `is_delta`.)
703 703 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
704 704 if self.bytes.is_empty() {
705 705 return Ok(Cow::Borrowed(&[]));
706 706 }
707 707 match self.bytes[0] {
708 708 // Revision data is the entirety of the entry, including this
709 709 // header.
710 710 b'\0' => Ok(Cow::Borrowed(self.bytes)),
711 711 // Raw revision data follows.
712 712 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
713 713 // zlib (RFC 1950) data.
714 714 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
715 715 // zstd data.
716 716 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
717 717 // A proper new format should have had a repo/store requirement.
718 718 format_type => Err(corrupted(format!(
719 719 "unknown compression header '{}'",
720 720 format_type
721 721 ))),
722 722 }
723 723 }
724 724
725 725 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
726 726 let mut decoder = ZlibDecoder::new(self.bytes);
727 727 if self.is_delta() {
728 728 let mut buf = Vec::with_capacity(self.compressed_len as usize);
729 729 decoder
730 730 .read_to_end(&mut buf)
731 731 .map_err(|e| corrupted(e.to_string()))?;
732 732 Ok(buf)
733 733 } else {
734 734 let cap = self.uncompressed_len.max(0) as usize;
735 735 let mut buf = vec![0; cap];
736 736 decoder
737 737 .read_exact(&mut buf)
738 738 .map_err(|e| corrupted(e.to_string()))?;
739 739 Ok(buf)
740 740 }
741 741 }
742 742
743 743 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
744 744 let cap = self.uncompressed_len.max(0) as usize;
745 745 if self.is_delta() {
746 746 // [cap] is usually an over-estimate of the space needed because
747 747 // it's the length of delta-decoded data, but we're interested
748 748 // in the size of the delta.
749 749 // This means we have to [shrink_to_fit] to avoid holding on
750 750 // to a large chunk of memory, but it also means we must have a
751 751 // fallback branch, for the case when the delta is longer than
752 752 // the original data (surprisingly, this does happen in practice)
753 753 let mut buf = Vec::with_capacity(cap);
754 754 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
755 755 Ok(_) => buf.shrink_to_fit(),
756 756 Err(_) => {
757 757 buf.clear();
758 758 zstd::stream::copy_decode(self.bytes, &mut buf)
759 759 .map_err(|e| corrupted(e.to_string()))?;
760 760 }
761 761 };
762 762 Ok(buf)
763 763 } else {
764 764 let mut buf = Vec::with_capacity(cap);
765 765 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
766 766 .map_err(|e| corrupted(e.to_string()))?;
767 767 if len != self.uncompressed_len as usize {
768 768 Err(corrupted("uncompressed length does not match"))
769 769 } else {
770 770 Ok(buf)
771 771 }
772 772 }
773 773 }
774 774
775 775 /// Tell if the entry is a snapshot or a delta
777 777 /// (this influences decompression).
777 777 fn is_delta(&self) -> bool {
778 778 self.base_rev_or_base_of_delta_chain.is_some()
779 779 }
780 780 }
781 781
782 782 /// Calculate the hash of a revision given its data and its parents.
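/// (Note added for clarity, derived from the code below: the two parent
/// hashes are fed to SHA-1 in sorted byte order, so the result does not
/// depend on which parent is p1 and which is p2.)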
783 783 fn hash(
784 784 data: &[u8],
785 785 p1_hash: &[u8],
786 786 p2_hash: &[u8],
787 787 ) -> [u8; NODE_BYTES_LENGTH] {
788 788 let mut hasher = Sha1::new();
789 789 let (a, b) = (p1_hash, p2_hash);
790 790 if a > b {
791 791 hasher.update(b);
792 792 hasher.update(a);
793 793 } else {
794 794 hasher.update(a);
795 795 hasher.update(b);
796 796 }
797 797 hasher.update(data);
798 798 *hasher.finalize().as_ref()
799 799 }
800 800
801 801 #[cfg(test)]
802 802 mod tests {
803 803 use super::*;
804 804 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
805 805 use itertools::Itertools;
806 806
807 807 #[test]
808 808 fn test_empty() {
809 809 let temp = tempfile::tempdir().unwrap();
810 810 let vfs = Vfs { base: temp.path() };
811 811 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
812 812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
813 813 assert!(revlog.is_empty());
814 814 assert_eq!(revlog.len(), 0);
815 815 assert!(revlog.get_entry(0.into()).is_err());
816 816 assert!(!revlog.has_rev(0.into()));
817 817 assert_eq!(
818 818 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
819 819 NULL_REVISION
820 820 );
821 821 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
822 822 assert_eq!(null_entry.revision(), NULL_REVISION);
823 823 assert!(null_entry.data().unwrap().is_empty());
824 824 }
825 825
826 826 #[test]
827 827 fn test_inline() {
828 828 let temp = tempfile::tempdir().unwrap();
829 829 let vfs = Vfs { base: temp.path() };
830 830 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
831 831 .unwrap();
832 832 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
833 833 .unwrap();
834 834 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
835 835 .unwrap();
836 836 let entry0_bytes = IndexEntryBuilder::new()
837 837 .is_first(true)
838 838 .with_version(1)
839 839 .with_inline(true)
840 840 .with_offset(INDEX_ENTRY_SIZE)
841 841 .with_node(node0)
842 842 .build();
843 843 let entry1_bytes = IndexEntryBuilder::new()
844 844 .with_offset(INDEX_ENTRY_SIZE)
845 845 .with_node(node1)
846 846 .build();
847 847 let entry2_bytes = IndexEntryBuilder::new()
848 848 .with_offset(INDEX_ENTRY_SIZE)
849 849 .with_p1(Revision(0))
850 850 .with_p2(Revision(1))
851 851 .with_node(node2)
852 852 .build();
853 853 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
854 854 .into_iter()
855 855 .flatten()
856 856 .collect_vec();
857 857 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
858 858 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
859 859
860 860 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
861 861 assert_eq!(entry0.revision(), Revision(0));
862 862 assert_eq!(*entry0.node(), node0);
863 863 assert!(!entry0.has_p1());
864 864 assert_eq!(entry0.p1(), None);
865 865 assert_eq!(entry0.p2(), None);
866 866 let p1_entry = entry0.p1_entry().unwrap();
867 867 assert!(p1_entry.is_none());
868 868 let p2_entry = entry0.p2_entry().unwrap();
869 869 assert!(p2_entry.is_none());
870 870
871 871 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
872 872 assert_eq!(entry1.revision(), Revision(1));
873 873 assert_eq!(*entry1.node(), node1);
874 874 assert!(!entry1.has_p1());
875 875 assert_eq!(entry1.p1(), None);
876 876 assert_eq!(entry1.p2(), None);
877 877 let p1_entry = entry1.p1_entry().unwrap();
878 878 assert!(p1_entry.is_none());
879 879 let p2_entry = entry1.p2_entry().unwrap();
880 880 assert!(p2_entry.is_none());
881 881
882 882 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
883 883 assert_eq!(entry2.revision(), Revision(2));
884 884 assert_eq!(*entry2.node(), node2);
885 885 assert!(entry2.has_p1());
886 886 assert_eq!(entry2.p1(), Some(Revision(0)));
887 887 assert_eq!(entry2.p2(), Some(Revision(1)));
888 888 let p1_entry = entry2.p1_entry().unwrap();
889 889 assert!(p1_entry.is_some());
890 890 assert_eq!(p1_entry.unwrap().revision(), Revision(0));
891 891 let p2_entry = entry2.p2_entry().unwrap();
892 892 assert!(p2_entry.is_some());
893 893 assert_eq!(p2_entry.unwrap().revision(), Revision(1));
894 894 }
895 895
896 896 #[test]
897 897 fn test_nodemap() {
898 898 let temp = tempfile::tempdir().unwrap();
899 899 let vfs = Vfs { base: temp.path() };
900 900
901 901 // building a revlog with a forced Node starting with zeros
902 902 // This is a corruption, but it does not preclude using the nodemap
903 903 // if we don't try to access the data
904 904 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
905 905 .unwrap();
906 906 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
907 907 .unwrap();
908 908 let entry0_bytes = IndexEntryBuilder::new()
909 909 .is_first(true)
910 910 .with_version(1)
911 911 .with_inline(true)
912 912 .with_offset(INDEX_ENTRY_SIZE)
913 913 .with_node(node0)
914 914 .build();
915 915 let entry1_bytes = IndexEntryBuilder::new()
916 916 .with_offset(INDEX_ENTRY_SIZE)
917 917 .with_node(node1)
918 918 .build();
919 919 let contents = vec![entry0_bytes, entry1_bytes]
920 920 .into_iter()
921 921 .flatten()
922 922 .collect_vec();
923 923 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
924 924
925 925 let mut idx = nodemap::tests::TestNtIndex::new();
926 926 idx.insert_node(Revision(0), node0).unwrap();
927 927 idx.insert_node(Revision(1), node1).unwrap();
928 928
929 929 let revlog =
930 930 Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
931 931
932 932 // accessing the data shows the corruption
933 933 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
934 934
935 935 assert_eq!(
936 936 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
937 937 Revision(-1)
938 938 );
939 939 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
940 940 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
941 941 assert_eq!(
942 942 revlog
943 943 .rev_from_node(NodePrefix::from_hex("000").unwrap())
944 944 .unwrap(),
945 945 Revision(-1)
946 946 );
947 947 assert_eq!(
948 948 revlog
949 949 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
950 950 .unwrap(),
951 951 Revision(1)
952 952 );
953 953 // RevlogError does not implement PartialEq
954 954 // (ultimately because io::Error does not)
955 955 match revlog
956 956 .rev_from_node(NodePrefix::from_hex("00").unwrap())
957 957 .expect_err("Expected to give AmbiguousPrefix error")
958 958 {
959 959 RevlogError::AmbiguousPrefix => (),
960 960 e => {
961 961 panic!("Got another error than AmbiguousPrefix: {:?}", e);
962 962 }
963 963 };
964 964 }
965 965 }
@@ -1,429 +1,429 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In the Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use crate::errors::HgError;
12 12 use bytes_cast::BytesCast;
13 13 use std::fmt;
14 14
15 15 /// The length in bytes of a `Node`
16 16 ///
17 17 /// This constant is meant to ease refactors of this module, and
18 18 /// is private so that calling code does not expect all nodes to have
19 19 /// the same size, should we support several formats concurrently in
20 20 /// the future.
21 21 pub const NODE_BYTES_LENGTH: usize = 20;
22 22
23 23 /// Id of the null node.
24 24 ///
25 25 /// Used to indicate the absence of node.
26 26 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 27
28 28 /// The length in bytes of a `Node`
29 29 ///
30 30 /// see also `NODE_BYTES_LENGTH` about it being private.
31 31 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 32
33 33 /// Default for UI presentation
34 34 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
35 35
36 36 /// Private alias for readability and to ease future change
37 37 type NodeData = [u8; NODE_BYTES_LENGTH];
38 38
39 39 /// Binary revision SHA
40 40 ///
41 41 /// ## Future changes of hash size
42 42 ///
43 43 /// To accommodate future changes of hash size, Rust callers
44 44 /// should use the conversion methods at the boundaries (FFI, actual
45 45 /// computation of hashes and I/O) only, and only if required.
46 46 ///
47 47 /// All other callers outside of unit tests should just handle `Node` values
48 48 /// and never make any assumption on the actual length, using [`nybbles_len`]
49 49 /// if they need a loop boundary.
50 50 ///
51 51 /// All methods that create a `Node` either take a type that enforces
52 52 /// the size or return an error at runtime.
53 53 ///
54 54 /// [`nybbles_len`]: #method.nybbles_len
55 55 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
56 56 #[repr(transparent)]
57 57 pub struct Node {
58 58 data: NodeData,
59 59 }
60 60
61 61 impl fmt::Debug for Node {
62 62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 63 let n = format!("{:x?}", self.data);
64 64 // We're using debug_tuple because it makes the output a little
65 65 // more compact without losing data.
66 66 f.debug_tuple("Node").field(&n).finish()
67 67 }
68 68 }
69 69
70 70 /// The node value for NULL_REVISION
71 71 pub const NULL_NODE: Node = Node {
72 72 data: [0; NODE_BYTES_LENGTH],
73 73 };
74 74
75 75 /// Return an error if the slice has an unexpected length
76 76 impl<'a> TryFrom<&'a [u8]> for &'a Node {
77 77 type Error = ();
78 78
79 79 #[inline]
80 80 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
81 81 match Node::from_bytes(bytes) {
82 82 Ok((node, rest)) if rest.is_empty() => Ok(node),
83 83 _ => Err(()),
84 84 }
85 85 }
86 86 }
87 87
88 88 /// Return an error if the slice has an unexpected length
89 89 impl TryFrom<&'_ [u8]> for Node {
90 90 type Error = std::array::TryFromSliceError;
91 91
92 92 #[inline]
93 93 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
94 94 let data = bytes.try_into()?;
95 95 Ok(Self { data })
96 96 }
97 97 }
98 98
99 99 impl From<&'_ NodeData> for Node {
100 100 #[inline]
101 101 fn from(data: &'_ NodeData) -> Self {
102 102 Self { data: *data }
103 103 }
104 104 }
105 105
106 106 impl fmt::LowerHex for Node {
107 107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 108 for &byte in &self.data {
109 109 write!(f, "{:02x}", byte)?
110 110 }
111 111 Ok(())
112 112 }
113 113 }
114 114
115 115 #[derive(Debug)]
116 116 pub struct FromHexError;
117 117
118 118 /// Low level utility function, also for prefixes
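///
/// (Illustrative example, not from the original source: for `s = [0xab]`,
/// `get_nybble(s, 0)` returns `0xa` and `get_nybble(s, 1)` returns `0xb`.)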
119 119 fn get_nybble(s: &[u8], i: usize) -> u8 {
120 120 if i % 2 == 0 {
121 121 s[i / 2] >> 4
122 122 } else {
123 123 s[i / 2] & 0x0f
124 124 }
125 125 }
126 126
127 127 impl Node {
128 128 /// Retrieve the `i`th half-byte of the binary data.
129 129 ///
130 130 /// This is also the `i`th hexadecimal digit in numeric form,
131 131 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
132 132 pub fn get_nybble(&self, i: usize) -> u8 {
133 133 get_nybble(&self.data, i)
134 134 }
135 135
136 136 /// Length of the data, in nybbles
137 137 pub fn nybbles_len(&self) -> usize {
138 138 // public exposure as an instance method only, so that we can
139 139 // easily support several sizes of hashes if needed in the future.
140 140 NODE_NYBBLES_LENGTH
141 141 }
142 142
143 143 /// Convert from hexadecimal string representation
144 144 ///
145 145 /// Exact length is required.
146 146 ///
147 147 /// To be used in FFI and I/O only, in order to facilitate future
148 148 /// changes of hash format.
149 149 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
150 150 let prefix = NodePrefix::from_hex(hex)?;
151 151 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
152 152 Ok(Self { data: prefix.data })
153 153 } else {
154 154 Err(FromHexError)
155 155 }
156 156 }
157 157
158 158 /// `from_hex`, but for input from an internal file of the repository such
159 159 /// as a changelog or manifest entry.
160 160 ///
161 161 /// An error is treated as repository corruption.
162 162 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
163 163 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
164 164 HgError::CorruptedRepository(format!(
165 165 "Expected a full hexadecimal node ID, found {}",
166 166 String::from_utf8_lossy(hex.as_ref())
167 167 ))
168 168 })
169 169 }
170 170
171 171 /// Provide access to binary data
172 172 ///
173 173 /// This is needed by FFI layers, for instance to return expected
174 174 /// binary values to Python.
175 175 pub fn as_bytes(&self) -> &[u8] {
176 176 &self.data
177 177 }
178 178
179 179 pub fn short(&self) -> NodePrefix {
180 180 NodePrefix {
181 181 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
182 182 data: self.data,
183 183 }
184 184 }
185 185
186 186 pub fn pad_to_256_bits(&self) -> [u8; 32] {
187 187 let mut bits = [0; 32];
188 188 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
189 189 bits
190 190 }
191 191 }
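
// Illustrative sketch (not part of the original changeset): `pad_to_256_bits`
// copies the current hash (`NODE_BYTES_LENGTH` bytes) into a 32-byte buffer
// and leaves the remaining bytes at zero.
#[cfg(test)]
#[test]
fn example_pad_to_256_bits() {
    let node = Node::from([0xff; NODE_BYTES_LENGTH]);
    let padded = node.pad_to_256_bits();
    // The original bytes come first, unchanged.
    assert_eq!(&padded[..NODE_BYTES_LENGTH], node.as_bytes());
    // The rest of the buffer is zero padding.
    assert!(padded[NODE_BYTES_LENGTH..].iter().all(|&byte| byte == 0));
}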
192 192
193 193 /// The beginning of a binary revision SHA.
194 194 ///
195 195 /// Since it can potentially come from a hexadecimal representation with
196 196 /// odd length, it needs to carry around whether the last 4 bits are relevant
197 197 /// or not.
198 198 #[derive(Debug, PartialEq, Copy, Clone)]
199 199 pub struct NodePrefix {
200 200 /// In `1..=NODE_NYBBLES_LENGTH`
201 201 nybbles_len: u8,
202 202 /// The first `4 * length_in_nybbles` bits are used (considering bits
203 203 /// within a byte in big-endian: most significant first), the rest
204 204 /// are zero.
205 205 data: NodeData,
206 206 }
207 207
208 208 impl NodePrefix {
209 209 /// Convert from hexadecimal string representation
210 210 ///
211 211 /// Similarly to `hex::decode`, can be used with Unicode string types
212 212 /// (`String`, `&str`) as well as bytes.
213 213 ///
214 214 /// To be used in FFI and I/O only, in order to facilitate future
215 215 /// changes of hash format.
216 216 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
217 217 let hex = hex.as_ref();
218 218 let len = hex.len();
219 219 if len > NODE_NYBBLES_LENGTH || len == 0 {
220 220 return Err(FromHexError);
221 221 }
222 222
223 223 let mut data = [0; NODE_BYTES_LENGTH];
224 224 let mut nybbles_len = 0;
225 225 for &ascii_byte in hex {
226 226 let nybble = match char::from(ascii_byte).to_digit(16) {
227 227 Some(digit) => digit as u8,
228 228 None => return Err(FromHexError),
229 229 };
230 230 // Fill in the upper half of a byte first, then the lower half.
231 231 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
232 232 data[nybbles_len as usize / 2] |= nybble << shift;
233 233 nybbles_len += 1;
234 234 }
235 235 Ok(Self { data, nybbles_len })
236 236 }
237 237
238 238 pub fn nybbles_len(&self) -> usize {
239 239 self.nybbles_len as _
240 240 }
241 241
242 242 pub fn is_prefix_of(&self, node: &Node) -> bool {
243 243 let full_bytes = self.nybbles_len() / 2;
244 244 if self.data[..full_bytes] != node.data[..full_bytes] {
245 245 return false;
246 246 }
247 247 if self.nybbles_len() % 2 == 0 {
248 248 return true;
249 249 }
250 250 let last = self.nybbles_len() - 1;
251 251 self.get_nybble(last) == node.get_nybble(last)
252 252 }
253 253
254 254 /// Retrieve the `i`th half-byte from the prefix.
255 255 ///
256 256 /// This is also the `i`th hexadecimal digit in numeric form,
257 257 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
258 258 pub fn get_nybble(&self, i: usize) -> u8 {
259 259 assert!(i < self.nybbles_len());
260 260 get_nybble(&self.data, i)
261 261 }
262 262
263 263 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
264 264 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
265 265 }
266 266
267 267 /// Return the index of the first nybble that's different from `node`
268 268 ///
269 269 /// If the return value is `None`, then `self` is a prefix of `node`.
270 270 /// Note that this method is a bit slower than `is_prefix_of` for that
271 271 /// check alone.
272 272 ///
273 273 /// Returned index is as in `get_nybble`, i.e., starting at 0.
274 274 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
275 275 self.iter_nybbles()
276 276 .zip(NodePrefix::from(*node).iter_nybbles())
277 277 .position(|(a, b)| a != b)
278 278 }
279 279 }
280 280
281 281 impl fmt::LowerHex for NodePrefix {
282 282 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283 283 let full_bytes = self.nybbles_len() / 2;
284 284 for &byte in &self.data[..full_bytes] {
285 285 write!(f, "{:02x}", byte)?
286 286 }
287 287 if self.nybbles_len() % 2 == 1 {
288 288 let last = self.nybbles_len() - 1;
289 289 write!(f, "{:x}", self.get_nybble(last))?
290 290 }
291 291 Ok(())
292 292 }
293 293 }
294 294
295 295 /// A shortcut for full `Node` references
296 296 impl From<&'_ Node> for NodePrefix {
297 297 fn from(node: &'_ Node) -> Self {
298 298 NodePrefix {
299 299 nybbles_len: node.nybbles_len() as _,
300 300 data: node.data,
301 301 }
302 302 }
303 303 }
304 304
305 305 /// A shortcut for full `Node` references
306 306 impl From<Node> for NodePrefix {
307 307 fn from(node: Node) -> Self {
308 308 NodePrefix {
309 309 nybbles_len: node.nybbles_len() as _,
310 310 data: node.data,
311 311 }
312 312 }
313 313 }
314 314
315 315 impl PartialEq<Node> for NodePrefix {
316 316 fn eq(&self, other: &Node) -> bool {
317 317 self.data == other.data && self.nybbles_len() == other.nybbles_len()
318 318 }
319 319 }
320 320
321 321 #[cfg(test)]
322 322 mod tests {
323 323 use super::*;
324 324
325 325 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
326 326 const SAMPLE_NODE: Node = Node {
327 327 data: [
328 328 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
329 329 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
330 330 ],
331 331 };
332 332
333 333 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
334 334 /// The padding is made with zeros.
335 335 pub fn hex_pad_right(hex: &str) -> String {
336 336 let mut res = hex.to_string();
337 337 while res.len() < NODE_NYBBLES_LENGTH {
338 338 res.push('0');
339 339 }
340 340 res
341 341 }
342 342
343 343 #[test]
344 344 fn test_node_from_hex() {
345 345 let not_hex = "012... oops";
346 346 let too_short = "0123";
347 347 let too_long = format!("{}0", SAMPLE_NODE_HEX);
348 348 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
349 349 assert!(Node::from_hex(not_hex).is_err());
350 350 assert!(Node::from_hex(too_short).is_err());
351 assert!(Node::from_hex(&too_long).is_err());
351 assert!(Node::from_hex(too_long).is_err());
352 352 }
353 353
354 354 #[test]
355 355 fn test_node_encode_hex() {
356 356 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
357 357 }
358 358
359 359 #[test]
360 360 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
361 361 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
362 362 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
363 363 assert_eq!(
364 364 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
365 365 SAMPLE_NODE_HEX
366 366 );
367 367 Ok(())
368 368 }
369 369
370 370 #[test]
371 371 fn test_prefix_from_hex_errors() {
372 372 assert!(NodePrefix::from_hex("testgr").is_err());
373 373 let mut long = format!("{:x}", NULL_NODE);
374 374 long.push('c');
375 375 assert!(NodePrefix::from_hex(&long).is_err())
376 376 }
377 377
378 378 #[test]
379 379 fn test_is_prefix_of() -> Result<(), FromHexError> {
380 380 let mut node_data = [0; NODE_BYTES_LENGTH];
381 381 node_data[0] = 0x12;
382 382 node_data[1] = 0xca;
383 383 let node = Node::from(node_data);
384 384 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
385 385 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
386 386 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
387 387 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
388 388 Ok(())
389 389 }
390 390
391 391 #[test]
392 392 fn test_get_nybble() -> Result<(), FromHexError> {
393 393 let prefix = NodePrefix::from_hex("dead6789cafe")?;
394 394 assert_eq!(prefix.get_nybble(0), 13);
395 395 assert_eq!(prefix.get_nybble(7), 9);
396 396 Ok(())
397 397 }
398 398
399 399 #[test]
400 400 fn test_first_different_nybble_even_prefix() {
401 401 let prefix = NodePrefix::from_hex("12ca").unwrap();
402 402 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
403 403 assert_eq!(prefix.first_different_nybble(&node), Some(0));
404 404 node.data[0] = 0x13;
405 405 assert_eq!(prefix.first_different_nybble(&node), Some(1));
406 406 node.data[0] = 0x12;
407 407 assert_eq!(prefix.first_different_nybble(&node), Some(2));
408 408 node.data[1] = 0xca;
409 409 // now it is a prefix
410 410 assert_eq!(prefix.first_different_nybble(&node), None);
411 411 }
412 412
413 413 #[test]
414 414 fn test_first_different_nybble_odd_prefix() {
415 415 let prefix = NodePrefix::from_hex("12c").unwrap();
416 416 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 417 assert_eq!(prefix.first_different_nybble(&node), Some(0));
418 418 node.data[0] = 0x13;
419 419 assert_eq!(prefix.first_different_nybble(&node), Some(1));
420 420 node.data[0] = 0x12;
421 421 assert_eq!(prefix.first_different_nybble(&node), Some(2));
422 422 node.data[1] = 0xca;
423 423 // now it is a prefix
424 424 assert_eq!(prefix.first_different_nybble(&node), None);
425 425 }
426 426 }
427 427
428 428 #[cfg(test)]
429 429 pub use tests::hex_pad_right;
@@ -1,1102 +1,1108 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use crate::UncheckedRevision;
16 16
17 17 use super::{
18 18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
19 19 };
20 20
21 21 use bytes_cast::{unaligned, BytesCast};
22 22 use std::cmp::max;
23 23 use std::fmt;
24 24 use std::mem::{self, align_of, size_of};
25 25 use std::ops::Deref;
26 26 use std::ops::Index;
27 27
28 28 #[derive(Debug, PartialEq)]
29 29 pub enum NodeMapError {
30 30 /// A `NodePrefix` matches several [`Revision`]s.
31 31 ///
32 32 /// This can be returned by methods meant for (at most) one match.
33 33 MultipleResults,
34 34 /// A `Revision` stored in the nodemap could not be found in the index
35 35 RevisionNotInIndex(UncheckedRevision),
36 36 }
37 37
38 38 /// Mapping system from Mercurial nodes to revision numbers.
39 39 ///
40 40 /// ## `RevlogIndex` and `NodeMap`
41 41 ///
42 42 /// One way to think about their relationship is that
43 43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
44 44 /// information carried by a [`RevlogIndex`].
45 45 ///
46 46 /// Many of the methods in this trait take a `RevlogIndex` argument
47 47 /// which is used for validation of their results. This index must naturally
48 48 /// be the one the `NodeMap` is about, and it must be consistent.
49 49 ///
50 50 /// Notably, the `NodeMap` must not store
51 51 /// information about more `Revision` values than there are in the index.
52 52 /// In these methods, if an encountered `Revision` is not in the index, a
53 53 /// [RevisionNotInIndex](NodeMapError) error is returned.
54 54 ///
55 55 /// In insert operations, the rule is thus that the `NodeMap` must always
56 56 /// be updated after the `RevlogIndex` it is about.
57 57 pub trait NodeMap {
58 58 /// Find the unique `Revision` having the given `Node`
59 59 ///
60 60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
61 61 fn find_node(
62 62 &self,
63 63 index: &impl RevlogIndex,
64 64 node: &Node,
65 65 ) -> Result<Option<Revision>, NodeMapError> {
66 66 self.find_bin(index, node.into())
67 67 }
68 68
69 69 /// Find the unique Revision whose `Node` starts with a given binary prefix
70 70 ///
71 71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
72 72 ///
73 73 /// If several Revisions match the given prefix, a
74 74 /// [MultipleResults](NodeMapError) error is returned.
75 75 fn find_bin(
76 76 &self,
77 77 idx: &impl RevlogIndex,
78 78 prefix: NodePrefix,
79 79 ) -> Result<Option<Revision>, NodeMapError>;
80 80
81 81 /// Give the size of the shortest node prefix that determines
82 82 /// the revision uniquely.
83 83 ///
84 84 /// From a binary node prefix, if it is matched in the node map, this
85 85 /// returns the number of hexadecimal digits that would have sufficed
86 86 /// to find the revision uniquely.
87 87 ///
88 88 /// Returns `None` if no [`Revision`] could be found for the prefix.
89 89 ///
90 90 /// If several Revisions match the given prefix, a
91 91 /// [MultipleResults](NodeMapError) error is returned.
92 92 fn unique_prefix_len_bin(
93 93 &self,
94 94 idx: &impl RevlogIndex,
95 95 node_prefix: NodePrefix,
96 96 ) -> Result<Option<usize>, NodeMapError>;
97 97
98 98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
99 99 /// a full [`Node`] as input
100 100 fn unique_prefix_len_node(
101 101 &self,
102 102 idx: &impl RevlogIndex,
103 103 node: &Node,
104 104 ) -> Result<Option<usize>, NodeMapError> {
105 105 self.unique_prefix_len_bin(idx, node.into())
106 106 }
107 107 }
108 108
109 109 pub trait MutableNodeMap: NodeMap {
110 110 fn insert<I: RevlogIndex>(
111 111 &mut self,
112 112 index: &I,
113 113 node: &Node,
114 114 rev: Revision,
115 115 ) -> Result<(), NodeMapError>;
116 116 }
117 117
118 118 /// Low level NodeTree [`Block`] elements
119 119 ///
120 120 /// These are stored exactly as they are, for instance on persistent storage.
121 121 type RawElement = unaligned::I32Be;
122 122
123 123 /// High level representation of values in NodeTree
124 124 /// [`Blocks`](struct.Block.html)
125 125 ///
126 126 /// This is the high level representation that most algorithms should
127 127 /// use.
128 128 #[derive(Clone, Debug, Eq, PartialEq)]
129 129 enum Element {
130 130 // This is not a Mercurial revision. It's an `i32` because this is the
131 131 // right type for this structure.
132 132 Rev(i32),
133 133 Block(usize),
134 134 None,
135 135 }
136 136
137 137 impl From<RawElement> for Element {
138 138 /// Conversion from low level representation, after endianness conversion.
139 139 ///
140 140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
141 141 fn from(raw: RawElement) -> Element {
142 142 let int = raw.get();
143 143 if int >= 0 {
144 144 Element::Block(int as usize)
145 145 } else if int == -1 {
146 146 Element::None
147 147 } else {
148 148 Element::Rev(-int - 2)
149 149 }
150 150 }
151 151 }
152 152
153 153 impl From<Element> for RawElement {
154 154 fn from(element: Element) -> RawElement {
155 155 RawElement::from(match element {
156 156 Element::None => -1,
157 157 Element::Block(i) => i as i32,
158 158 Element::Rev(rev) => -rev - 2,
159 159 })
160 160 }
161 161 }
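
// Illustrative sketch (not part of the original changeset): the two
// conversions above are inverses of each other. Block indices stay >= 0,
// -1 is the "absent" marker, and revision `r` is stored as `-r - 2`.
#[cfg(test)]
#[test]
fn example_element_encoding_roundtrip() {
    let elements = [
        Element::None,
        Element::Block(0),
        Element::Block(7),
        Element::Rev(0),
        Element::Rev(5),
    ];
    for element in elements.iter() {
        let raw = RawElement::from(element.clone());
        assert_eq!(Element::from(raw), *element);
    }
    // Concrete encodings: -1 means "absent", -2 encodes revision 0.
    assert_eq!(RawElement::from(Element::None).get(), -1);
    assert_eq!(RawElement::from(Element::Rev(0)).get(), -2);
}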
162 162
163 163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
164 164
165 165 /// A logical block of the [`NodeTree`], packed with a fixed size.
166 166 ///
167 167 /// These are always used in container types implementing `Index<Block>`,
169 169 /// such as `&[Block]`
169 169 ///
170 170 /// As an array of integers, its `i`th element encodes the `i`th potential
171 171 /// edge from the block, i.e. the edge followed for the hexadecimal digit
172 172 /// (nybble) `i`. Each element is either:
173 173 ///
174 174 /// - absent (value -1)
175 175 /// - another `Block` in the same indexable container (value ≥ 0)
176 176 /// - a [`Revision`] leaf (value ≤ -2)
177 177 ///
178 178 /// Endianness has to be fixed for consistency on shared storage across
179 179 /// different architectures.
180 180 ///
181 181 /// A key difference with the C `nodetree` is that we need to be
182 182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
183 183 /// rather than 0, and the [`Revision`] range upper limit is -2 instead of -1.
184 184 ///
185 185 /// Another related difference is that `NULL_REVISION` (-1) is not
186 186 /// represented at all, because we want an immutable empty nodetree
187 187 /// to be valid.
188 188 #[derive(Copy, Clone, BytesCast, PartialEq)]
189 189 #[repr(transparent)]
190 190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
191 191
192 192 impl Block {
193 193 fn new() -> Self {
194 194 let absent_node = RawElement::from(-1);
195 195 Block([absent_node; ELEMENTS_PER_BLOCK])
196 196 }
197 197
198 198 fn get(&self, nybble: u8) -> Element {
199 199 self.0[nybble as usize].into()
200 200 }
201 201
202 202 fn set(&mut self, nybble: u8, element: Element) {
203 203 self.0[nybble as usize] = element.into()
204 204 }
205 205 }
206 206
207 207 impl fmt::Debug for Block {
208 208 /// sparse representation for testing and debugging purposes
209 209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210 210 f.debug_map()
211 211 .entries((0..16).filter_map(|i| match self.get(i) {
212 212 Element::None => None,
213 213 element => Some((i, element)),
214 214 }))
215 215 .finish()
216 216 }
217 217 }
218 218
219 219 /// A mutable 16-radix tree with the root block logically at the end
220 220 ///
221 221 /// Because of the append only nature of our node trees, we need to
222 222 /// keep the original untouched and store new blocks separately.
223 223 ///
224 224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
225 225 /// it on each insertion.
226 226 pub struct NodeTree {
227 227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
228 228 growable: Vec<Block>,
229 229 root: Block,
230 230 masked_inner_blocks: usize,
231 231 }
232 232
233 233 impl Index<usize> for NodeTree {
234 234 type Output = Block;
235 235
236 236 fn index(&self, i: usize) -> &Block {
237 237 let ro_len = self.readonly.len();
238 238 if i < ro_len {
239 239 &self.readonly[i]
240 240 } else if i == ro_len + self.growable.len() {
241 241 &self.root
242 242 } else {
243 243 &self.growable[i - ro_len]
244 244 }
245 245 }
246 246 }
247 247
248 248 /// Return `None` unless the [`Node`] for `rev` has the given prefix in `idx`.
249 249 fn has_prefix_or_none(
250 250 idx: &impl RevlogIndex,
251 251 prefix: NodePrefix,
252 252 rev: UncheckedRevision,
253 253 ) -> Result<Option<Revision>, NodeMapError> {
254 254 match idx.check_revision(rev) {
255 255 Some(checked) => idx
256 256 .node(checked)
257 257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
258 258 .map(|node| {
259 259 if prefix.is_prefix_of(node) {
260 260 Some(checked)
261 261 } else {
262 262 None
263 263 }
264 264 }),
265 265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
266 266 }
267 267 }
268 268
269 269 /// Validate that the candidate's node indeed starts with the given prefix,
270 270 /// and handle ambiguities related to [`NULL_REVISION`].
271 271 ///
272 272 /// From the data in the NodeTree, one can only conclude that some
273 273 /// revision is the only one for a *subprefix* of the one being looked up.
274 274 fn validate_candidate(
275 275 idx: &impl RevlogIndex,
276 276 prefix: NodePrefix,
277 277 candidate: (Option<UncheckedRevision>, usize),
278 278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
279 279 let (rev, steps) = candidate;
280 280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
281 281 rev.map_or(Ok((None, steps)), |r| {
282 282 has_prefix_or_none(idx, prefix, r)
283 283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
284 284 })
285 285 } else {
286 286 // the prefix is only made of zeros; NULL_REVISION always matches it
287 287 // and any other *valid* result is an ambiguity
288 288 match rev {
289 289 None => Ok((Some(NULL_REVISION), steps + 1)),
290 290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
291 291 None => Ok((Some(NULL_REVISION), steps + 1)),
292 292 _ => Err(NodeMapError::MultipleResults),
293 293 },
294 294 }
295 295 }
296 296 }
297 297
298 298 impl NodeTree {
299 299 /// Initialize a `NodeTree` from an immutable slice-like container of `Block`s
300 300 ///
301 301 /// We keep `readonly` and clone its root block if it isn't empty.
302 302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
303 303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
304 304 NodeTree {
305 305 readonly,
306 306 growable: Vec::new(),
307 307 root,
308 308 masked_inner_blocks: 0,
309 309 }
310 310 }
311 311
312 312 /// Create from an opaque bunch of bytes
313 313 ///
314 314 /// The [`NodeTreeBytes`] is created from `bytes`,
315 315 /// of which exactly `amount` bytes are used.
316 316 ///
317 317 /// - `bytes` could be derived from `PyBuffer` and `Mmap` objects.
318 318 /// - `amount` is expressed in bytes, and is not automatically derived from
319 319 /// `bytes`, so that a caller that manages them atomically can perform
320 320 /// temporary disk serializations and still rollback easily if needed.
321 321 /// First use-case for this would be to support Mercurial shell hooks.
322 322 ///
323 323 /// Panics if `bytes` is smaller than `amount`.
324 324 pub fn load_bytes(
325 325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
326 326 amount: usize,
327 327 ) -> Self {
328 328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
329 329 }
330 330
331 331 /// Retrieve added [`Block`]s and the original immutable data
332 332 pub fn into_readonly_and_added(
333 333 self,
334 334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
335 335 let mut vec = self.growable;
336 336 let readonly = self.readonly;
337 337 if readonly.last() != Some(&self.root) {
338 338 vec.push(self.root);
339 339 }
340 340 (readonly, vec)
341 341 }
342 342
343 343 /// Retrieve added [`Block`]s as bytes, ready to be written to persistent
344 344 /// storage
345 345 pub fn into_readonly_and_added_bytes(
346 346 self,
347 347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
348 348 let (readonly, vec) = self.into_readonly_and_added();
349 349 // Prevent running `v`'s destructor so we are in complete control
350 350 // of the allocation.
351 351 let vec = mem::ManuallyDrop::new(vec);
352 352
353 353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
354 354 // bytes, so this is perfectly safe.
355 355 let bytes = unsafe {
356 356 // Check for compatible allocation layout.
357 357 // (Optimized away by constant-folding + dead code elimination.)
358 358 assert_eq!(size_of::<Block>(), 64);
359 359 assert_eq!(align_of::<Block>(), 1);
360 360
361 361 // /!\ Any use of `vec` after this is use-after-free.
362 362 // TODO: use `into_raw_parts` once stabilized
363 363 Vec::from_raw_parts(
364 364 vec.as_ptr() as *mut u8,
365 365 vec.len() * size_of::<Block>(),
366 366 vec.capacity() * size_of::<Block>(),
367 367 )
368 368 };
369 369 (readonly, bytes)
370 370 }
371 371
372 372 /// Total number of blocks
373 373 fn len(&self) -> usize {
374 374 self.readonly.len() + self.growable.len() + 1
375 375 }
376 376
377 377 /// Implemented for completeness
378 378 ///
379 379 /// A `NodeTree` always has at least the mutable root block.
380 380 #[allow(dead_code)]
381 381 fn is_empty(&self) -> bool {
382 382 false
383 383 }
384 384
385 385 /// Main working method for `NodeTree` searches
386 386 ///
387 387 /// The first returned value is the result of analysing `NodeTree` data
388 388 /// *alone*: whereas `None` guarantees that the given prefix is absent
389 389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
390 390 /// `Some(rev)`, it is to be understood that `rev` is the unique
391 391 /// [`Revision`] that could match the prefix. Actually, all that can
392 392 /// be inferred from
393 393 /// the `NodeTree` data is that `rev` is the revision with the longest
394 394 /// common node prefix with the given prefix.
395 395 /// We return an [`UncheckedRevision`] because we have no guarantee that
396 396 /// the revision we found is valid for the index.
397 397 ///
398 398 /// The second returned value is the size of the smallest subprefix
399 399 /// of `prefix` that would give the same result, i.e. not the
400 400 /// [MultipleResults](NodeMapError) error variant (again, using only the
401 401 /// data of the [`NodeTree`]).
402 402 fn lookup(
403 403 &self,
404 404 prefix: NodePrefix,
405 405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
406 406 for (i, visit_item) in self.visit(prefix).enumerate() {
407 407 if let Some(opt) = visit_item.final_revision() {
408 408 return Ok((opt, i + 1));
409 409 }
410 410 }
411 411 Err(NodeMapError::MultipleResults)
412 412 }
413 413
414 414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
415 415 NodeTreeVisitor {
416 416 nt: self,
417 417 prefix,
418 418 visit: self.len() - 1,
419 419 nybble_idx: 0,
420 420 done: false,
421 421 }
422 422 }
423 423 /// Return a mutable reference to the `Block` at index `idx`.
424 424 ///
425 425 /// If `idx` lies in the immutable area, then the reference is to
426 426 /// a newly appended copy.
427 427 ///
428 428 /// Returns `(new_idx, mut_ref, glen)` where
429 429 ///
430 430 /// - `new_idx` is the index of the mutable `Block`
431 431 /// - `mut_ref` is a mutable reference to the mutable Block.
432 432 /// - `glen` is the new length of `self.growable`
433 433 ///
434 434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
435 435 /// itself because of the mutable borrow taken with the returned `Block`
436 436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
437 437 let ro_blocks = &self.readonly;
438 438 let ro_len = ro_blocks.len();
439 439 let glen = self.growable.len();
440 440 if idx < ro_len {
441 441 self.masked_inner_blocks += 1;
442 442 self.growable.push(ro_blocks[idx]);
443 443 (glen + ro_len, &mut self.growable[glen], glen + 1)
444 444 } else if glen + ro_len == idx {
445 445 (idx, &mut self.root, glen)
446 446 } else {
447 447 (idx, &mut self.growable[idx - ro_len], glen)
448 448 }
449 449 }
450 450
451 451 /// Main insertion method
452 452 ///
453 453 /// This will dive in the node tree to find the deepest `Block` for
454 454 /// `node`, split it as much as needed and record `node` in there.
455 455 /// The method then backtracks, updating references in all the visited
456 456 /// blocks from the root.
457 457 ///
458 458 /// All the mutated `Block` are copied first to the growable part if
459 459 /// needed. That happens for those in the immutable part except the root.
460 460 pub fn insert<I: RevlogIndex>(
461 461 &mut self,
462 462 index: &I,
463 463 node: &Node,
464 464 rev: Revision,
465 465 ) -> Result<(), NodeMapError> {
466 466 let ro_len = &self.readonly.len();
467 467
468 468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
469 469 let read_nybbles = visit_steps.len();
470 470 // visit_steps cannot be empty, since we always visit the root block
471 471 let deepest = visit_steps.pop().unwrap();
472 472
473 473 let (mut block_idx, mut block, mut glen) =
474 474 self.mutable_block(deepest.block_idx);
475 475
476 476 if let Element::Rev(old_rev) = deepest.element {
477 477 let old_node = index
478 478 .check_revision(old_rev.into())
479 479 .and_then(|rev| index.node(rev))
480 480 .ok_or_else(|| {
481 481 NodeMapError::RevisionNotInIndex(old_rev.into())
482 482 })?;
483 483 if old_node == node {
484 484 return Ok(()); // avoid creating lots of useless blocks
485 485 }
486 486
487 487 // Looping over the tail of nybbles in both nodes, creating
488 488 // new blocks until we find the difference
489 489 let mut new_block_idx = ro_len + glen;
490 490 let mut nybble = deepest.nybble;
491 491 for nybble_pos in read_nybbles..node.nybbles_len() {
492 492 block.set(nybble, Element::Block(new_block_idx));
493 493
494 494 let new_nybble = node.get_nybble(nybble_pos);
495 495 let old_nybble = old_node.get_nybble(nybble_pos);
496 496
497 497 if old_nybble == new_nybble {
498 498 self.growable.push(Block::new());
499 499 block = &mut self.growable[glen];
500 500 glen += 1;
501 501 new_block_idx += 1;
502 502 nybble = new_nybble;
503 503 } else {
504 504 let mut new_block = Block::new();
505 505 new_block.set(old_nybble, Element::Rev(old_rev));
506 506 new_block.set(new_nybble, Element::Rev(rev.0));
507 507 self.growable.push(new_block);
508 508 break;
509 509 }
510 510 }
511 511 } else {
512 512 // Free slot in the deepest block: no splitting has to be done
513 513 block.set(deepest.nybble, Element::Rev(rev.0));
514 514 }
515 515
516 516 // Backtrack over visit steps to update references
517 517 while let Some(visited) = visit_steps.pop() {
518 518 let to_write = Element::Block(block_idx);
519 519 if visit_steps.is_empty() {
520 520 self.root.set(visited.nybble, to_write);
521 521 break;
522 522 }
523 523 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
524 524 if block.get(visited.nybble) == to_write {
525 525 break;
526 526 }
527 527 block.set(visited.nybble, to_write);
528 528 block_idx = new_idx;
529 529 }
530 530 Ok(())
531 531 }
532 532
533 533 /// Make the whole `NodeTree` logically empty, without touching the
534 534 /// immutable part.
535 535 pub fn invalidate_all(&mut self) {
536 536 self.root = Block::new();
537 537 self.growable = Vec::new();
538 538 self.masked_inner_blocks = self.readonly.len();
539 539 }
540 540
541 541 /// Return the number of blocks in the readonly part that are currently
542 542 /// masked in the mutable part.
543 543 ///
544 544 /// The `NodeTree` structure has no efficient way to know how many blocks
545 545 /// are already unreachable in the readonly part.
546 546 ///
547 547 /// After a call to `invalidate_all()`, the returned number can actually be
548 548 /// bigger than the whole readonly part: this is the conventional way to
549 549 /// signal that all the readonly blocks have been masked. That is what the
550 550 /// caller really needs to know, and it does not require tracking how many
551 551 /// blocks were actually unreachable to begin with.
552 552 pub fn masked_readonly_blocks(&self) -> usize {
553 553 if let Some(readonly_root) = self.readonly.last() {
554 554 if readonly_root == &self.root {
555 555 return 0;
556 556 }
557 557 } else {
558 558 return 0;
559 559 }
560 560 self.masked_inner_blocks + 1
561 561 }
562 562 }
563 563
564 564 pub struct NodeTreeBytes {
565 565 buffer: Box<dyn Deref<Target = [u8]> + Send>,
566 566 len_in_blocks: usize,
567 567 }
568 568
569 569 impl NodeTreeBytes {
570 570 fn new(
571 571 buffer: Box<dyn Deref<Target = [u8]> + Send>,
572 572 amount: usize,
573 573 ) -> Self {
574 574 assert!(buffer.len() >= amount);
575 575 let len_in_blocks = amount / size_of::<Block>();
576 576 NodeTreeBytes {
577 577 buffer,
578 578 len_in_blocks,
579 579 }
580 580 }
581 581 }
582 582
583 583 impl Deref for NodeTreeBytes {
584 584 type Target = [Block];
585 585
586 586 fn deref(&self) -> &[Block] {
587 587 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
588 588 // `NodeTreeBytes::new` already asserted that `self.buffer` is
589 589 // large enough.
590 590 .unwrap()
591 591 .0
592 592 }
593 593 }
594 594
595 595 struct NodeTreeVisitor<'n> {
596 596 nt: &'n NodeTree,
597 597 prefix: NodePrefix,
598 598 visit: usize,
599 599 nybble_idx: usize,
600 600 done: bool,
601 601 }
602 602
603 603 #[derive(Debug, PartialEq, Clone)]
604 604 struct NodeTreeVisitItem {
605 605 block_idx: usize,
606 606 nybble: u8,
607 607 element: Element,
608 608 }
609 609
610 610 impl<'n> Iterator for NodeTreeVisitor<'n> {
611 611 type Item = NodeTreeVisitItem;
612 612
613 613 fn next(&mut self) -> Option<Self::Item> {
614 614 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
615 615 return None;
616 616 }
617 617
618 618 let nybble = self.prefix.get_nybble(self.nybble_idx);
619 619 self.nybble_idx += 1;
620 620
621 621 let visit = self.visit;
622 622 let element = self.nt[visit].get(nybble);
623 623 if let Element::Block(idx) = element {
624 624 self.visit = idx;
625 625 } else {
626 626 self.done = true;
627 627 }
628 628
629 629 Some(NodeTreeVisitItem {
630 630 block_idx: visit,
631 631 nybble,
632 632 element,
633 633 })
634 634 }
635 635 }
636 636
637 637 impl NodeTreeVisitItem {
638 638 // Return `Some(opt)` if this item is final, with `opt` being the
639 639 // `UncheckedRevision` that it may represent.
640 640 //
641 641 // If the item is not terminal, return `None`
642 642 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
643 643 match self.element {
644 644 Element::Block(_) => None,
645 645 Element::Rev(r) => Some(Some(r.into())),
646 646 Element::None => Some(None),
647 647 }
648 648 }
649 649 }
650 650
651 651 impl From<Vec<Block>> for NodeTree {
652 652 fn from(vec: Vec<Block>) -> Self {
653 653 Self::new(Box::new(vec))
654 654 }
655 655 }
656 656
657 657 impl fmt::Debug for NodeTree {
658 658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
659 let readonly: &[Block] = &*self.readonly;
659 let readonly: &[Block] = &self.readonly;
660 660 write!(
661 661 f,
662 662 "readonly: {:?}, growable: {:?}, root: {:?}",
663 663 readonly, self.growable, self.root
664 664 )
665 665 }
666 666 }
667 667
668 668 impl Default for NodeTree {
669 669 /// Create a fully mutable empty NodeTree
670 670 fn default() -> Self {
671 NodeTree::new(Box::new(Vec::new()))
671 NodeTree::new(Box::<Vec<_>>::default())
672 672 }
673 673 }
674 674
675 675 impl NodeMap for NodeTree {
676 676 fn find_bin<'a>(
677 677 &self,
678 678 idx: &impl RevlogIndex,
679 679 prefix: NodePrefix,
680 680 ) -> Result<Option<Revision>, NodeMapError> {
681 681 validate_candidate(idx, prefix, self.lookup(prefix)?)
682 682 .map(|(opt, _shortest)| opt)
683 683 }
684 684
685 685 fn unique_prefix_len_bin<'a>(
686 686 &self,
687 687 idx: &impl RevlogIndex,
688 688 prefix: NodePrefix,
689 689 ) -> Result<Option<usize>, NodeMapError> {
690 690 validate_candidate(idx, prefix, self.lookup(prefix)?)
691 691 .map(|(opt, shortest)| opt.map(|_rev| shortest))
692 692 }
693 693 }
694 694
695 695 #[cfg(test)]
696 696 pub mod tests {
697 697 use super::NodeMapError::*;
698 698 use super::*;
699 699 use crate::revlog::node::{hex_pad_right, Node};
700 700 use std::collections::HashMap;
701 701
702 702 /// Creates a `Block` using a syntax close to the `Debug` output
703 703 macro_rules! block {
704 704 {$($nybble:tt : $variant:ident($val:tt)),*} => (
705 705 {
706 706 let mut block = Block::new();
707 707 $(block.set($nybble, Element::$variant($val)));*;
708 708 block
709 709 }
710 710 )
711 711 }
712 712
713 713 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
714 714 macro_rules! R {
715 715 ($revision:literal) => {
716 716 Revision($revision)
717 717 };
718 718 }
719 719
720 720 #[test]
721 721 fn test_block_debug() {
722 722 let mut block = Block::new();
723 723 block.set(1, Element::Rev(3));
724 724 block.set(10, Element::Block(0));
725 725 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
726 726 }
727 727
728 728 #[test]
729 729 fn test_block_macro() {
730 730 let block = block! {5: Block(2)};
731 731 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
732 732
733 733 let block = block! {13: Rev(15), 5: Block(2)};
734 734 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
735 735 }
736 736
737 737 #[test]
738 738 fn test_raw_block() {
739 739 let mut raw = [255u8; 64];
740 740
741 741 let mut counter = 0;
742 742 for val in [0_i32, 15, -2, -1, -3].iter() {
743 743 for byte in val.to_be_bytes().iter() {
744 744 raw[counter] = *byte;
745 745 counter += 1;
746 746 }
747 747 }
748 748 let (block, _) = Block::from_bytes(&raw).unwrap();
749 749 assert_eq!(block.get(0), Element::Block(0));
750 750 assert_eq!(block.get(1), Element::Block(15));
751 751 assert_eq!(block.get(3), Element::None);
752 752 assert_eq!(block.get(2), Element::Rev(0));
753 753 assert_eq!(block.get(4), Element::Rev(1));
754 754 }
755 755
756 756 type TestIndex = HashMap<UncheckedRevision, Node>;
757 757
758 758 impl RevlogIndex for TestIndex {
759 759 fn node(&self, rev: Revision) -> Option<&Node> {
760 760 self.get(&rev.into())
761 761 }
762 762
763 763 fn len(&self) -> usize {
764 764 self.len()
765 765 }
766 766
767 767 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
768 768 self.get(&rev).map(|_| Revision(rev.0))
769 769 }
770 770 }
771 771
772 772 /// Pad hexadecimal Node prefix with zeros on the right
773 773 ///
774 774 /// This avoids having to repeatedly write very long hexadecimal
775 775 /// strings for test data, and keeps them independent of the actual hash size.
776 776 #[cfg(test)]
777 777 fn pad_node(hex: &str) -> Node {
778 Node::from_hex(&hex_pad_right(hex)).unwrap()
778 Node::from_hex(hex_pad_right(hex)).unwrap()
779 779 }
780 780
781 781 /// Pad hexadecimal Node prefix with zeros on the right, then insert
782 782 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
783 783 idx.insert(rev.into(), pad_node(hex));
784 784 }
785 785
786 786 fn sample_nodetree() -> NodeTree {
787 787 NodeTree::from(vec![
788 788 block![0: Rev(9)],
789 789 block![0: Rev(0), 1: Rev(9)],
790 790 block![0: Block(1), 1:Rev(1)],
791 791 ])
792 792 }
793 793
794 794 fn hex(s: &str) -> NodePrefix {
795 795 NodePrefix::from_hex(s).unwrap()
796 796 }
797 797
798 798 #[test]
799 799 fn test_nt_debug() {
800 800 let nt = sample_nodetree();
801 801 assert_eq!(
802 802 format!("{:?}", nt),
803 803 "readonly: \
804 804 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
805 805 growable: [], \
806 806 root: {0: Block(1), 1: Rev(1)}",
807 807 );
808 808 }
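
// Sketch (not part of the original changeset): how logical block indices
// are laid out, readonly blocks first, then growable ones, with the mutable
// root always at the last index, as implemented by `Index<usize>`.
#[test]
fn example_block_index_layout() {
    let nt = sample_nodetree();
    assert_eq!(nt.len(), nt.readonly.len() + nt.growable.len() + 1);
    // The last logical index always resolves to the mutable root.
    assert_eq!(nt[nt.len() - 1], nt.root);
    // Lower indices resolve to the readonly blocks.
    assert_eq!(nt[0], nt.readonly[0]);
}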
809 809
810 810 #[test]
811 811 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
812 812 let mut idx: TestIndex = HashMap::new();
813 813 pad_insert(&mut idx, R!(1), "1234deadcafe");
814 814
815 815 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
816 816 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(R!(1)));
817 817 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(R!(1)));
818 818 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(R!(1)));
819 819 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
820 820 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
821 821
822 822 // and with full binary Nodes
823 823 assert_eq!(
824 824 nt.find_node(&idx, idx.get(&1.into()).unwrap())?,
825 825 Some(R!(1))
826 826 );
827 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
827 let unknown = Node::from_hex(hex_pad_right("3d")).unwrap();
828 828 assert_eq!(nt.find_node(&idx, &unknown)?, None);
829 829 Ok(())
830 830 }
831 831
832 832 #[test]
833 833 fn test_immutable_find_one_jump() {
834 834 let mut idx = TestIndex::new();
835 835 pad_insert(&mut idx, R!(9), "012");
836 836 pad_insert(&mut idx, R!(0), "00a");
837 837
838 838 let nt = sample_nodetree();
839 839
840 840 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
841 841 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(R!(9))));
842 842 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
843 843 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(R!(0))));
844 844 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
845 845 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
846 846 }
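
// Sketch (not part of the original changeset): what the private `visit()`
// iterator yields while resolving a prefix; the walk starts at the root
// block and follows `Block` edges until it reaches a leaf element.
#[test]
fn example_visitor_walk() {
    let nt = sample_nodetree();
    let steps: Vec<_> = nt.visit(hex("01")).collect();
    assert_eq!(steps.len(), 2);
    // First step: the root block points to block 1 for nybble 0.
    assert_eq!(steps[0].element, Element::Block(1));
    assert_eq!(steps[0].final_revision(), None);
    // Second step: block 1 holds a leaf for nybble 1, ending the walk.
    assert_eq!(steps[1].element, Element::Rev(9));
    assert_eq!(steps[1].final_revision(), Some(Some(9.into())));
}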
847 847
848 848 #[test]
849 849 fn test_mutated_find() -> Result<(), NodeMapError> {
850 850 let mut idx = TestIndex::new();
851 851 pad_insert(&mut idx, R!(9), "012");
852 852 pad_insert(&mut idx, R!(0), "00a");
853 853 pad_insert(&mut idx, R!(2), "cafe");
854 854 pad_insert(&mut idx, R!(3), "15");
855 855 pad_insert(&mut idx, R!(1), "10");
856 856
857 857 let nt = NodeTree {
858 858 readonly: sample_nodetree().readonly,
859 859 growable: vec![block![0: Rev(1), 5: Rev(3)]],
860 860 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
861 861 masked_inner_blocks: 1,
862 862 };
863 863 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(R!(1)));
864 864 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(R!(2)));
865 865 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
866 866 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
867 867 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
868 868 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
869 869 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(R!(9)));
870 870 assert_eq!(nt.masked_readonly_blocks(), 2);
871 871 Ok(())
872 872 }
873 873
874 874 pub struct TestNtIndex {
875 875 pub index: TestIndex,
876 876 pub nt: NodeTree,
877 877 }
878 878
879 879 impl TestNtIndex {
880 880 pub fn new() -> Self {
881 881 TestNtIndex {
882 882 index: HashMap::new(),
883 883 nt: NodeTree::default(),
884 884 }
885 885 }
886 886
887 887 pub fn insert_node(
888 888 &mut self,
889 889 rev: Revision,
890 890 node: Node,
891 891 ) -> Result<(), NodeMapError> {
892 892 self.index.insert(rev.into(), node);
893 893 self.nt.insert(&self.index, &node, rev)?;
894 894 Ok(())
895 895 }
896 896
897 897 pub fn insert(
898 898 &mut self,
899 899 rev: Revision,
900 900 hex: &str,
901 901 ) -> Result<(), NodeMapError> {
902 902 let node = pad_node(hex);
903 return self.insert_node(rev, node);
903 self.insert_node(rev, node)
904 904 }
905 905
906 906 fn find_hex(
907 907 &self,
908 908 prefix: &str,
909 909 ) -> Result<Option<Revision>, NodeMapError> {
910 910 self.nt.find_bin(&self.index, hex(prefix))
911 911 }
912 912
913 913 fn unique_prefix_len_hex(
914 914 &self,
915 915 prefix: &str,
916 916 ) -> Result<Option<usize>, NodeMapError> {
917 917 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
918 918 }
919 919
920 920 /// Drain `added` and restart a new one
921 921 fn commit(self) -> Self {
922 922 let mut as_vec: Vec<Block> =
923 923 self.nt.readonly.iter().copied().collect();
924 924 as_vec.extend(self.nt.growable);
925 925 as_vec.push(self.nt.root);
926 926
927 927 Self {
928 928 index: self.index,
929 929 nt: NodeTree::from(as_vec),
930 930 }
931 931 }
932 932 }
933 933
934 impl Default for TestNtIndex {
935 fn default() -> Self {
936 Self::new()
937 }
938 }
939
934 940 #[test]
935 941 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
936 942 let mut idx = TestNtIndex::new();
937 943 idx.insert(Revision(0), "1234")?;
938 944 assert_eq!(idx.find_hex("1")?, Some(R!(0)));
939 945 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
940 946
941 947 // let's trigger a simple split
942 948 idx.insert(Revision(1), "1a34")?;
943 949 assert_eq!(idx.nt.growable.len(), 1);
944 950 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
945 951 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
946 952
947 953 // reinserting is a no-op
948 954 idx.insert(Revision(1), "1a34")?;
949 955 assert_eq!(idx.nt.growable.len(), 1);
950 956 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
951 957 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
952 958
953 959 idx.insert(Revision(2), "1a01")?;
954 960 assert_eq!(idx.nt.growable.len(), 2);
955 961 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
956 962 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
957 963 assert_eq!(idx.find_hex("1a3")?, Some(R!(1)));
958 964 assert_eq!(idx.find_hex("1a0")?, Some(R!(2)));
959 965 assert_eq!(idx.find_hex("1a12")?, None);
960 966
961 967 // now let's make it split and create more than one additional block
962 968 idx.insert(Revision(3), "1a345")?;
963 969 assert_eq!(idx.nt.growable.len(), 4);
964 970 assert_eq!(idx.find_hex("1a340")?, Some(R!(1)));
965 971 assert_eq!(idx.find_hex("1a345")?, Some(R!(3)));
966 972 assert_eq!(idx.find_hex("1a341")?, None);
967 973
968 974 // there's no readonly block to mask
969 975 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
970 976 Ok(())
971 977 }
972 978
973 979 #[test]
974 980 fn test_unique_prefix_len_zero_prefix() {
975 981 let mut idx = TestNtIndex::new();
976 982 idx.insert(Revision(0), "00000abcd").unwrap();
977 983
978 984 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
979 985 // in the nodetree proper, this will be found at the first nybble
980 986 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
981 987 // but the first difference with `NULL_NODE`
982 988 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
983 989 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
984 990
985 991 // same with odd result
986 992 idx.insert(Revision(1), "00123").unwrap();
987 993 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
988 994 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
989 995
990 996 // these are unchanged of course
991 997 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
992 998 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
993 999 }
994 1000
995 1001 #[test]
996 1002 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
997 1003 // check that the splitting loop is long enough
998 1004 let mut nt_idx = TestNtIndex::new();
999 1005 let nt = &mut nt_idx.nt;
1000 1006 let idx = &mut nt_idx.index;
1001 1007
1002 1008 let node0_hex = hex_pad_right("444444");
1003 1009 let mut node1_hex = hex_pad_right("444444");
1004 1010 node1_hex.pop();
1005 1011 node1_hex.push('5');
1006 let node0 = Node::from_hex(&node0_hex).unwrap();
1012 let node0 = Node::from_hex(node0_hex).unwrap();
1007 1013 let node1 = Node::from_hex(&node1_hex).unwrap();
1008 1014
1009 1015 idx.insert(0.into(), node0);
1010 1016 nt.insert(idx, &node0, R!(0))?;
1011 1017 idx.insert(1.into(), node1);
1012 1018 nt.insert(idx, &node1, R!(1))?;
1013 1019
1014 1020 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(R!(0)));
1015 1021 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(R!(1)));
1016 1022 Ok(())
1017 1023 }
1018 1024
1019 1025 #[test]
1020 1026 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1021 1027 let mut idx = TestNtIndex::new();
1022 1028 idx.insert(Revision(0), "1234")?;
1023 1029 idx.insert(Revision(1), "1235")?;
1024 1030 idx.insert(Revision(2), "131")?;
1025 1031 idx.insert(Revision(3), "cafe")?;
1026 1032 let mut idx = idx.commit();
1027 1033 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1028 1034 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1029 1035 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1030 1036 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1031 1037 // we did not add anything since init from readonly
1032 1038 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1033 1039
1034 1040 idx.insert(Revision(4), "123A")?;
1035 1041 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1036 1042 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1037 1043 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1038 1044 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1039 1045 assert_eq!(idx.find_hex("123A")?, Some(R!(4)));
1040 1046 // we masked blocks for all prefixes of "123", including the root
1041 1047 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1042 1048
1043 1049 eprintln!("{:?}", idx.nt);
1044 1050 idx.insert(Revision(5), "c0")?;
1045 1051 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1046 1052 assert_eq!(idx.find_hex("c0")?, Some(R!(5)));
1047 1053 assert_eq!(idx.find_hex("c1")?, None);
1048 1054 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1049 1055 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1050 1056 // it doesn't mask anything
1051 1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1052 1058
1053 1059 Ok(())
1054 1060 }
1055 1061
1056 1062 #[test]
1057 1063 fn test_invalidate_all() -> Result<(), NodeMapError> {
1058 1064 let mut idx = TestNtIndex::new();
1059 1065 idx.insert(Revision(0), "1234")?;
1060 1066 idx.insert(Revision(1), "1235")?;
1061 1067 idx.insert(Revision(2), "131")?;
1062 1068 idx.insert(Revision(3), "cafe")?;
1063 1069 let mut idx = idx.commit();
1064 1070
1065 1071 idx.nt.invalidate_all();
1066 1072
1067 1073 assert_eq!(idx.find_hex("1234")?, None);
1068 1074 assert_eq!(idx.find_hex("1235")?, None);
1069 1075 assert_eq!(idx.find_hex("131")?, None);
1070 1076 assert_eq!(idx.find_hex("cafe")?, None);
1071 1077 // all the readonly blocks have been masked, this is the
1072 1078 // conventional expected response
1073 1079 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1074 1080 Ok(())
1075 1081 }
1076 1082
1077 1083 #[test]
1078 1084 fn test_into_added_empty() {
1079 1085 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1080 1086 assert!(sample_nodetree()
1081 1087 .into_readonly_and_added_bytes()
1082 1088 .1
1083 1089 .is_empty());
1084 1090 }
1085 1091
1086 1092 #[test]
1087 1093 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1088 1094 let mut idx = TestNtIndex::new();
1089 1095 idx.insert(Revision(0), "1234")?;
1090 1096 let mut idx = idx.commit();
1091 1097 idx.insert(Revision(4), "cafe")?;
1092 1098 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1093 1099
1094 1100 // only the root block has been changed
1095 1101 assert_eq!(bytes.len(), size_of::<Block>());
1096 1102 // big endian for -2
1097 1103 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1098 1104 // big endian for -6
1099 1105 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1100 1106 Ok(())
1101 1107 }
1102 1108 }
@@ -1,108 +1,108 b''
1 1 use crate::errors::{HgError, HgResultExt};
2 2 use bytes_cast::{unaligned, BytesCast};
3 3 use memmap2::Mmap;
4 4 use std::path::{Path, PathBuf};
5 5
6 6 use crate::vfs::Vfs;
7 7
8 8 const ONDISK_VERSION: u8 = 1;
9 9
10 10 pub(super) struct NodeMapDocket {
11 11 pub data_length: usize,
12 12 // TODO: keep here more of the data from `parse()` when we need it
13 13 }
14 14
15 15 #[derive(BytesCast)]
16 16 #[repr(C)]
17 17 struct DocketHeader {
18 18 uid_size: u8,
19 19 _tip_rev: unaligned::U64Be,
20 20 data_length: unaligned::U64Be,
21 21 _data_unused: unaligned::U64Be,
22 22 tip_node_size: unaligned::U64Be,
23 23 }
24 24
25 25 impl NodeMapDocket {
26 26 /// Return `Ok(None)` when the caller should proceed without a persistent
27 27 /// nodemap:
28 28 ///
29 29 /// * This revlog does not have a `.n` docket file (it is not generated for
30 30 /// small revlogs), or
31 31 /// * The docket has an unsupported version number (repositories created by
32 32 /// later hg, maybe that should be a requirement instead?), or
33 33 /// * The docket file points to a missing (likely deleted) data file (this
34 34 /// can happen in a rare race condition).
35 35 pub fn read_from_file(
36 36 store_vfs: &Vfs,
37 37 index_path: &Path,
38 38 ) -> Result<Option<(Self, Mmap)>, HgError> {
39 39 let docket_path = index_path.with_extension("n");
40 40 let docket_bytes = if let Some(bytes) =
41 41 store_vfs.read(&docket_path).io_not_found_as_none()?
42 42 {
43 43 bytes
44 44 } else {
45 45 return Ok(None);
46 46 };
47 47
48 48 let input = if let Some((&ONDISK_VERSION, rest)) =
49 49 docket_bytes.split_first()
50 50 {
51 51 rest
52 52 } else {
53 53 return Ok(None);
54 54 };
55 55
56 56 /// Treat any error as a parse error
57 57 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
58 58 result
59 59 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
60 60 }
61 61
62 62 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
63 63 let uid_size = header.uid_size as usize;
64 64 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
65 65 // systems?
66 66 let tip_node_size = header.tip_node_size.get() as usize;
67 67 let data_length = header.data_length.get() as usize;
68 68 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
69 69 let (_tip_node, _rest) =
70 70 parse(u8::slice_from_bytes(rest, tip_node_size))?;
71 71 let uid = parse(std::str::from_utf8(uid))?;
72 72 let docket = NodeMapDocket { data_length };
73 73
74 74 let data_path = rawdata_path(&docket_path, uid);
75 75 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
76 76 // config is false?
77 77 if let Some(mmap) =
78 store_vfs.mmap_open(&data_path).io_not_found_as_none()?
78 store_vfs.mmap_open(data_path).io_not_found_as_none()?
79 79 {
80 80 if mmap.len() >= data_length {
81 81 Ok(Some((docket, mmap)))
82 82 } else {
83 83 Err(HgError::corrupted("persistent nodemap too short"))
84 84 }
85 85 } else {
86 86 // Even if .hg/requires opted in, some revlogs are deemed small
87 87 // enough to not need a persistent nodemap.
88 88 Ok(None)
89 89 }
90 90 }
91 91 }
92 92
93 93 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
94 94 let docket_name = docket_path
95 95 .file_name()
96 96 .expect("expected a base name")
97 97 .to_str()
98 98 .expect("expected an ASCII file name in the store");
99 99 let prefix = docket_name
100 100 .strip_suffix(".n.a")
101 101 .or_else(|| docket_name.strip_suffix(".n"))
102 102 .expect("expected docket path in .n or .n.a");
103 103 let name = format!("{}-{}.nd", prefix, uid);
104 104 docket_path
105 105 .parent()
106 106 .expect("expected a non-root path")
107 107 .join(name)
108 108 }
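
// Illustrative sketch (not part of the original changeset): how a docket
// path and uid combine into the raw data file name. The concrete paths
// below are made up for the example.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn example_rawdata_path() {
        let docket = PathBuf::from("store/00changelog.n");
        assert_eq!(
            rawdata_path(&docket, "1234abcd"),
            PathBuf::from("store/00changelog-1234abcd.nd")
        );
        // The ".n.a" docket variant resolves to the same data file name.
        let pending = PathBuf::from("store/00changelog.n.a");
        assert_eq!(
            rawdata_path(&pending, "1234abcd"),
            PathBuf::from("store/00changelog-1234abcd.nd")
        );
    }
}
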
@@ -1,436 +1,436 b''
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::{
13 13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 14 path_auditor::PathAuditor,
15 15 replace_slice,
16 16 };
17 17 use lazy_static::lazy_static;
18 18 use same_file::is_same_file;
19 19 use std::borrow::{Cow, ToOwned};
20 20 use std::ffi::{OsStr, OsString};
21 21 use std::iter::FusedIterator;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24
25 25 pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
26 26 let os_str;
27 27 #[cfg(unix)]
28 28 {
29 29 use std::os::unix::ffi::OsStrExt;
30 30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 31 }
32 32 // TODO Handle other platforms
33 33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 34 // Perhaps, the return type would have to be Result<PathBuf>.
35 35 os_str
36 36 }
37 37
38 38 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
39 39 Path::new(get_os_str_from_bytes(bytes))
40 40 }
41 41
42 42 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
43 43 // that's why Vec<u8> is returned.
44 44 #[cfg(unix)]
45 45 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
46 46 get_bytes_from_os_str(path.as_ref())
47 47 }
48 48
49 49 #[cfg(unix)]
50 50 pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
51 51 use std::os::unix::ffi::OsStrExt;
52 52 str.as_ref().as_bytes().to_vec()
53 53 }
54 54
55 55 #[cfg(unix)]
56 56 pub fn get_bytes_from_os_string(str: OsString) -> Vec<u8> {
57 57 use std::os::unix::ffi::OsStringExt;
58 58 str.into_vec()
59 59 }
60 60
62 62 /// An iterator over a repository path, yielding the path itself and its ancestors.
62 62 #[derive(Copy, Clone, Debug)]
63 63 pub struct Ancestors<'a> {
64 64 next: Option<&'a HgPath>,
65 65 }
66 66
67 67 impl<'a> Iterator for Ancestors<'a> {
68 68 type Item = &'a HgPath;
69 69
70 70 fn next(&mut self) -> Option<Self::Item> {
71 71 let next = self.next;
72 72 self.next = match self.next {
73 73 Some(s) if s.is_empty() => None,
74 74 Some(s) => {
75 75 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
76 76 Some(HgPath::new(&s.as_bytes()[..p]))
77 77 }
78 78 None => None,
79 79 };
80 80 next
81 81 }
82 82 }
83 83
84 84 impl<'a> FusedIterator for Ancestors<'a> {}
85 85
86 86 /// An iterator over a repository path, yielding the path itself and its ancestors.
87 87 #[derive(Copy, Clone, Debug)]
88 88 pub(crate) struct AncestorsWithBase<'a> {
89 89 next: Option<(&'a HgPath, &'a HgPath)>,
90 90 }
91 91
92 92 impl<'a> Iterator for AncestorsWithBase<'a> {
93 93 type Item = (&'a HgPath, &'a HgPath);
94 94
95 95 fn next(&mut self) -> Option<Self::Item> {
96 96 let next = self.next;
97 97 self.next = match self.next {
98 98 Some((s, _)) if s.is_empty() => None,
99 99 Some((s, _)) => Some(s.split_filename()),
100 100 None => None,
101 101 };
102 102 next
103 103 }
104 104 }
105 105
106 106 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
107 107
108 108 /// Returns an iterator yielding ancestor directories of the given repository
109 109 /// path.
110 110 ///
111 111 /// The path is separated by '/', and must not start with '/'.
112 112 ///
113 113 /// The path itself isn't included unless it is b"" (meaning the root
114 114 /// directory).
115 115 pub fn find_dirs(path: &HgPath) -> Ancestors {
116 116 let mut dirs = Ancestors { next: Some(path) };
117 117 if !path.is_empty() {
118 118 dirs.next(); // skip itself
119 119 }
120 120 dirs
121 121 }
122 122
123 123 /// Returns an iterator yielding ancestor directories of the given repository
124 124 /// path.
125 125 ///
126 126 /// The path is separated by '/', and must not start with '/'.
127 127 ///
128 128 /// The path itself isn't included unless it is b"" (meaning the root
129 129 /// directory.)
130 130 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
131 131 let mut dirs = AncestorsWithBase {
132 132 next: Some((path, HgPath::new(b""))),
133 133 };
134 134 if !path.is_empty() {
135 135 dirs.next(); // skip itself
136 136 }
137 137 dirs
138 138 }
139 139
140 140 /// TODO more than ASCII?
141 141 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
142 142 #[cfg(windows)] // NTFS compares via upper()
143 143 return path.to_ascii_uppercase();
144 144 #[cfg(unix)]
145 145 path.to_ascii_lowercase()
146 146 }
147 147
148 148 lazy_static! {
149 149 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
150 150 [
151 151 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
152 152 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
153 153 ]
154 154 .iter()
155 155 .map(|code| {
156 156 std::char::from_u32(*code)
157 157 .unwrap()
158 158 .encode_utf8(&mut [0; 3])
159 159 .bytes()
160 160 .collect()
161 161 })
162 162 .collect()
163 163 };
164 164 }
165 165
166 166 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
167 167 let mut buf = bytes.to_owned();
168 168 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
169 169 if needs_escaping {
170 170 for forbidden in IGNORED_CHARS.iter() {
171 171 replace_slice(&mut buf, forbidden, &[])
172 172 }
173 173 buf
174 174 } else {
175 175 buf
176 176 }
177 177 }
178 178
179 179 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
180 180 hfs_ignore_clean(&bytes.to_ascii_lowercase())
181 181 }
182 182
183 183 /// Returns the canonical path of `name`, given `cwd` and `root`
184 184 pub fn canonical_path(
185 185 root: impl AsRef<Path>,
186 186 cwd: impl AsRef<Path>,
187 187 name: impl AsRef<Path>,
188 188 ) -> Result<PathBuf, HgPathError> {
189 189 // TODO add missing normalization for other platforms
190 190 let root = root.as_ref();
191 191 let cwd = cwd.as_ref();
192 192 let name = name.as_ref();
193 193
194 194 let name = if !name.is_absolute() {
195 root.join(&cwd).join(&name)
195 root.join(cwd).join(name)
196 196 } else {
197 197 name.to_owned()
198 198 };
199 let auditor = PathAuditor::new(&root);
200 if name != root && name.starts_with(&root) {
201 let name = name.strip_prefix(&root).unwrap();
199 let auditor = PathAuditor::new(root);
200 if name != root && name.starts_with(root) {
201 let name = name.strip_prefix(root).unwrap();
202 202 auditor.audit_path(path_to_hg_path_buf(name)?)?;
203 203 Ok(name.to_owned())
204 204 } else if name == root {
205 205 Ok("".into())
206 206 } else {
207 207 // Determine whether `name' is in the hierarchy at or beneath `root',
208 208 // by iterating name=name.parent() until it returns `None` (can't
209 209 // check name == '/', because that doesn't work on windows).
210 210 let mut name = name.deref();
211 211 let original_name = name.to_owned();
212 212 loop {
213 let same = is_same_file(&name, &root).unwrap_or(false);
213 let same = is_same_file(name, root).unwrap_or(false);
214 214 if same {
215 215 if name == original_name {
216 216 // `name` was actually the same as root (maybe a symlink)
217 217 return Ok("".into());
218 218 }
219 219 // `name` is a symlink to root, so `original_name` is under
220 220 // root
221 let rel_path = original_name.strip_prefix(&name).unwrap();
222 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
221 let rel_path = original_name.strip_prefix(name).unwrap();
222 auditor.audit_path(path_to_hg_path_buf(rel_path)?)?;
223 223 return Ok(rel_path.to_owned());
224 224 }
225 225 name = match name.parent() {
226 226 None => break,
227 227 Some(p) => p,
228 228 };
229 229 }
230 230 // TODO hint to the user about using --cwd
231 231 // Bubble up the responsibility to Python for now
232 232 Err(HgPathError::NotUnderRoot {
233 233 path: original_name,
234 234 root: root.to_owned(),
235 235 })
236 236 }
237 237 }
238 238
239 239 /// Returns the representation of the path relative to the current working
240 240 /// directory for display purposes.
241 241 ///
242 242 /// `cwd` is a `HgPath`, so it is considered relative to the root directory
243 243 /// of the repository.
244 244 ///
245 245 /// # Examples
246 246 ///
247 247 /// ```
248 248 /// use hg::utils::hg_path::HgPath;
249 249 /// use hg::utils::files::relativize_path;
250 250 /// use std::borrow::Cow;
251 251 ///
252 252 /// let file = HgPath::new(b"nested/file");
253 253 /// let cwd = HgPath::new(b"");
254 254 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
255 255 ///
256 256 /// let cwd = HgPath::new(b"nested");
257 257 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
258 258 ///
259 259 /// let cwd = HgPath::new(b"other");
260 260 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
261 261 /// ```
262 262 pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
263 263 if cwd.as_ref().is_empty() {
264 264 Cow::Borrowed(path.as_bytes())
265 265 } else {
266 266 // This is not all accurate as to how large `res` will actually be, but
267 267 // profiling `rhg files` on a large-ish repo shows it’s better than
268 268 // starting from a zero-capacity `Vec` and letting `extend` reallocate
269 269 // repeatedly.
270 270 let guesstimate = path.as_bytes().len();
271 271
272 272 let mut res: Vec<u8> = Vec::with_capacity(guesstimate);
273 273 let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable();
274 274 let mut cwd_iter =
275 275 cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable();
276 276 loop {
277 277 match (path_iter.peek(), cwd_iter.peek()) {
278 278 (Some(a), Some(b)) if a == b => (),
279 279 _ => break,
280 280 }
281 281 path_iter.next();
282 282 cwd_iter.next();
283 283 }
284 284 let mut need_sep = false;
285 285 for _ in cwd_iter {
286 286 if need_sep {
287 287 res.extend(b"/")
288 288 } else {
289 289 need_sep = true
290 290 };
291 291 res.extend(b"..");
292 292 }
293 293 for c in path_iter {
294 294 if need_sep {
295 295 res.extend(b"/")
296 296 } else {
297 297 need_sep = true
298 298 };
299 299 res.extend(c);
300 300 }
301 301 Cow::Owned(res)
302 302 }
303 303 }
304 304
305 305 #[cfg(test)]
306 306 mod tests {
307 307 use super::*;
308 308 use pretty_assertions::assert_eq;
309 309
310 310 #[test]
311 311 fn find_dirs_some() {
312 312 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
313 313 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
314 314 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
315 315 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
316 316 assert_eq!(dirs.next(), None);
317 317 assert_eq!(dirs.next(), None);
318 318 }
319 319
320 320 #[test]
321 321 fn find_dirs_empty() {
322 322 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
323 323 let mut dirs = super::find_dirs(HgPath::new(b""));
324 324 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
325 325 assert_eq!(dirs.next(), None);
326 326 assert_eq!(dirs.next(), None);
327 327 }
328 328
329 329 #[test]
330 330 fn test_find_dirs_with_base_some() {
331 331 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
332 332 assert_eq!(
333 333 dirs.next(),
334 334 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
335 335 );
336 336 assert_eq!(
337 337 dirs.next(),
338 338 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
339 339 );
340 340 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
341 341 assert_eq!(dirs.next(), None);
342 342 assert_eq!(dirs.next(), None);
343 343 }
344 344
345 345 #[test]
346 346 fn test_find_dirs_with_base_empty() {
347 347 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
348 348 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
349 349 assert_eq!(dirs.next(), None);
350 350 assert_eq!(dirs.next(), None);
351 351 }
352 352
353 353 #[test]
354 354 fn test_canonical_path() {
355 355 let root = Path::new("/repo");
356 356 let cwd = Path::new("/dir");
357 357 let name = Path::new("filename");
358 358 assert_eq!(
359 359 canonical_path(root, cwd, name),
360 360 Err(HgPathError::NotUnderRoot {
361 361 path: PathBuf::from("/dir/filename"),
362 362 root: root.to_path_buf()
363 363 })
364 364 );
365 365
366 366 let root = Path::new("/repo");
367 367 let cwd = Path::new("/");
368 368 let name = Path::new("filename");
369 369 assert_eq!(
370 370 canonical_path(root, cwd, name),
371 371 Err(HgPathError::NotUnderRoot {
372 372 path: PathBuf::from("/filename"),
373 373 root: root.to_path_buf()
374 374 })
375 375 );
376 376
377 377 let root = Path::new("/repo");
378 378 let cwd = Path::new("/");
379 379 let name = Path::new("repo/filename");
380 380 assert_eq!(
381 381 canonical_path(root, cwd, name),
382 382 Ok(PathBuf::from("filename"))
383 383 );
384 384
385 385 let root = Path::new("/repo");
386 386 let cwd = Path::new("/repo");
387 387 let name = Path::new("filename");
388 388 assert_eq!(
389 389 canonical_path(root, cwd, name),
390 390 Ok(PathBuf::from("filename"))
391 391 );
392 392
393 393 let root = Path::new("/repo");
394 394 let cwd = Path::new("/repo/subdir");
395 395 let name = Path::new("filename");
396 396 assert_eq!(
397 397 canonical_path(root, cwd, name),
398 398 Ok(PathBuf::from("subdir/filename"))
399 399 );
400 400 }
401 401
402 402 #[test]
403 403 fn test_canonical_path_not_rooted() {
404 404 use std::fs::create_dir;
405 405 use tempfile::tempdir;
406 406
407 407 let base_dir = tempdir().unwrap();
408 408 let base_dir_path = base_dir.path();
409 409 let beneath_repo = base_dir_path.join("a");
410 410 let root = base_dir_path.join("a/b");
411 411 let out_of_repo = base_dir_path.join("c");
412 412 let under_repo_symlink = out_of_repo.join("d");
413 413
414 414 create_dir(&beneath_repo).unwrap();
415 415 create_dir(&root).unwrap();
416 416
417 417 // TODO make portable
418 418 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
419 419
420 420 assert_eq!(
421 421 canonical_path(&root, Path::new(""), out_of_repo),
422 422 Ok(PathBuf::from(""))
423 423 );
424 424 assert_eq!(
425 425 canonical_path(&root, Path::new(""), &beneath_repo),
426 426 Err(HgPathError::NotUnderRoot {
427 427 path: beneath_repo,
428 428 root: root.to_owned()
429 429 })
430 430 );
431 431 assert_eq!(
432 canonical_path(&root, Path::new(""), &under_repo_symlink),
432 canonical_path(&root, Path::new(""), under_repo_symlink),
433 433 Ok(PathBuf::from("d"))
434 434 );
435 435 }
436 436 }
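The files.rs hunks above are all of the kind clippy's `needless_borrow` lint reports: an extra `&` on an argument where the callee already takes `impl AsRef<Path>` and the value is a reference to begin with. A minimal standalone sketch of the before/after (a hypothetical example, not part of the Mercurial sources):

use std::path::{Path, PathBuf};

// `Path::join` accepts `impl AsRef<Path>`, and `&Path` already satisfies that
// bound, so the extra borrows in `root.join(&cwd).join(&name)` are redundant.
fn absolute_name(root: &Path, cwd: &Path, name: &Path) -> PathBuf {
    root.join(cwd).join(name)
}

fn main() {
    let p = absolute_name(Path::new("repo"), Path::new("dir"), Path::new("file"));
    assert_eq!(p, PathBuf::from("repo/dir/file"));
}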
@@ -1,223 +1,223 b''
1 1 // path_auditor.rs
2 2 //
3 3 // Copyright 2020
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 //
6 6 // This software may be used and distributed according to the terms of the
7 7 // GNU General Public License version 2 or any later version.
8 8
9 9 use crate::utils::{
10 10 files::lower_clean,
11 11 find_slice_in_slice,
12 12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 13 };
14 14 use std::collections::HashSet;
15 15 use std::path::{Path, PathBuf};
16 16 use std::sync::{Mutex, RwLock};
17 17
18 18 /// Ensures that a path is valid for use in the repository i.e. does not use
19 19 /// any banned components, does not traverse a symlink, etc.
20 20 #[derive(Debug, Default)]
21 21 pub struct PathAuditor {
22 22 audited: Mutex<HashSet<HgPathBuf>>,
23 23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
24 24 root: PathBuf,
25 25 }
26 26
27 27 impl PathAuditor {
28 28 pub fn new(root: impl AsRef<Path>) -> Self {
29 29 Self {
30 30 root: root.as_ref().to_owned(),
31 31 ..Default::default()
32 32 }
33 33 }
34 34 pub fn audit_path(
35 35 &self,
36 36 path: impl AsRef<HgPath>,
37 37 ) -> Result<(), HgPathError> {
38 38 // TODO windows "localpath" normalization
39 39 let path = path.as_ref();
40 40 if path.is_empty() {
41 41 return Ok(());
42 42 }
43 43 // TODO case normalization
44 44 if self.audited.lock().unwrap().contains(path) {
45 45 return Ok(());
46 46 }
47 47 // AIX ignores "/" at end of path, others raise EISDIR.
48 48 let last_byte = path.as_bytes()[path.len() - 1];
49 49 if last_byte == b'/' || last_byte == b'\\' {
50 50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
51 51 }
52 52 let parts: Vec<_> = path
53 53 .as_bytes()
54 54 .split(|b| std::path::is_separator(*b as char))
55 55 .collect();
56 56
57 57 let first_component = lower_clean(parts[0]);
58 58 let first_component = first_component.as_slice();
59 59 if !path.split_drive().0.is_empty()
60 60 || (first_component == b".hg"
61 61 || first_component == b".hg."
62 62 || first_component == b"")
63 63 || parts.iter().any(|c| c == b"..")
64 64 {
65 65 return Err(HgPathError::InsideDotHg(path.to_owned()));
66 66 }
67 67
68 68 // Windows shortname aliases
69 69 for part in parts.iter() {
70 70 if part.contains(&b'~') {
71 71 let mut split = part.splitn(2, |b| *b == b'~');
72 72 let first =
73 73 split.next().unwrap().to_owned().to_ascii_uppercase();
74 74 let last = split.next().unwrap();
75 75 if last.iter().all(u8::is_ascii_digit)
76 76 && (first == b"HG" || first == b"HG8B6C")
77 77 {
78 78 return Err(HgPathError::ContainsIllegalComponent(
79 79 path.to_owned(),
80 80 ));
81 81 }
82 82 }
83 83 }
84 84 let lower_path = lower_clean(path.as_bytes());
85 85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
86 86 let lower_parts: Vec<_> = path
87 87 .as_bytes()
88 88 .split(|b| std::path::is_separator(*b as char))
89 89 .collect();
90 90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
91 91 if let Some(pos) = lower_parts[1..]
92 92 .iter()
93 93 .position(|part| part == &pattern.as_slice())
94 94 {
95 95 let base = lower_parts[..=pos]
96 96 .iter()
97 97 .fold(HgPathBuf::new(), |acc, p| {
98 98 acc.join(HgPath::new(p))
99 99 });
100 100 return Err(HgPathError::IsInsideNestedRepo {
101 101 path: path.to_owned(),
102 102 nested_repo: base,
103 103 });
104 104 }
105 105 }
106 106 }
107 107
108 108 let parts = &parts[..parts.len().saturating_sub(1)];
109 109
110 110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
111 111 // if there's a "foo/.hg" directory. This also means we won't
112 112 // accidentally traverse a symlink into some other filesystem (which
113 113 // is potentially expensive to access).
114 114 for index in 0..parts.len() {
115 115 let prefix = &parts[..=index].join(&b'/');
116 116 let prefix = HgPath::new(prefix);
117 117 if self.audited_dirs.read().unwrap().contains(prefix) {
118 118 continue;
119 119 }
120 self.check_filesystem(&prefix, &path)?;
120 self.check_filesystem(prefix, path)?;
121 121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
122 122 }
123 123
124 124 self.audited.lock().unwrap().insert(path.to_owned());
125 125
126 126 Ok(())
127 127 }
128 128
129 129 pub fn check_filesystem(
130 130 &self,
131 131 prefix: impl AsRef<HgPath>,
132 132 path: impl AsRef<HgPath>,
133 133 ) -> Result<(), HgPathError> {
134 134 let prefix = prefix.as_ref();
135 135 let path = path.as_ref();
136 136 let current_path = self.root.join(
137 137 hg_path_to_path_buf(prefix)
138 138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
139 139 );
140 140 match std::fs::symlink_metadata(&current_path) {
141 141 Err(e) => {
142 142 // EINVAL can be raised as invalid path syntax under win32.
143 143 if e.kind() != std::io::ErrorKind::NotFound
144 144 && e.kind() != std::io::ErrorKind::InvalidInput
145 145 && e.raw_os_error() != Some(20)
146 146 {
147 147 // Rust does not yet have an `ErrorKind` for
148 148 // `NotADirectory` (errno 20)
149 149 // It happens if the dirstate contains `foo/bar` and
150 150 // foo is not a directory
151 151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
152 152 }
153 153 }
154 154 Ok(meta) => {
155 155 if meta.file_type().is_symlink() {
156 156 return Err(HgPathError::TraversesSymbolicLink {
157 157 path: path.to_owned(),
158 158 symlink: prefix.to_owned(),
159 159 });
160 160 }
161 161 if meta.file_type().is_dir()
162 162 && current_path.join(".hg").is_dir()
163 163 {
164 164 return Err(HgPathError::IsInsideNestedRepo {
165 165 path: path.to_owned(),
166 166 nested_repo: prefix.to_owned(),
167 167 });
168 168 }
169 169 }
170 170 };
171 171
172 172 Ok(())
173 173 }
174 174
175 175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
176 176 self.audit_path(path).is_ok()
177 177 }
178 178 }
179 179
180 180 #[cfg(test)]
181 181 mod tests {
182 182 use super::*;
183 183 use std::fs::{create_dir, File};
184 184 use tempfile::tempdir;
185 185
186 186 #[test]
187 187 fn test_path_auditor() {
188 188 let base_dir = tempdir().unwrap();
189 189 let base_dir_path = base_dir.path();
190 190 let auditor = PathAuditor::new(base_dir_path);
191 191
192 192 let path = HgPath::new(b".hg/00changelog.i");
193 193 assert_eq!(
194 194 auditor.audit_path(path),
195 195 Err(HgPathError::InsideDotHg(path.to_owned()))
196 196 );
197 197 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
198 198 assert_eq!(
199 199 auditor.audit_path(path),
200 200 Err(HgPathError::IsInsideNestedRepo {
201 201 path: path.to_owned(),
202 202 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
203 203 })
204 204 );
205 205
206 create_dir(&base_dir_path.join("realdir")).unwrap();
207 File::create(&base_dir_path.join("realdir/realfile")).unwrap();
206 create_dir(base_dir_path.join("realdir")).unwrap();
207 File::create(base_dir_path.join("realdir/realfile")).unwrap();
208 208 // TODO make portable
209 209 std::os::unix::fs::symlink(
210 &base_dir_path.join("realdir"),
211 &base_dir_path.join("symlink"),
210 base_dir_path.join("realdir"),
211 base_dir_path.join("symlink"),
212 212 )
213 213 .unwrap();
214 214 let path = HgPath::new(b"symlink/realfile");
215 215 assert_eq!(
216 216 auditor.audit_path(path),
217 217 Err(HgPathError::TraversesSymbolicLink {
218 218 path: path.to_owned(),
219 219 symlink: HgPathBuf::from_bytes(b"symlink"),
220 220 })
221 221 );
222 222 }
223 223 }
@@ -1,307 +1,307 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::status` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as
10 10 //! `rustext.dirstate.status`.
11 11
12 12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 13 use cpython::{
14 14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 16 };
17 17 use hg::dirstate::status::StatusPath;
18 18 use hg::matchers::{
19 19 DifferenceMatcher, IntersectionMatcher, Matcher, NeverMatcher,
20 20 UnionMatcher,
21 21 };
22 22 use hg::{
23 23 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
24 24 parse_pattern_syntax,
25 25 utils::{
26 26 files::{get_bytes_from_path, get_path_from_bytes},
27 27 hg_path::{HgPath, HgPathBuf},
28 28 },
29 29 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
30 30 StatusOptions,
31 31 };
32 32 use std::borrow::Borrow;
33 33
34 34 fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
35 35 collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
36 36 }
37 37
38 38 /// This will be useless once trait impls for collection are added to `PyBytes`
39 39 /// upstream.
40 40 fn collect_pybytes_list(
41 41 py: Python,
42 42 iter: impl Iterator<Item = impl AsRef<HgPath>>,
43 43 ) -> PyList {
44 44 let list = PyList::new(py, &[]);
45 45
46 46 for path in iter {
47 47 list.append(
48 48 py,
49 49 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
50 50 )
51 51 }
52 52
53 53 list
54 54 }
55 55
56 56 fn collect_bad_matches(
57 57 py: Python,
58 58 collection: &[(impl AsRef<HgPath>, BadMatch)],
59 59 ) -> PyResult<PyList> {
60 60 let list = PyList::new(py, &[]);
61 61
62 62 let os = py.import("os")?;
63 63 let get_error_message = |code: i32| -> PyResult<_> {
64 64 os.call(
65 65 py,
66 66 "strerror",
67 67 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
68 68 None,
69 69 )
70 70 };
71 71
72 72 for (path, bad_match) in collection.iter() {
73 73 let message = match bad_match {
74 74 BadMatch::OsError(code) => get_error_message(*code)?,
75 75 BadMatch::BadType(bad_type) => {
76 76 format!("unsupported file type (type is {})", bad_type)
77 77 .to_py_object(py)
78 78 .into_object()
79 79 }
80 80 };
81 81 list.append(
82 82 py,
83 83 (PyBytes::new(py, path.as_ref().as_bytes()), message)
84 84 .to_py_object(py)
85 85 .into_object(),
86 86 )
87 87 }
88 88
89 89 Ok(list)
90 90 }
91 91
92 92 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
93 93 match err {
94 94 StatusError::Pattern(e) => {
95 95 let as_string = e.to_string();
96 96 log::trace!("Rust status fallback: `{}`", &as_string);
97 97
98 98 PyErr::new::<FallbackError, _>(py, &as_string)
99 99 }
100 100 e => PyErr::new::<ValueError, _>(py, e.to_string()),
101 101 }
102 102 }
103 103
104 104 pub fn status_wrapper(
105 105 py: Python,
106 106 dmap: DirstateMap,
107 107 matcher: PyObject,
108 108 root_dir: PyObject,
109 109 ignore_files: PyList,
110 110 check_exec: bool,
111 111 list_clean: bool,
112 112 list_ignored: bool,
113 113 list_unknown: bool,
114 114 collect_traversed_dirs: bool,
115 115 ) -> PyResult<PyTuple> {
116 116 let bytes = root_dir.extract::<PyBytes>(py)?;
117 117 let root_dir = get_path_from_bytes(bytes.data(py));
118 118
119 119 let dmap: DirstateMap = dmap.to_py_object(py);
120 120 let mut dmap = dmap.get_inner_mut(py);
121 121
122 122 let ignore_files: PyResult<Vec<_>> = ignore_files
123 123 .iter(py)
124 124 .map(|b| {
125 125 let file = b.extract::<PyBytes>(py)?;
126 126 Ok(get_path_from_bytes(file.data(py)).to_owned())
127 127 })
128 128 .collect();
129 129 let ignore_files = ignore_files?;
130 130 // The caller may call `copymap.items()` separately
131 131 let list_copies = false;
132 132
133 133 let after_status = |res: Result<(DirstateStatus<'_>, _), StatusError>| {
134 134 let (status_res, warnings) =
135 135 res.map_err(|e| handle_fallback(py, e))?;
136 136 build_response(py, status_res, warnings)
137 137 };
138 138
139 139 let matcher = extract_matcher(py, matcher)?;
140 140 dmap.with_status(
141 141 &*matcher,
142 142 root_dir.to_path_buf(),
143 143 ignore_files,
144 144 StatusOptions {
145 145 check_exec,
146 146 list_clean,
147 147 list_ignored,
148 148 list_unknown,
149 149 list_copies,
150 150 collect_traversed_dirs,
151 151 },
152 152 after_status,
153 153 )
154 154 }
155 155
156 156 /// Transform a Python matcher into a Rust matcher.
157 157 fn extract_matcher(
158 158 py: Python,
159 159 matcher: PyObject,
160 160 ) -> PyResult<Box<dyn Matcher + Sync>> {
161 161 match matcher.get_type(py).name(py).borrow() {
162 162 "alwaysmatcher" => Ok(Box::new(AlwaysMatcher)),
163 163 "nevermatcher" => Ok(Box::new(NeverMatcher)),
164 164 "exactmatcher" => {
165 165 let files = matcher.call_method(
166 166 py,
167 167 "files",
168 168 PyTuple::new(py, &[]),
169 169 None,
170 170 )?;
171 171 let files: PyList = files.cast_into(py)?;
172 172 let files: PyResult<Vec<HgPathBuf>> = files
173 173 .iter(py)
174 174 .map(|f| {
175 175 Ok(HgPathBuf::from_bytes(
176 176 f.extract::<PyBytes>(py)?.data(py),
177 177 ))
178 178 })
179 179 .collect();
180 180
181 181 let files = files?;
182 182 let file_matcher = FileMatcher::new(files)
183 183 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
184 184 Ok(Box::new(file_matcher))
185 185 }
186 186 "includematcher" => {
187 187 // Get the patterns from Python even though most of them are
188 188 // redundant with those we will parse later on, as they include
189 189 // those passed from the command line.
190 190 let ignore_patterns: PyResult<Vec<_>> = matcher
191 191 .getattr(py, "_kindpats")?
192 192 .iter(py)?
193 193 .map(|k| {
194 194 let k = k?;
195 195 let syntax = parse_pattern_syntax(
196 196 &[
197 197 k.get_item(py, 0)?
198 198 .extract::<PyBytes>(py)?
199 199 .data(py),
200 200 &b":"[..],
201 201 ]
202 202 .concat(),
203 203 )
204 204 .map_err(|e| {
205 205 handle_fallback(py, StatusError::Pattern(e))
206 206 })?;
207 207 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
208 208 let pattern = pattern.data(py);
209 209 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
210 210 let source = get_path_from_bytes(source.data(py));
211 211 let new = IgnorePattern::new(syntax, pattern, source);
212 212 Ok(new)
213 213 })
214 214 .collect();
215 215
216 216 let ignore_patterns = ignore_patterns?;
217 217
218 218 let matcher = IncludeMatcher::new(ignore_patterns)
219 219 .map_err(|e| handle_fallback(py, e.into()))?;
220 220
221 221 Ok(Box::new(matcher))
222 222 }
223 223 "unionmatcher" => {
224 224 let matchers: PyResult<Vec<_>> = matcher
225 225 .getattr(py, "_matchers")?
226 226 .iter(py)?
227 227 .map(|py_matcher| extract_matcher(py, py_matcher?))
228 228 .collect();
229 229
230 230 Ok(Box::new(UnionMatcher::new(matchers?)))
231 231 }
232 232 "intersectionmatcher" => {
233 233 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
234 234 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
235 235
236 236 Ok(Box::new(IntersectionMatcher::new(m1, m2)))
237 237 }
238 238 "differencematcher" => {
239 239 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
240 240 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
241 241
242 242 Ok(Box::new(DifferenceMatcher::new(m1, m2)))
243 243 }
244 244 e => Err(PyErr::new::<FallbackError, _>(
245 245 py,
246 246 format!("Unsupported matcher {}", e),
247 247 )),
248 248 }
249 249 }
250 250
251 251 fn build_response(
252 252 py: Python,
253 253 status_res: DirstateStatus,
254 254 warnings: Vec<PatternFileWarning>,
255 255 ) -> PyResult<PyTuple> {
256 256 let modified = collect_status_path_list(py, &status_res.modified);
257 257 let added = collect_status_path_list(py, &status_res.added);
258 258 let removed = collect_status_path_list(py, &status_res.removed);
259 259 let deleted = collect_status_path_list(py, &status_res.deleted);
260 260 let clean = collect_status_path_list(py, &status_res.clean);
261 261 let ignored = collect_status_path_list(py, &status_res.ignored);
262 262 let unknown = collect_status_path_list(py, &status_res.unknown);
263 263 let unsure = collect_status_path_list(py, &status_res.unsure);
264 264 let bad = collect_bad_matches(py, &status_res.bad)?;
265 265 let traversed = collect_pybytes_list(py, status_res.traversed.iter());
266 266 let dirty = status_res.dirty.to_py_object(py);
267 267 let py_warnings = PyList::new(py, &[]);
268 268 for warning in warnings.iter() {
269 269 // We use duck-typing on the Python side for dispatch, good enough for
270 270 // now.
271 271 match warning {
272 272 PatternFileWarning::InvalidSyntax(file, syn) => {
273 273 py_warnings.append(
274 274 py,
275 275 (
276 PyBytes::new(py, &get_bytes_from_path(&file)),
276 PyBytes::new(py, &get_bytes_from_path(file)),
277 277 PyBytes::new(py, syn),
278 278 )
279 279 .to_py_object(py)
280 280 .into_object(),
281 281 );
282 282 }
283 283 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
284 284 py,
285 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
285 PyBytes::new(py, &get_bytes_from_path(file)).into_object(),
286 286 ),
287 287 }
288 288 }
289 289
290 290 Ok(PyTuple::new(
291 291 py,
292 292 &[
293 293 unsure.into_object(),
294 294 modified.into_object(),
295 295 added.into_object(),
296 296 removed.into_object(),
297 297 deleted.into_object(),
298 298 clean.into_object(),
299 299 ignored.into_object(),
300 300 unknown.into_object(),
301 301 py_warnings.into_object(),
302 302 bad.into_object(),
303 303 traversed.into_object(),
304 304 dirty.into_object(),
305 305 ][..],
306 306 ))
307 307 }
@@ -1,524 +1,524 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 };
19 19 use hg::{
20 20 nodemap::{Block, NodeMapError, NodeTree},
21 21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 22 BaseRevision, Revision, UncheckedRevision,
23 23 };
24 24 use std::cell::RefCell;
25 25
26 26 /// Return a Struct implementing the Graph trait
27 27 pub(crate) fn pyindex_to_graph(
28 28 py: Python,
29 29 index: PyObject,
30 30 ) -> PyResult<cindex::Index> {
31 31 match index.extract::<MixedIndex>(py) {
32 32 Ok(midx) => Ok(midx.clone_cindex(py)),
33 33 Err(_) => cindex::Index::new(py, index),
34 34 }
35 35 }
36 36
37 37 py_class!(pub class MixedIndex |py| {
38 38 data cindex: RefCell<cindex::Index>;
39 39 data nt: RefCell<Option<NodeTree>>;
40 40 data docket: RefCell<Option<PyObject>>;
41 41 // Holds a reference to the mmap'ed persistent nodemap data
42 42 data mmap: RefCell<Option<PyBuffer>>;
43 43
44 44 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
45 45 Self::new(py, cindex)
46 46 }
47 47
48 48 /// Compatibility layer used for Python consumers needing access to the C index
49 49 ///
50 50 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
51 51 /// that may need to build a custom `nodetree`, based on a specified revset.
52 52 /// With a Rust implementation of the nodemap, we will be able to get rid of
53 53 /// this, by exposing our own standalone nodemap class,
54 54 /// ready to accept `MixedIndex`.
55 55 def get_cindex(&self) -> PyResult<PyObject> {
56 56 Ok(self.cindex(py).borrow().inner().clone_ref(py))
57 57 }
58 58
59 59 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
60 60
61 61 /// Return Revision if found, raises a bare `error.RevlogError`
62 62 /// in case of ambiguity, same as C version does
63 63 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
64 64 let opt = self.get_nodetree(py)?.borrow();
65 65 let nt = opt.as_ref().unwrap();
66 66 let idx = &*self.cindex(py).borrow();
67 67 let node = node_from_py_bytes(py, &node)?;
68 68 let res = nt.find_bin(idx, node.into());
69 69 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
70 70 }
71 71
72 72 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
73 73 /// is not found.
74 74 ///
75 75 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
76 76 /// will catch and rewrap with it
77 77 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
78 78 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
79 79 }
80 80
81 81 /// return True if the node exists in the index
82 82 def has_node(&self, node: PyBytes) -> PyResult<bool> {
83 83 self.get_rev(py, node).map(|opt| opt.is_some())
84 84 }
85 85
86 86 /// find length of shortest hex nodeid of a binary ID
87 87 def shortest(&self, node: PyBytes) -> PyResult<usize> {
88 88 let opt = self.get_nodetree(py)?.borrow();
89 89 let nt = opt.as_ref().unwrap();
90 90 let idx = &*self.cindex(py).borrow();
91 91 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
92 92 {
93 93 Ok(Some(l)) => Ok(l),
94 94 Ok(None) => Err(revlog_error(py)),
95 95 Err(e) => Err(nodemap_error(py, e)),
96 96 }
97 97 }
98 98
99 99 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
100 100 let opt = self.get_nodetree(py)?.borrow();
101 101 let nt = opt.as_ref().unwrap();
102 102 let idx = &*self.cindex(py).borrow();
103 103
104 104 let node_as_string = if cfg!(feature = "python3-sys") {
105 105 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
106 106 }
107 107 else {
108 108 let node = node.extract::<PyBytes>(py)?;
109 109 String::from_utf8_lossy(node.data(py)).to_string()
110 110 };
111 111
112 112 let prefix = NodePrefix::from_hex(&node_as_string)
113 113 .map_err(|_| PyErr::new::<ValueError, _>(
114 114 py, format!("Invalid node or prefix '{}'", node_as_string))
115 115 )?;
116 116
117 117 nt.find_bin(idx, prefix)
118 118 // TODO make an inner API returning the node directly
119 119 .map(|opt| opt.map(
120 120 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
121 121 .map_err(|e| nodemap_error(py, e))
122 122
123 123 }
124 124
125 125 /// append an index entry
126 126 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
127 127 if tup.len(py) < 8 {
128 128 // this is better than the panic promised by tup.get_item()
129 129 return Err(
130 130 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
131 131 }
132 132 let node_bytes = tup.get_item(py, 7).extract(py)?;
133 133 let node = node_from_py_object(py, &node_bytes)?;
134 134
135 135 let mut idx = self.cindex(py).borrow_mut();
136 136
137 137 // This is ok since we will just add the revision to the index
138 138 let rev = Revision(idx.len() as BaseRevision);
139 139 idx.append(py, tup)?;
140 140
141 141 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
142 142 .insert(&*idx, &node, rev)
143 143 .map_err(|e| nodemap_error(py, e))?;
144 144 Ok(py.None())
145 145 }
146 146
147 147 def __delitem__(&self, key: PyObject) -> PyResult<()> {
148 148 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
149 149 self.cindex(py).borrow().inner().del_item(py, key)?;
150 150 let mut opt = self.get_nodetree(py)?.borrow_mut();
151 151 let nt = opt.as_mut().unwrap();
152 152 nt.invalidate_all();
153 153 self.fill_nodemap(py, nt)?;
154 154 Ok(())
155 155 }
156 156
157 157 //
158 158 // Reforwarded C index API
159 159 //
160 160
161 161 // index_methods (tp_methods). Same ordering as in revlog.c
162 162
163 163 /// return the gca set of the given revs
164 164 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
165 165 self.call_cindex(py, "ancestors", args, kw)
166 166 }
167 167
168 168 /// return the heads of the common ancestors of the given revs
169 169 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
170 170 self.call_cindex(py, "commonancestorsheads", args, kw)
171 171 }
172 172
173 173 /// Clear the index caches and inner py_class data.
174 174 /// It is Python's responsibility to call `update_nodemap_data` again.
175 175 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
176 176 self.nt(py).borrow_mut().take();
177 177 self.docket(py).borrow_mut().take();
178 178 self.mmap(py).borrow_mut().take();
179 179 self.call_cindex(py, "clearcaches", args, kw)
180 180 }
181 181
182 182 /// return the raw binary string representing a revision
183 183 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
184 184 self.call_cindex(py, "entry_binary", args, kw)
185 185 }
186 186
187 187 /// return a binary packed version of the header
188 188 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
189 189 self.call_cindex(py, "pack_header", args, kw)
190 190 }
191 191
192 192 /// get an index entry
193 193 def get(&self, *args, **kw) -> PyResult<PyObject> {
194 194 self.call_cindex(py, "get", args, kw)
195 195 }
196 196
197 197 /// compute phases
198 198 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
199 199 self.call_cindex(py, "computephasesmapsets", args, kw)
200 200 }
201 201
202 202 /// reachableroots
203 203 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
204 204 self.call_cindex(py, "reachableroots2", args, kw)
205 205 }
206 206
207 207 /// get head revisions
208 208 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
209 209 self.call_cindex(py, "headrevs", args, kw)
210 210 }
211 211
212 212 /// get filtered head revisions
213 213 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
214 214 self.call_cindex(py, "headrevsfiltered", args, kw)
215 215 }
216 216
217 217 /// True if the object is a snapshot
218 218 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
219 219 self.call_cindex(py, "issnapshot", args, kw)
220 220 }
221 221
222 222 /// Gather snapshot data in a cache dict
223 223 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
224 224 self.call_cindex(py, "findsnapshots", args, kw)
225 225 }
226 226
227 227 /// determine revisions with deltas to reconstruct fulltext
228 228 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
229 229 self.call_cindex(py, "deltachain", args, kw)
230 230 }
231 231
232 232 /// slice planned chunk read to reach a density threshold
233 233 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
234 234 self.call_cindex(py, "slicechunktodensity", args, kw)
235 235 }
236 236
237 237 /// stats for the index
238 238 def stats(&self, *args, **kw) -> PyResult<PyObject> {
239 239 self.call_cindex(py, "stats", args, kw)
240 240 }
241 241
242 242 // index_sequence_methods and index_mapping_methods.
243 243 //
244 244 // Since we call back through the high level Python API,
245 245 // there's no point making a distinction between index_get
246 246 // and index_getitem.
247 247
248 248 def __len__(&self) -> PyResult<usize> {
249 249 self.cindex(py).borrow().inner().len(py)
250 250 }
251 251
252 252 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
253 253 // this conversion seems needless, but that's actually because
254 254 // `index_getitem` does not handle conversion from PyLong,
255 255 // which expressions such as [e for e in index] internally use.
256 256 // Note that we don't seem to have a direct way to call
257 257 // PySequence_GetItem (does the job), which would possibly be better
258 258 // for performance
259 259 let key = match key.extract::<i32>(py) {
260 260 Ok(rev) => rev.to_py_object(py).into_object(),
261 261 Err(_) => key,
262 262 };
263 263 self.cindex(py).borrow().inner().get_item(py, key)
264 264 }
265 265
266 266 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
267 267 self.cindex(py).borrow().inner().set_item(py, key, value)
268 268 }
269 269
270 270 def __contains__(&self, item: PyObject) -> PyResult<bool> {
271 271 // ObjectProtocol does not seem to provide contains(), so
272 272 // this is an equivalent implementation of the index_contains()
273 273 // defined in revlog.c
274 274 let cindex = self.cindex(py).borrow();
275 275 match item.extract::<i32>(py) {
276 276 Ok(rev) => {
277 277 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
278 278 }
279 279 Err(_) => {
280 280 cindex.inner().call_method(
281 281 py,
282 282 "has_node",
283 283 PyTuple::new(py, &[item]),
284 284 None)?
285 285 .extract(py)
286 286 }
287 287 }
288 288 }
289 289
290 290 def nodemap_data_all(&self) -> PyResult<PyBytes> {
291 291 self.inner_nodemap_data_all(py)
292 292 }
293 293
294 294 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
295 295 self.inner_nodemap_data_incremental(py)
296 296 }
297 297 def update_nodemap_data(
298 298 &self,
299 299 docket: PyObject,
300 300 nm_data: PyObject
301 301 ) -> PyResult<PyObject> {
302 302 self.inner_update_nodemap_data(py, docket, nm_data)
303 303 }
304 304
305 305 @property
306 306 def entry_size(&self) -> PyResult<PyInt> {
307 307 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
308 308 }
309 309
310 310 @property
311 311 def rust_ext_compat(&self) -> PyResult<PyInt> {
312 312 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
313 313 }
314 314
315 315 });
316 316
317 317 impl MixedIndex {
318 318 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
319 319 Self::create_instance(
320 320 py,
321 321 RefCell::new(cindex::Index::new(py, cindex)?),
322 322 RefCell::new(None),
323 323 RefCell::new(None),
324 324 RefCell::new(None),
325 325 )
326 326 }
327 327
328 328 /// This is scaffolding at this point, but it could also become
329 329 /// a way to start a persistent nodemap or perform a
330 330 /// vacuum / repack operation
331 331 fn fill_nodemap(
332 332 &self,
333 333 py: Python,
334 334 nt: &mut NodeTree,
335 335 ) -> PyResult<PyObject> {
336 336 let index = self.cindex(py).borrow();
337 337 for r in 0..index.len() {
338 338 let rev = Revision(r as BaseRevision);
339 339 // in this case node() won't ever return None
340 340 nt.insert(&*index, index.node(rev).unwrap(), rev)
341 341 .map_err(|e| nodemap_error(py, e))?
342 342 }
343 343 Ok(py.None())
344 344 }
345 345
346 346 fn get_nodetree<'a>(
347 347 &'a self,
348 348 py: Python<'a>,
349 349 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
350 350 if self.nt(py).borrow().is_none() {
351 let readonly = Box::new(Vec::new());
351 let readonly = Box::<Vec<_>>::default();
352 352 let mut nt = NodeTree::load_bytes(readonly, 0);
353 353 self.fill_nodemap(py, &mut nt)?;
354 354 self.nt(py).borrow_mut().replace(nt);
355 355 }
356 356 Ok(self.nt(py))
357 357 }
358 358
359 359 /// forward a method call to the underlying C index
360 360 fn call_cindex(
361 361 &self,
362 362 py: Python,
363 363 name: &str,
364 364 args: &PyTuple,
365 365 kwargs: Option<&PyDict>,
366 366 ) -> PyResult<PyObject> {
367 367 self.cindex(py)
368 368 .borrow()
369 369 .inner()
370 370 .call_method(py, name, args, kwargs)
371 371 }
372 372
373 373 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
374 374 self.cindex(py).borrow().clone_ref(py)
375 375 }
376 376
377 377 /// Returns the full nodemap bytes to be written as-is to disk
378 378 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
379 379 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
380 380 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
381 381
382 382 // If there's anything readonly, we need to build the data again from
383 383 // scratch
384 384 let bytes = if readonly.len() > 0 {
385 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
385 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
386 386 self.fill_nodemap(py, &mut nt)?;
387 387
388 388 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
389 389 assert_eq!(readonly.len(), 0);
390 390
391 391 bytes
392 392 } else {
393 393 bytes
394 394 };
395 395
396 396 let bytes = PyBytes::new(py, &bytes);
397 397 Ok(bytes)
398 398 }
399 399
400 400 /// Returns the last saved docket along with the size of any changed data
401 401 /// (in number of blocks), and said data as bytes.
402 402 fn inner_nodemap_data_incremental(
403 403 &self,
404 404 py: Python,
405 405 ) -> PyResult<PyObject> {
406 406 let docket = self.docket(py).borrow();
407 407 let docket = match docket.as_ref() {
408 408 Some(d) => d,
409 409 None => return Ok(py.None()),
410 410 };
411 411
412 412 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
413 413 let masked_blocks = node_tree.masked_readonly_blocks();
414 414 let (_, data) = node_tree.into_readonly_and_added_bytes();
415 415 let changed = masked_blocks * std::mem::size_of::<Block>();
416 416
417 417 Ok((docket, changed, PyBytes::new(py, &data))
418 418 .to_py_object(py)
419 419 .into_object())
420 420 }
421 421
422 422 /// Update the nodemap from the new (mmaped) data.
423 423 /// The docket is kept as a reference for later incremental calls.
424 424 fn inner_update_nodemap_data(
425 425 &self,
426 426 py: Python,
427 427 docket: PyObject,
428 428 nm_data: PyObject,
429 429 ) -> PyResult<PyObject> {
430 430 let buf = PyBuffer::get(py, &nm_data)?;
431 431 let len = buf.item_count();
432 432
433 433 // Build a slice from the mmap'ed buffer data
434 434 let cbuf = buf.buf_ptr();
435 435 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
436 436 && buf.is_c_contiguous()
437 437 && u8::is_compatible_format(buf.format())
438 438 {
439 439 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
440 440 } else {
441 441 return Err(PyErr::new::<ValueError, _>(
442 442 py,
443 443 "Nodemap data buffer has an invalid memory representation"
444 444 .to_string(),
445 445 ));
446 446 };
447 447
448 448 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
449 449 // pointer.
450 450 self.mmap(py).borrow_mut().replace(buf);
451 451
452 452 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
453 453
454 454 let data_tip = docket
455 455 .getattr(py, "tip_rev")?
456 456 .extract::<BaseRevision>(py)?
457 457 .into();
458 458 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
459 459 let idx = self.cindex(py).borrow();
460 460 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
461 461 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
462 462 })?;
463 463 let current_tip = idx.len();
464 464
465 465 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
466 466 let rev = Revision(r);
467 467 // in this case node() won't ever return None
468 468 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
469 469 .map_err(|e| nodemap_error(py, e))?
470 470 }
471 471
472 472 *self.nt(py).borrow_mut() = Some(nt);
473 473
474 474 Ok(py.None())
475 475 }
476 476 }
477 477
478 478 fn revlog_error(py: Python) -> PyErr {
479 479 match py
480 480 .import("mercurial.error")
481 481 .and_then(|m| m.get(py, "RevlogError"))
482 482 {
483 483 Err(e) => e,
484 484 Ok(cls) => PyErr::from_instance(
485 485 py,
486 486 cls.call(py, (py.None(),), None).ok().into_py_object(py),
487 487 ),
488 488 }
489 489 }
490 490
491 491 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
492 492 PyErr::new::<ValueError, _>(
493 493 py,
494 494 format!(
495 495 "Inconsistency: Revision {} found in nodemap \
496 496 is not in revlog index",
497 497 rev
498 498 ),
499 499 )
500 500 }
501 501
502 502 /// Standard treatment of NodeMapError
503 503 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
504 504 match err {
505 505 NodeMapError::MultipleResults => revlog_error(py),
506 506 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
507 507 }
508 508 }
509 509
510 510 /// Create the module, with __package__ given from parent
511 511 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
512 512 let dotted_name = &format!("{}.revlog", package);
513 513 let m = PyModule::new(py, dotted_name)?;
514 514 m.add(py, "__package__", package)?;
515 515 m.add(py, "__doc__", "RevLog - Rust implementations")?;
516 516
517 517 m.add_class::<MixedIndex>(py)?;
518 518
519 519 let sys = PyModule::import(py, "sys")?;
520 520 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
521 521 sys_modules.set_item(py, dotted_name, &m)?;
522 522
523 523 Ok(m)
524 524 }
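Besides the borrow cleanups, the revlog.rs hunks replace `Box::new(Vec::new())` with `Box::<Vec<_>>::default()`, which is the rewrite clippy's `box_default` lint suggests. A tiny standalone illustration of the two spellings (a hypothetical snippet, not taken from the Mercurial tree):

fn main() {
    // Flagged by clippy: build the default value, then box it.
    #[allow(clippy::box_default)]
    let old: Box<Vec<u8>> = Box::new(Vec::new());
    // Suggested spelling: ask `Box` for the default directly.
    let new: Box<Vec<u8>> = Box::<Vec<u8>>::default();
    assert_eq!(old, new);
}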
@@ -1,116 +1,116 b''
1 1 use crate::error::CommandError;
2 2 use clap::Arg;
3 3 use format_bytes::format_bytes;
4 4 use hg::operations::cat;
5 5 use hg::utils::hg_path::HgPathBuf;
6 6 use std::ffi::OsString;
7 7 use std::os::unix::prelude::OsStrExt;
8 8
9 9 pub const HELP_TEXT: &str = "
10 10 Output the current or given revision of files
11 11 ";
12 12
13 13 pub fn args() -> clap::Command {
14 14 clap::command!("cat")
15 15 .arg(
16 16 Arg::new("rev")
17 17 .help("search the repository as it is in REV")
18 18 .short('r')
19 19 .long("rev")
20 20 .value_name("REV"),
21 21 )
22 22 .arg(
23 23 clap::Arg::new("files")
24 24 .required(true)
25 25 .num_args(1..)
26 26 .value_name("FILE")
27 27 .value_parser(clap::value_parser!(std::ffi::OsString))
28 28 .help("Files to output"),
29 29 )
30 30 .about(HELP_TEXT)
31 31 }
32 32
33 33 #[logging_timer::time("trace")]
34 34 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
35 35 let cat_enabled = invocation.config.get_bool(b"rhg", b"cat")?;
36 36 if !cat_enabled {
37 37 return Err(CommandError::unsupported(
38 38 "cat is disabled in rhg (enable it with 'rhg.cat = true' \
39 39 or enable fallback with 'rhg.on-unsupported = fallback')",
40 40 ));
41 41 }
42 42
43 43 let rev = invocation.subcommand_args.get_one::<String>("rev");
44 44 let file_args =
45 45 match invocation.subcommand_args.get_many::<OsString>("files") {
46 46 Some(files) => files
47 47 .filter(|s| !s.is_empty())
48 48 .map(|s| s.as_os_str())
49 49 .collect(),
50 50 None => vec![],
51 51 };
52 52
53 53 let repo = invocation.repo?;
54 54 let cwd = hg::utils::current_dir()?;
55 55 let working_directory = repo.working_directory_path();
56 56 let working_directory = cwd.join(working_directory); // Make it absolute
57 57
58 58 let mut files = vec![];
59 59 for file in file_args {
60 60 if file.as_bytes().starts_with(b"set:") {
61 61 let message = "fileset";
62 62 return Err(CommandError::unsupported(message));
63 63 }
64 64
65 let normalized = cwd.join(&file);
65 let normalized = cwd.join(file);
66 66 // TODO: actually normalize `..` path segments etc?
67 67 let dotted = normalized.components().any(|c| c.as_os_str() == "..");
68 68 if file.as_bytes() == b"." || dotted {
69 69 let message = "`..` or `.` path segment";
70 70 return Err(CommandError::unsupported(message));
71 71 }
72 72 let relative_path = working_directory
73 73 .strip_prefix(&cwd)
74 74 .unwrap_or(&working_directory);
75 75 let stripped = normalized
76 76 .strip_prefix(&working_directory)
77 77 .map_err(|_| {
78 78 CommandError::abort(format!(
79 79 "abort: {} not under root '{}'\n(consider using '--cwd {}')",
80 80 String::from_utf8_lossy(file.as_bytes()),
81 81 working_directory.display(),
82 82 relative_path.display(),
83 83 ))
84 84 })?;
85 85 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
86 86 .map_err(|e| CommandError::abort(e.to_string()))?;
87 87 files.push(hg_file);
88 88 }
89 89 let files = files.iter().map(|file| file.as_ref()).collect();
90 90 // TODO probably move this to a util function like `repo.default_rev` or
91 91 // something when it's used somewhere else
92 92 let rev = match rev {
93 93 Some(r) => r.to_string(),
94 94 None => format!("{:x}", repo.dirstate_parents()?.p1),
95 95 };
96 96
97 97 let output = cat(repo, &rev, files).map_err(|e| (e, rev.as_str()))?;
98 98 for (_file, contents) in output.results {
99 99 invocation.ui.write_stdout(&contents)?;
100 100 }
101 101 if !output.missing.is_empty() {
102 102 let short = format!("{:x}", output.node.short()).into_bytes();
103 103 for path in &output.missing {
104 104 invocation.ui.write_stderr(&format_bytes!(
105 105 b"{}: no such file in rev {}\n",
106 106 path.as_bytes(),
107 107 short
108 108 ))?;
109 109 }
110 110 }
111 111 if output.found_any {
112 112 Ok(())
113 113 } else {
114 114 Err(CommandError::Unsuccessful)
115 115 }
116 116 }
@@ -1,71 +1,71 b''
1 1 use crate::error::CommandError;
2 2 use clap::Arg;
3 3 use clap::ArgGroup;
4 4 use hg::operations::{debug_data, DebugDataKind};
5 5
6 6 pub const HELP_TEXT: &str = "
7 7 Dump the contents of a data file revision
8 8 ";
9 9
10 10 pub fn args() -> clap::Command {
11 11 clap::command!("debugdata")
12 12 .arg(
13 13 Arg::new("changelog")
14 14 .help("open changelog")
15 15 .short('c')
16 16 .action(clap::ArgAction::SetTrue),
17 17 )
18 18 .arg(
19 19 Arg::new("manifest")
20 20 .help("open manifest")
21 21 .short('m')
22 22 .action(clap::ArgAction::SetTrue),
23 23 )
24 24 .group(
25 25 ArgGroup::new("revlog")
26 .args(&["changelog", "manifest"])
26 .args(["changelog", "manifest"])
27 27 .required(true),
28 28 )
29 29 .arg(
30 30 Arg::new("rev")
31 31 .help("revision")
32 32 .required(true)
33 33 .value_name("REV"),
34 34 )
35 35 .about(HELP_TEXT)
36 36 }
37 37
38 38 #[logging_timer::time("trace")]
39 39 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
40 40 let args = invocation.subcommand_args;
41 41 let rev = args
42 42 .get_one::<String>("rev")
43 43 .expect("rev should be a required argument");
44 44 let kind = match (
45 45 args.get_one::<bool>("changelog").unwrap(),
46 46 args.get_one::<bool>("manifest").unwrap(),
47 47 ) {
48 48 (true, false) => DebugDataKind::Changelog,
49 49 (false, true) => DebugDataKind::Manifest,
50 50 (true, true) => {
51 51 unreachable!("Should not happen since options are exclusive")
52 52 }
53 53 (false, false) => {
54 54 unreachable!("Should not happen since options are required")
55 55 }
56 56 };
57 57
58 58 let repo = invocation.repo?;
59 59 if repo.has_narrow() {
60 60 return Err(CommandError::unsupported(
61 61 "support for ellipsis nodes is missing and repo has narrow enabled",
62 62 ));
63 63 }
64 64 let data = debug_data(repo, rev, kind).map_err(|e| (e, rev.as_ref()))?;
65 65
66 66 let mut stdout = invocation.ui.stdout_buffer();
67 67 stdout.write_all(&data)?;
68 68 stdout.flush()?;
69 69
70 70 Ok(())
71 71 }
@@ -1,28 +1,28 b''
1 1 use crate::error::CommandError;
2 2 use format_bytes::format_bytes;
3 3 use hg::errors::{IoErrorContext, IoResultExt};
4 4 use hg::utils::files::get_bytes_from_path;
5 5
6 6 pub const HELP_TEXT: &str = "
7 7 Print the root directory of the current repository.
8 8
9 9 Returns 0 on success.
10 10 ";
11 11
12 12 pub fn args() -> clap::Command {
13 13 clap::command!("root").about(HELP_TEXT)
14 14 }
15 15
16 16 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
17 17 let repo = invocation.repo?;
18 18 let working_directory = repo.working_directory_path();
19 19 let working_directory = std::fs::canonicalize(working_directory)
20 20 .with_context(|| {
21 21 IoErrorContext::CanonicalizingPath(working_directory.to_owned())
22 22 })?;
23 let bytes = get_bytes_from_path(&working_directory);
23 let bytes = get_bytes_from_path(working_directory);
24 24 invocation
25 25 .ui
26 26 .write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
27 27 Ok(())
28 28 }
@@ -1,842 +1,842 b''
1 1 extern crate log;
2 2 use crate::error::CommandError;
3 3 use crate::ui::{local_to_utf8, Ui};
4 4 use clap::{command, Arg, ArgMatches};
5 5 use format_bytes::{format_bytes, join};
6 6 use hg::config::{Config, ConfigSource, PlainInfo};
7 7 use hg::repo::{Repo, RepoError};
8 8 use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes};
9 9 use hg::utils::SliceExt;
10 10 use hg::{exit_codes, requirements};
11 11 use std::borrow::Cow;
12 12 use std::collections::HashSet;
13 13 use std::ffi::OsString;
14 14 use std::os::unix::prelude::CommandExt;
15 15 use std::path::PathBuf;
16 16 use std::process::Command;
17 17
18 18 mod blackbox;
19 19 mod color;
20 20 mod error;
21 21 mod ui;
22 22 pub mod utils {
23 23 pub mod path_utils;
24 24 }
25 25
26 26 fn main_with_result(
27 27 argv: Vec<OsString>,
28 28 process_start_time: &blackbox::ProcessStartTime,
29 29 ui: &ui::Ui,
30 30 repo: Result<&Repo, &NoRepoInCwdError>,
31 31 config: &Config,
32 32 ) -> Result<(), CommandError> {
33 33 check_unsupported(config, repo)?;
34 34
35 35 let app = command!()
36 36 .subcommand_required(true)
37 37 .arg(
38 38 Arg::new("repository")
39 39 .help("repository root directory")
40 40 .short('R')
41 41 .value_name("REPO")
42 42 // Both ok: `hg -R ./foo log` or `hg log -R ./foo`
43 43 .global(true),
44 44 )
45 45 .arg(
46 46 Arg::new("config")
47 47 .help("set/override config option (use 'section.name=value')")
48 48 .value_name("CONFIG")
49 49 .global(true)
50 50 .long("config")
51 51 // Ok: `--config section.key1=val --config section.key2=val2`
52 52 // Not ok: `--config section.key1=val section.key2=val2`
53 53 .action(clap::ArgAction::Append),
54 54 )
55 55 .arg(
56 56 Arg::new("cwd")
57 57 .help("change working directory")
58 58 .value_name("DIR")
59 59 .long("cwd")
60 60 .global(true),
61 61 )
62 62 .arg(
63 63 Arg::new("color")
64 64 .help("when to colorize (boolean, always, auto, never, or debug)")
65 65 .value_name("TYPE")
66 66 .long("color")
67 67 .global(true),
68 68 )
69 69 .version("0.0.1");
70 70 let app = add_subcommand_args(app);
71 71
72 72 let matches = app.try_get_matches_from(argv.iter())?;
73 73
74 74 let (subcommand_name, subcommand_args) =
75 75 matches.subcommand().expect("subcommand required");
76 76
77 77 // Mercurial allows users to define "defaults" for commands, fallback
78 78 // if a default is detected for the current command
79 79 let defaults = config.get_str(b"defaults", subcommand_name.as_bytes())?;
80 80 match defaults {
81 81 // Programmatic usage might set defaults to an empty string to unset
82 82 // it; allow that
83 83 None | Some("") => {}
84 84 Some(_) => {
85 85 let msg = "`defaults` config set";
86 86 return Err(CommandError::unsupported(msg));
87 87 }
88 88 }
89 89
90 90 for prefix in ["pre", "post", "fail"].iter() {
91 91 // Mercurial allows users to define generic hooks for commands,
92 92 // fallback if any are detected
93 93 let item = format!("{}-{}", prefix, subcommand_name);
94 94 let hook_for_command =
95 95 config.get_str_no_default(b"hooks", item.as_bytes())?;
96 96 if hook_for_command.is_some() {
97 97 let msg = format!("{}-{} hook defined", prefix, subcommand_name);
98 98 return Err(CommandError::unsupported(msg));
99 99 }
100 100 }
101 101 let run = subcommand_run_fn(subcommand_name)
102 102 .expect("unknown subcommand name from clap despite Command::subcommand_required");
103 103
104 104 let invocation = CliInvocation {
105 105 ui,
106 106 subcommand_args,
107 107 config,
108 108 repo,
109 109 };
110 110
111 111 if let Ok(repo) = repo {
112 112 // We don't support subrepos; fall back if the subrepos file is present
113 113 if repo.working_directory_vfs().join(".hgsub").exists() {
114 114 let msg = "subrepos (.hgsub is present)";
115 115 return Err(CommandError::unsupported(msg));
116 116 }
117 117 }
118 118
119 119 if config.is_extension_enabled(b"blackbox") {
120 120 let blackbox =
121 121 blackbox::Blackbox::new(&invocation, process_start_time)?;
122 122 blackbox.log_command_start(argv.iter());
123 123 let result = run(&invocation);
124 124 blackbox.log_command_end(
125 125 argv.iter(),
126 126 exit_code(
127 127 &result,
128 128 // TODO: show a warning or combine with original error if
129 129 // `get_bool` returns an error
130 130 config
131 131 .get_bool(b"ui", b"detailed-exit-code")
132 132 .unwrap_or(false),
133 133 ),
134 134 );
135 135 result
136 136 } else {
137 137 run(&invocation)
138 138 }
139 139 }
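To recap the per-command fallback triggers above (the `[defaults]` section and the generic `pre-`/`post-`/`fail-` hooks), here is a sketch of the same lookups for a hypothetical `status` invocation; `status_needs_python_fallback` is an illustrative helper, not an rhg API:

    use hg::config::Config;
    use hg::errors::HgError;

    /// Sketch only: true when rhg would defer to Python `hg` for `status`
    /// because of a non-empty `defaults.status` value or any pre/post/fail hook.
    fn status_needs_python_fallback(config: &Config) -> Result<bool, HgError> {
        let has_defaults =
            matches!(config.get_str(b"defaults", b"status")?, Some(v) if !v.is_empty());
        let mut has_hook = false;
        for prefix in ["pre", "post", "fail"] {
            let item = format!("{}-status", prefix);
            has_hook |= config
                .get_str_no_default(b"hooks", item.as_bytes())?
                .is_some();
        }
        Ok(has_defaults || has_hook)
    }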
140 140
141 141 fn rhg_main(argv: Vec<OsString>) -> ! {
142 142 // Run this first, before we find out if the blackbox extension is even
143 143 // enabled, in order to include everything in-between in the duration
144 144 // measurements. Reading config files can be slow if they’re on NFS.
145 145 let process_start_time = blackbox::ProcessStartTime::now();
146 146
147 147 env_logger::init();
148 148
149 149 // Make sure nothing in a future version of `rhg` sets the global
150 150 // threadpool before we can cap default threads. (This is also called
151 151 // in core because Python uses the same code path; we're adding a
152 152 // redundant check.)
153 153 hg::utils::cap_default_rayon_threads()
154 154 .expect("Rayon threadpool already initialized");
155 155
156 156 let early_args = EarlyArgs::parse(&argv);
157 157
158 158 let initial_current_dir = early_args.cwd.map(|cwd| {
159 159 let cwd = get_path_from_bytes(&cwd);
160 160 std::env::current_dir()
161 161 .and_then(|initial| {
162 162 std::env::set_current_dir(cwd)?;
163 163 Ok(initial)
164 164 })
165 165 .unwrap_or_else(|error| {
166 166 exit(
167 167 &argv,
168 168 &None,
169 169 &Ui::new_infallible(&Config::empty()),
170 170 OnUnsupported::Abort,
171 171 Err(CommandError::abort(format!(
172 172 "abort: {}: '{}'",
173 173 error,
174 174 cwd.display()
175 175 ))),
176 176 false,
177 177 )
178 178 })
179 179 });
180 180
181 181 let mut non_repo_config =
182 182 Config::load_non_repo().unwrap_or_else(|error| {
183 183 // Normally this is decided based on config, but we don’t have that
184 184 // available. As of this writing config loading never returns an
185 185 // "unsupported" error but that is not enforced by the type system.
186 186 let on_unsupported = OnUnsupported::Abort;
187 187
188 188 exit(
189 189 &argv,
190 190 &initial_current_dir,
191 191 &Ui::new_infallible(&Config::empty()),
192 192 on_unsupported,
193 193 Err(error.into()),
194 194 false,
195 195 )
196 196 });
197 197
198 198 non_repo_config
199 199 .load_cli_args(early_args.config, early_args.color)
200 200 .unwrap_or_else(|error| {
201 201 exit(
202 202 &argv,
203 203 &initial_current_dir,
204 204 &Ui::new_infallible(&non_repo_config),
205 205 OnUnsupported::from_config(&non_repo_config),
206 206 Err(error.into()),
207 207 non_repo_config
208 208 .get_bool(b"ui", b"detailed-exit-code")
209 209 .unwrap_or(false),
210 210 )
211 211 });
212 212
213 213 if let Some(repo_path_bytes) = &early_args.repo {
214 214 lazy_static::lazy_static! {
215 215 static ref SCHEME_RE: regex::bytes::Regex =
216 216 // Same as `_matchscheme` in `mercurial/util.py`
217 217 regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap();
218 218 }
219 219 if SCHEME_RE.is_match(repo_path_bytes) {
220 220 exit(
221 221 &argv,
222 222 &initial_current_dir,
223 223 &Ui::new_infallible(&non_repo_config),
224 224 OnUnsupported::from_config(&non_repo_config),
225 225 Err(CommandError::UnsupportedFeature {
226 226 message: format_bytes!(
227 227 b"URL-like --repository {}",
228 228 repo_path_bytes
229 229 ),
230 230 }),
231 231 // TODO: show a warning or combine with original error if
232 232 // `get_bool` returns an error
233 233 non_repo_config
234 234 .get_bool(b"ui", b"detailed-exit-code")
235 235 .unwrap_or(false),
236 236 )
237 237 }
238 238 }
239 239 let repo_arg = early_args.repo.unwrap_or_default();
240 240 let repo_path: Option<PathBuf> = {
241 241 if repo_arg.is_empty() {
242 242 None
243 243 } else {
244 244 let local_config = {
245 245 if std::env::var_os("HGRCSKIPREPO").is_none() {
246 246 // TODO: handle errors from find_repo_root
247 247 if let Ok(current_dir_path) = Repo::find_repo_root() {
248 248 let config_files = vec![
249 249 ConfigSource::AbsPath(
250 250 current_dir_path.join(".hg/hgrc"),
251 251 ),
252 252 ConfigSource::AbsPath(
253 253 current_dir_path.join(".hg/hgrc-not-shared"),
254 254 ),
255 255 ];
256 256 // TODO: handle errors from
257 257 // `load_from_explicit_sources`
258 258 Config::load_from_explicit_sources(config_files).ok()
259 259 } else {
260 260 None
261 261 }
262 262 } else {
263 263 None
264 264 }
265 265 };
266 266
267 267 let non_repo_config_val = {
268 268 let non_repo_val = non_repo_config.get(b"paths", &repo_arg);
269 269 match &non_repo_val {
270 270 Some(val) if !val.is_empty() => home::home_dir()
271 271 .unwrap_or_else(|| PathBuf::from("~"))
272 272 .join(get_path_from_bytes(val))
273 273 .canonicalize()
274 274 // TODO: handle error and make it similar to python
275 275 // implementation maybe?
276 276 .ok(),
277 277 _ => None,
278 278 }
279 279 };
280 280
281 281 let config_val = match &local_config {
282 282 None => non_repo_config_val,
283 283 Some(val) => {
284 284 let local_config_val = val.get(b"paths", &repo_arg);
285 285 match &local_config_val {
286 286 Some(val) if !val.is_empty() => {
287 287 // The presence of a local_config
288 288 // ensures that current_dir
289 289 // won't result in an error
290 290 let canpath = hg::utils::current_dir()
291 291 .unwrap()
292 292 .join(get_path_from_bytes(val))
293 293 .canonicalize();
294 294 canpath.ok().or(non_repo_config_val)
295 295 }
296 296 _ => non_repo_config_val,
297 297 }
298 298 }
299 299 };
300 300 config_val
301 301 .or_else(|| Some(get_path_from_bytes(&repo_arg).to_path_buf()))
302 302 }
303 303 };
304 304
305 305 let simple_exit =
306 306 |ui: &Ui, config: &Config, result: Result<(), CommandError>| -> ! {
307 307 exit(
308 308 &argv,
309 309 &initial_current_dir,
310 310 ui,
311 311 OnUnsupported::from_config(config),
312 312 result,
313 313 // TODO: show a warning or combine with original error if
314 314 // `get_bool` returns an error
315 315 non_repo_config
316 316 .get_bool(b"ui", b"detailed-exit-code")
317 317 .unwrap_or(false),
318 318 )
319 319 };
320 320 let early_exit = |config: &Config, error: CommandError| -> ! {
321 321 simple_exit(&Ui::new_infallible(config), config, Err(error))
322 322 };
323 323 let repo_result = match Repo::find(&non_repo_config, repo_path.to_owned())
324 324 {
325 325 Ok(repo) => Ok(repo),
326 326 Err(RepoError::NotFound { at }) if repo_path.is_none() => {
327 327 // Not finding a repo is not fatal yet, if `-R` was not given
328 328 Err(NoRepoInCwdError { cwd: at })
329 329 }
330 330 Err(error) => early_exit(&non_repo_config, error.into()),
331 331 };
332 332
333 333 let config = if let Ok(repo) = &repo_result {
334 334 repo.config()
335 335 } else {
336 336 &non_repo_config
337 337 };
338 338
339 339 let mut config_cow = Cow::Borrowed(config);
340 340 config_cow.to_mut().apply_plain(PlainInfo::from_env());
341 341 if !ui::plain(Some("tweakdefaults"))
342 342 && config_cow
343 343 .as_ref()
344 344 .get_bool(b"ui", b"tweakdefaults")
345 345 .unwrap_or_else(|error| early_exit(config, error.into()))
346 346 {
347 347 config_cow.to_mut().tweakdefaults()
348 348 };
349 349 let config = config_cow.as_ref();
350 350 let ui = Ui::new(config)
351 351 .unwrap_or_else(|error| early_exit(config, error.into()));
352 352
353 353 if let Ok(true) = config.get_bool(b"rhg", b"fallback-immediately") {
354 354 exit(
355 355 &argv,
356 356 &initial_current_dir,
357 357 &ui,
358 358 OnUnsupported::fallback(config),
359 359 Err(CommandError::unsupported(
360 360 "`rhg.fallback-immediately is true`",
361 361 )),
362 362 false,
363 363 )
364 364 }
365 365
366 366 let result = main_with_result(
367 367 argv.iter().map(|s| s.to_owned()).collect(),
368 368 &process_start_time,
369 369 &ui,
370 370 repo_result.as_ref(),
371 371 config,
372 372 );
373 373 simple_exit(&ui, config, result)
374 374 }
375 375
376 376 fn main() -> ! {
377 377 rhg_main(std::env::args_os().collect())
378 378 }
379 379
380 380 fn exit_code(
381 381 result: &Result<(), CommandError>,
382 382 use_detailed_exit_code: bool,
383 383 ) -> i32 {
384 384 match result {
385 385 Ok(()) => exit_codes::OK,
386 386 Err(CommandError::Abort {
387 387 detailed_exit_code, ..
388 388 }) => {
389 389 if use_detailed_exit_code {
390 390 *detailed_exit_code
391 391 } else {
392 392 exit_codes::ABORT
393 393 }
394 394 }
395 395 Err(CommandError::Unsuccessful) => exit_codes::UNSUCCESSFUL,
396 396 // Exit with a specific code and no error message to let a potential
397 397 // wrapper script fallback to Python-based Mercurial.
398 398 Err(CommandError::UnsupportedFeature { .. }) => {
399 399 exit_codes::UNIMPLEMENTED
400 400 }
401 401 Err(CommandError::InvalidFallback { .. }) => {
402 402 exit_codes::INVALID_FALLBACK
403 403 }
404 404 }
405 405 }
406 406
407 fn exit<'a>(
408 original_args: &'a [OsString],
407 fn exit(
408 original_args: &[OsString],
409 409 initial_current_dir: &Option<PathBuf>,
410 410 ui: &Ui,
411 411 mut on_unsupported: OnUnsupported,
412 412 result: Result<(), CommandError>,
413 413 use_detailed_exit_code: bool,
414 414 ) -> ! {
415 415 if let (
416 416 OnUnsupported::Fallback { executable },
417 417 Err(CommandError::UnsupportedFeature { message }),
418 418 ) = (&on_unsupported, &result)
419 419 {
420 420 let mut args = original_args.iter();
421 421 let executable = match executable {
422 422 None => {
423 423 exit_no_fallback(
424 424 ui,
425 425 OnUnsupported::Abort,
426 426 Err(CommandError::abort(
427 427 "abort: 'rhg.on-unsupported=fallback' without \
428 428 'rhg.fallback-executable' set.",
429 429 )),
430 430 false,
431 431 );
432 432 }
433 433 Some(executable) => executable,
434 434 };
435 435 let executable_path = get_path_from_bytes(executable);
436 436 let this_executable = args.next().expect("expected argv[0] to exist");
437 437 if executable_path == *this_executable {
438 438 // Avoid spawning infinitely many processes until resource
439 439 // exhaustion.
440 440 let _ = ui.write_stderr(&format_bytes!(
441 441 b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \
442 442 points to `rhg` itself.\n",
443 443 executable
444 444 ));
445 445 on_unsupported = OnUnsupported::Abort
446 446 } else {
447 447 log::debug!("falling back (see trace-level log)");
448 448 log::trace!("{}", local_to_utf8(message));
449 449 if let Err(err) = which::which(executable_path) {
450 450 exit_no_fallback(
451 451 ui,
452 452 OnUnsupported::Abort,
453 453 Err(CommandError::InvalidFallback {
454 454 path: executable.to_owned(),
455 455 err: err.to_string(),
456 456 }),
457 457 use_detailed_exit_code,
458 458 )
459 459 }
460 460 // `args` is now `argv[1..]` since we’ve already consumed
461 461 // `argv[0]`
462 462 let mut command = Command::new(executable_path);
463 463 command.args(args);
464 464 if let Some(initial) = initial_current_dir {
465 465 command.current_dir(initial);
466 466 }
467 467 // We don't use subprocess because proper signal handling is harder
468 468 // and we don't want to keep `rhg` around after a fallback anyway.
469 469 // For example, if `rhg` is run in the background and falls back to
470 470 // `hg` which, in turn, waits for a signal, we'll get stuck if
471 471 // we're doing plain subprocess.
472 472 //
473 473 // If `exec` returns, we can only assume our process is very broken
474 474 // (see its documentation), so only try to forward the error code
475 475 // when exiting.
476 476 let err = command.exec();
477 477 std::process::exit(
478 478 err.raw_os_error().unwrap_or(exit_codes::ABORT),
479 479 );
480 480 }
481 481 }
482 482 exit_no_fallback(ui, on_unsupported, result, use_detailed_exit_code)
483 483 }
484 484
485 485 fn exit_no_fallback(
486 486 ui: &Ui,
487 487 on_unsupported: OnUnsupported,
488 488 result: Result<(), CommandError>,
489 489 use_detailed_exit_code: bool,
490 490 ) -> ! {
491 491 match &result {
492 492 Ok(_) => {}
493 493 Err(CommandError::Unsuccessful) => {}
494 494 Err(CommandError::Abort { message, hint, .. }) => {
494 494 // Ignore errors when writing to stderr; we’re already exiting
496 496 // with failure code so there’s not much more we can do.
497 497 if !message.is_empty() {
498 498 let _ = ui.write_stderr(&format_bytes!(b"{}\n", message));
499 499 }
500 500 if let Some(hint) = hint {
501 501 let _ = ui.write_stderr(&format_bytes!(b"({})\n", hint));
502 502 }
503 503 }
504 504 Err(CommandError::UnsupportedFeature { message }) => {
505 505 match on_unsupported {
506 506 OnUnsupported::Abort => {
507 507 let _ = ui.write_stderr(&format_bytes!(
508 508 b"unsupported feature: {}\n",
509 509 message
510 510 ));
511 511 }
512 512 OnUnsupported::AbortSilent => {}
513 513 OnUnsupported::Fallback { .. } => unreachable!(),
514 514 }
515 515 }
516 516 Err(CommandError::InvalidFallback { path, err }) => {
517 517 let _ = ui.write_stderr(&format_bytes!(
518 518 b"abort: invalid fallback '{}': {}\n",
519 519 path,
520 520 err.as_bytes(),
521 521 ));
522 522 }
523 523 }
524 524 std::process::exit(exit_code(&result, use_detailed_exit_code))
525 525 }
526 526
527 527 macro_rules! subcommands {
528 528 ($( $command: ident )+) => {
529 529 mod commands {
530 530 $(
531 531 pub mod $command;
532 532 )+
533 533 }
534 534
535 535 fn add_subcommand_args(app: clap::Command) -> clap::Command {
536 536 app
537 537 $(
538 538 .subcommand(commands::$command::args())
539 539 )+
540 540 }
541 541
542 542 pub type RunFn = fn(&CliInvocation) -> Result<(), CommandError>;
543 543
544 544 fn subcommand_run_fn(name: &str) -> Option<RunFn> {
545 545 match name {
546 546 $(
547 547 stringify!($command) => Some(commands::$command::run),
548 548 )+
549 549 _ => None,
550 550 }
551 551 }
552 552 };
553 553 }
554 554
555 555 subcommands! {
556 556 cat
557 557 debugdata
558 558 debugrequirements
559 559 debugignorerhg
560 560 debugrhgsparse
561 561 files
562 562 root
563 563 config
564 564 status
565 565 }
566 566
567 567 pub struct CliInvocation<'a> {
568 568 ui: &'a Ui,
569 569 subcommand_args: &'a ArgMatches,
570 570 config: &'a Config,
571 571 /// References inside `Result` are a bit peculiar, but they allow
572 572 /// `invocation.repo?` to work with `&CliInvocation`, since this
573 573 /// `Result` type is `Copy`.
574 574 repo: Result<&'a Repo, &'a NoRepoInCwdError>,
575 575 }
576 576
577 577 struct NoRepoInCwdError {
578 578 cwd: PathBuf,
579 579 }
580 580
581 581 /// CLI arguments to be parsed "early" in order to be able to read
582 582 /// configuration before using Clap. Ideally we would also use Clap for this,
583 583 /// see <https://github.com/clap-rs/clap/discussions/2366>.
584 584 ///
585 585 /// These arguments are still declared when we do use Clap later, so that Clap
586 586 /// does not return an error for their presence.
587 587 struct EarlyArgs {
588 588 /// Values of all `--config` arguments. (Possibly none)
589 589 config: Vec<Vec<u8>>,
590 590 /// Value of all the `--color` argument, if any.
591 591 color: Option<Vec<u8>>,
592 592 /// Value of the `-R` or `--repository` argument, if any.
593 593 repo: Option<Vec<u8>>,
594 594 /// Value of the `--cwd` argument, if any.
595 595 cwd: Option<Vec<u8>>,
596 596 }
597 597
598 598 impl EarlyArgs {
599 599 fn parse<'a>(args: impl IntoIterator<Item = &'a OsString>) -> Self {
600 600 let mut args = args.into_iter().map(get_bytes_from_os_str);
601 601 let mut config = Vec::new();
602 602 let mut color = None;
603 603 let mut repo = None;
604 604 let mut cwd = None;
605 605 // Use `while let` instead of `for` so that we can also call
606 606 // `args.next()` inside the loop.
607 607 while let Some(arg) = args.next() {
608 608 if arg == b"--config" {
609 609 if let Some(value) = args.next() {
610 610 config.push(value)
611 611 }
612 612 } else if let Some(value) = arg.drop_prefix(b"--config=") {
613 613 config.push(value.to_owned())
614 614 }
615 615
616 616 if arg == b"--color" {
617 617 if let Some(value) = args.next() {
618 618 color = Some(value)
619 619 }
620 620 } else if let Some(value) = arg.drop_prefix(b"--color=") {
621 621 color = Some(value.to_owned())
622 622 }
623 623
624 624 if arg == b"--cwd" {
625 625 if let Some(value) = args.next() {
626 626 cwd = Some(value)
627 627 }
628 628 } else if let Some(value) = arg.drop_prefix(b"--cwd=") {
629 629 cwd = Some(value.to_owned())
630 630 }
631 631
632 632 if arg == b"--repository" || arg == b"-R" {
633 633 if let Some(value) = args.next() {
634 634 repo = Some(value)
635 635 }
636 636 } else if let Some(value) = arg.drop_prefix(b"--repository=") {
637 637 repo = Some(value.to_owned())
638 638 } else if let Some(value) = arg.drop_prefix(b"-R") {
639 639 repo = Some(value.to_owned())
640 640 }
641 641 }
642 642 Self {
643 643 config,
644 644 color,
645 645 repo,
646 646 cwd,
647 647 }
648 648 }
649 649 }
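As a concrete illustration of the early parsing above, here is how a hypothetical command line would be split before Clap ever runs (a sketch assuming it lives next to the definitions, since the fields are private to this module):

    use std::ffi::OsString;

    fn main() {
        // Hypothetical command line; `-R/tmp/repo` exercises the `-R`-prefix form.
        let argv: Vec<OsString> =
            ["rhg", "--config", "ui.color=no", "-R/tmp/repo", "status"]
                .into_iter()
                .map(OsString::from)
                .collect();
        let early = EarlyArgs::parse(&argv);
        assert_eq!(early.config, vec![b"ui.color=no".to_vec()]);
        assert_eq!(early.repo, Some(b"/tmp/repo".to_vec()));
        assert!(early.color.is_none() && early.cwd.is_none());
    }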
650 650
651 651 /// What to do when encountering some unsupported feature.
652 652 ///
653 653 /// See `HgError::UnsupportedFeature` and `CommandError::UnsupportedFeature`.
654 654 enum OnUnsupported {
655 655 /// Print an error message describing what feature is not supported,
656 656 /// and exit with code 252.
657 657 Abort,
658 658 /// Silently exit with code 252.
659 659 AbortSilent,
660 660 /// Try running a Python implementation
661 661 Fallback { executable: Option<Vec<u8>> },
662 662 }
663 663
664 664 impl OnUnsupported {
665 665 const DEFAULT: Self = OnUnsupported::Abort;
666 666
667 667 fn fallback_executable(config: &Config) -> Option<Vec<u8>> {
668 668 config
669 669 .get(b"rhg", b"fallback-executable")
670 670 .map(|x| x.to_owned())
671 671 }
672 672
673 673 fn fallback(config: &Config) -> Self {
674 674 OnUnsupported::Fallback {
675 675 executable: Self::fallback_executable(config),
676 676 }
677 677 }
678 678
679 679 fn from_config(config: &Config) -> Self {
680 680 match config
681 681 .get(b"rhg", b"on-unsupported")
682 682 .map(|value| value.to_ascii_lowercase())
683 683 .as_deref()
684 684 {
685 685 Some(b"abort") => OnUnsupported::Abort,
686 686 Some(b"abort-silent") => OnUnsupported::AbortSilent,
687 687 Some(b"fallback") => Self::fallback(config),
688 688 None => Self::DEFAULT,
689 689 Some(_) => {
690 690 // TODO: warn about unknown config value
691 691 Self::DEFAULT
692 692 }
693 693 }
694 694 }
695 695 }
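For orientation, the two config knobs involved are `rhg.on-unsupported`, which picks the policy (the value is compared after ASCII-lowercasing), and `rhg.fallback-executable`, which is only consulted for the fallback policy. A sketch restating `from_config` in words; `describe_policy` is illustrative, not an rhg API:

    use hg::config::Config;

    /// Sketch only: a readable restatement of OnUnsupported::from_config.
    fn describe_policy(config: &Config) -> String {
        match config
            .get(b"rhg", b"on-unsupported")
            .map(|v| v.to_ascii_lowercase())
            .as_deref()
        {
            Some(b"abort-silent") => "exit 252 with no message".to_string(),
            Some(b"fallback") => format!(
                "exec the executable named by rhg.fallback-executable ({})",
                config
                    .get(b"rhg", b"fallback-executable")
                    .map(|p| String::from_utf8_lossy(p).into_owned())
                    .unwrap_or_else(|| "unset".to_string())
            ),
            // `abort`, an unset value, and unknown values all use the default.
            _ => "print the unsupported feature and exit 252".to_string(),
        }
    }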
696 696
697 697 /// The `*` extension is an edge-case for config sub-options that apply to all
698 698 /// extensions. For now, only `:required` exists, but that may change in the
699 699 /// future.
700 700 const SUPPORTED_EXTENSIONS: &[&[u8]] = &[
701 701 b"blackbox",
702 702 b"share",
703 703 b"sparse",
704 704 b"narrow",
705 705 b"*",
706 706 b"strip",
707 707 b"rebase",
708 708 ];
709 709
710 710 fn check_extensions(config: &Config) -> Result<(), CommandError> {
711 711 if let Some(b"*") = config.get(b"rhg", b"ignored-extensions") {
712 712 // All extensions are to be ignored, nothing to do here
713 713 return Ok(());
714 714 }
715 715
716 716 let enabled: HashSet<&[u8]> = config
717 717 .iter_section(b"extensions")
718 718 .filter_map(|(extension, value)| {
719 719 if value == b"!" {
720 720 // Filter out disabled extensions
721 721 return None;
722 722 }
723 723 // Ignore extension suboptions. Only `required` exists for now.
724 724 // `rhg` either supports an extension or doesn't, so it doesn't
725 725 // make sense to consider the loading of an extension.
726 726 let actual_extension =
727 727 extension.split_2(b':').unwrap_or((extension, b"")).0;
728 728 Some(actual_extension)
729 729 })
730 730 .collect();
731 731
732 732 let mut unsupported = enabled;
733 733 for supported in SUPPORTED_EXTENSIONS {
734 734 unsupported.remove(supported);
735 735 }
736 736
737 737 if let Some(ignored_list) = config.get_list(b"rhg", b"ignored-extensions")
738 738 {
739 739 for ignored in ignored_list {
740 740 unsupported.remove(ignored.as_slice());
741 741 }
742 742 }
743 743
744 744 if unsupported.is_empty() {
745 745 Ok(())
746 746 } else {
747 747 let mut unsupported: Vec<_> = unsupported.into_iter().collect();
748 748 // Sort the extensions to get a stable output
749 749 unsupported.sort();
750 750 Err(CommandError::UnsupportedFeature {
751 751 message: format_bytes!(
752 752 b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)",
753 753 join(unsupported, b", ")
754 754 ),
755 755 })
756 756 }
757 757 }
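The `extension:sub-option` keys mentioned in the comment above are normalized with `split_2` before the supported-set check; a tiny sketch of that normalization (the `rebase:required` key is illustrative):

    use hg::utils::SliceExt;

    fn main() {
        let key: &[u8] = b"rebase:required";
        // Keep only the extension name; the sub-option (here `required`) is ignored.
        let actual_extension = key.split_2(b':').unwrap_or((key, b"")).0;
        assert_eq!(actual_extension, b"rebase");
    }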
758 758
759 759 /// Array of tuples of (auto upgrade conf, feature conf, local requirement)
760 760 #[allow(clippy::type_complexity)]
761 761 const AUTO_UPGRADES: &[((&str, &str), (&str, &str), &str)] = &[
762 762 (
763 763 ("format", "use-share-safe.automatic-upgrade-of-mismatching-repositories"),
764 764 ("format", "use-share-safe"),
765 765 requirements::SHARESAFE_REQUIREMENT,
766 766 ),
767 767 (
768 768 ("format", "use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories"),
769 769 ("format", "use-dirstate-tracked-hint"),
770 770 requirements::DIRSTATE_TRACKED_HINT_V1,
771 771 ),
772 772 (
773 773 ("format", "use-dirstate-v2.automatic-upgrade-of-mismatching-repositories"),
774 774 ("format", "use-dirstate-v2"),
775 775 requirements::DIRSTATE_V2_REQUIREMENT,
776 776 ),
777 777 ];
778 778
779 779 /// Mercurial allows users to automatically upgrade their repository.
780 780 /// `rhg` does not have the ability to upgrade yet, so fall back if an upgrade
781 781 /// is needed.
782 782 fn check_auto_upgrade(
783 783 config: &Config,
784 784 reqs: &HashSet<String>,
785 785 ) -> Result<(), CommandError> {
786 786 for (upgrade_conf, feature_conf, local_req) in AUTO_UPGRADES.iter() {
787 787 let auto_upgrade = config
788 788 .get_bool(upgrade_conf.0.as_bytes(), upgrade_conf.1.as_bytes())?;
789 789
790 790 if auto_upgrade {
791 791 let want_it = config.get_bool(
792 792 feature_conf.0.as_bytes(),
793 793 feature_conf.1.as_bytes(),
794 794 )?;
795 795 let have_it = reqs.contains(*local_req);
796 796
797 797 let action = match (want_it, have_it) {
798 798 (true, false) => Some("upgrade"),
799 799 (false, true) => Some("downgrade"),
800 800 _ => None,
801 801 };
802 802 if let Some(action) = action {
803 803 let message = format!(
804 804 "automatic {} {}.{}",
805 805 action, upgrade_conf.0, upgrade_conf.1
806 806 );
807 807 return Err(CommandError::unsupported(message));
808 808 }
809 809 }
810 810 }
811 811 Ok(())
812 812 }
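The upgrade/downgrade decision above reduces to a small truth table over (config wants the feature, requirement already present); a sketch for reference, with `pending_action` as an illustrative name:

    /// Sketch only: the (want_it, have_it) decision made in check_auto_upgrade.
    fn pending_action(want_it: bool, have_it: bool) -> Option<&'static str> {
        match (want_it, have_it) {
            (true, false) => Some("upgrade"),   // feature wanted, requirement missing
            (false, true) => Some("downgrade"), // feature disabled, requirement present
            _ => None,                          // already in sync: rhg can proceed
        }
    }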
813 813
814 814 fn check_unsupported(
815 815 config: &Config,
816 816 repo: Result<&Repo, &NoRepoInCwdError>,
817 817 ) -> Result<(), CommandError> {
818 818 check_extensions(config)?;
819 819
820 820 if std::env::var_os("HG_PENDING").is_some() {
821 821 // TODO: only if the value is `== repo.working_directory`?
822 822 // What about relative v.s. absolute paths?
823 823 Err(CommandError::unsupported("$HG_PENDING"))?
824 824 }
825 825
826 826 if let Ok(repo) = repo {
827 827 if repo.has_subrepos()? {
828 828 Err(CommandError::unsupported("sub-repositories"))?
829 829 }
830 830 check_auto_upgrade(config, repo.requirements())?;
831 831 }
832 832
833 833 if config.has_non_empty_section(b"encode") {
834 834 Err(CommandError::unsupported("[encode] config"))?
835 835 }
836 836
837 837 if config.has_non_empty_section(b"decode") {
838 838 Err(CommandError::unsupported("[decode] config"))?
839 839 }
840 840
841 841 Ok(())
842 842 }
@@ -1,307 +1,307 b''
1 1 use crate::color::ColorConfig;
2 2 use crate::color::Effect;
3 3 use crate::error::CommandError;
4 4 use format_bytes::format_bytes;
5 5 use format_bytes::write_bytes;
6 6 use hg::config::Config;
7 7 use hg::config::PlainInfo;
8 8 use hg::errors::HgError;
9 9 use hg::repo::Repo;
10 10 use hg::sparse;
11 11 use hg::utils::files::get_bytes_from_path;
12 12 use hg::PatternFileWarning;
13 13 use std::borrow::Cow;
14 14 use std::io;
15 15 use std::io::{ErrorKind, Write};
16 16
17 17 pub struct Ui {
18 18 stdout: std::io::Stdout,
19 19 stderr: std::io::Stderr,
20 20 colors: Option<ColorConfig>,
21 21 }
22 22
23 23 /// The kind of user interface error
24 24 pub enum UiError {
25 25 /// The standard output stream cannot be written to
26 26 StdoutError(io::Error),
27 27 /// The standard error stream cannot be written to
28 28 StderrError(io::Error),
29 29 }
30 30
31 31 /// The commandline user interface
32 32 impl Ui {
33 33 pub fn new(config: &Config) -> Result<Self, HgError> {
34 34 Ok(Ui {
35 35 // If using something else, also adapt `isatty()` below.
36 36 stdout: std::io::stdout(),
37 37
38 38 stderr: std::io::stderr(),
39 39 colors: ColorConfig::new(config)?,
40 40 })
41 41 }
42 42
43 43 /// Default to no color if color configuration errors.
44 44 ///
45 45 /// Useful when we’re already handling another error.
46 46 pub fn new_infallible(config: &Config) -> Self {
47 47 Ui {
48 48 // If using something else, also adapt `isatty()` below.
49 49 stdout: std::io::stdout(),
50 50
51 51 stderr: std::io::stderr(),
52 52 colors: ColorConfig::new(config).unwrap_or(None),
53 53 }
54 54 }
55 55
56 56 /// Returns a buffered handle on stdout for faster batch printing
57 57 /// operations.
58 58 pub fn stdout_buffer(&self) -> StdoutBuffer<std::io::StdoutLock> {
59 59 StdoutBuffer::new(self.stdout.lock())
60 60 }
61 61
62 62 /// Write bytes to stdout
63 63 pub fn write_stdout(&self, bytes: &[u8]) -> Result<(), UiError> {
64 64 let mut stdout = self.stdout.lock();
65 65
66 66 stdout.write_all(bytes).or_else(handle_stdout_error)?;
67 67
68 68 stdout.flush().or_else(handle_stdout_error)
69 69 }
70 70
71 71 /// Write bytes to stderr
72 72 pub fn write_stderr(&self, bytes: &[u8]) -> Result<(), UiError> {
73 73 let mut stderr = self.stderr.lock();
74 74
75 75 stderr.write_all(bytes).or_else(handle_stderr_error)?;
76 76
77 77 stderr.flush().or_else(handle_stderr_error)
78 78 }
79 79
80 80 /// Write bytes to stdout with the given label
81 81 ///
82 82 /// Like the optional `label` parameter in `mercurial/ui.py`,
83 83 /// this label influences the color used for this output.
84 84 pub fn write_stdout_labelled(
85 85 &self,
86 86 bytes: &[u8],
87 87 label: &str,
88 88 ) -> Result<(), UiError> {
89 89 if let Some(colors) = &self.colors {
90 90 if let Some(effects) = colors.styles.get(label.as_bytes()) {
91 91 if !effects.is_empty() {
92 92 return self
93 93 .write_stdout_with_effects(bytes, effects)
94 94 .or_else(handle_stdout_error);
95 95 }
96 96 }
97 97 }
98 98 self.write_stdout(bytes)
99 99 }
100 100
101 101 fn write_stdout_with_effects(
102 102 &self,
103 103 bytes: &[u8],
104 104 effects: &[Effect],
105 105 ) -> io::Result<()> {
106 106 let stdout = &mut self.stdout.lock();
107 107 let mut write_line = |line: &[u8], first: bool| {
108 108 // `line` does not include the newline delimiter
109 109 if !first {
110 110 stdout.write_all(b"\n")?;
111 111 }
112 112 if line.is_empty() {
113 113 return Ok(());
114 114 }
115 115 /// 0x1B == 27 == 0o33
116 116 const ASCII_ESCAPE: &[u8] = b"\x1b";
117 117 write_bytes!(stdout, b"{}[0", ASCII_ESCAPE)?;
118 118 for effect in effects {
119 119 write_bytes!(stdout, b";{}", effect)?;
120 120 }
121 121 write_bytes!(stdout, b"m")?;
122 122 stdout.write_all(line)?;
123 123 write_bytes!(stdout, b"{}[0m", ASCII_ESCAPE)
124 124 };
125 125 let mut lines = bytes.split(|&byte| byte == b'\n');
126 126 if let Some(first) = lines.next() {
127 127 write_line(first, true)?;
128 128 for line in lines {
129 129 write_line(line, false)?
130 130 }
131 131 }
132 132 stdout.flush()
133 133 }
134 134 }
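For a single non-empty line and a single effect, `write_stdout_with_effects` brackets the line with a standard SGR escape sequence; a sketch of the resulting bytes, assuming an effect that renders as `31` (red foreground):

    fn main() {
        let mut written = Vec::new();
        written.extend_from_slice(b"\x1b[0");  // ESC [ 0
        written.extend_from_slice(b";31");     // one effect
        written.extend_from_slice(b"m");
        written.extend_from_slice(b"changed"); // the line itself
        written.extend_from_slice(b"\x1b[0m"); // reset
        assert_eq!(written.as_slice(), b"\x1b[0;31mchanged\x1b[0m".as_slice());
    }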
135 135
136 136 // TODO: pass the PlainInfo to call sites directly and
137 137 // delete this function
138 138 pub fn plain(opt_feature: Option<&str>) -> bool {
139 139 let plain_info = PlainInfo::from_env();
140 140 match opt_feature {
141 141 None => plain_info.is_plain(),
142 142 Some(feature) => plain_info.is_feature_plain(feature),
143 143 }
144 144 }
145 145
146 146 /// A buffered stdout writer for faster batch printing operations.
147 147 pub struct StdoutBuffer<W: Write> {
148 148 buf: io::BufWriter<W>,
149 149 }
150 150
151 151 impl<W: Write> StdoutBuffer<W> {
152 152 pub fn new(writer: W) -> Self {
153 153 let buf = io::BufWriter::new(writer);
154 154 Self { buf }
155 155 }
156 156
157 157 /// Write bytes to stdout buffer
158 158 pub fn write_all(&mut self, bytes: &[u8]) -> Result<(), UiError> {
159 159 self.buf.write_all(bytes).or_else(handle_stdout_error)
160 160 }
161 161
162 162 /// Flush bytes to stdout
163 163 pub fn flush(&mut self) -> Result<(), UiError> {
164 164 self.buf.flush().or_else(handle_stdout_error)
165 165 }
166 166 }
167 167
168 168 /// Sometimes writing to stdout is not possible; try writing to stderr to
169 169 /// signal that failure, otherwise just bail.
170 170 fn handle_stdout_error(error: io::Error) -> Result<(), UiError> {
171 171 if let ErrorKind::BrokenPipe = error.kind() {
172 172 // This makes `| head` work for example
173 173 return Ok(());
174 174 }
175 175 let mut stderr = io::stderr();
176 176
177 177 stderr
178 178 .write_all(&format_bytes!(
179 179 b"abort: {}\n",
180 180 error.to_string().as_bytes()
181 181 ))
182 182 .map_err(UiError::StderrError)?;
183 183
184 184 stderr.flush().map_err(UiError::StderrError)?;
185 185
186 186 Err(UiError::StdoutError(error))
187 187 }
188 188
189 189 /// Sometimes writing to stderr is not possible.
190 190 fn handle_stderr_error(error: io::Error) -> Result<(), UiError> {
191 191 // A broken pipe should not result in an error
192 192 // like with `| head` for example
193 193 if let ErrorKind::BrokenPipe = error.kind() {
194 194 return Ok(());
195 195 }
196 196 Err(UiError::StderrError(error))
197 197 }
198 198
199 199 /// Encode rust strings according to the user system.
200 200 pub fn utf8_to_local(s: &str) -> Cow<[u8]> {
201 201 // TODO encode for the user's system
202 202 let bytes = s.as_bytes();
203 203 Cow::Borrowed(bytes)
204 204 }
205 205
206 206 /// Decode user system bytes to Rust string.
207 207 pub fn local_to_utf8(s: &[u8]) -> Cow<str> {
208 208 // TODO decode from the user's system
209 209 String::from_utf8_lossy(s)
210 210 }
211 211
212 212 /// Should formatted output be used?
213 213 ///
214 214 /// Note: rhg does not have the formatter mechanism yet,
215 215 /// but this is also used when deciding whether to use color.
216 216 pub fn formatted(config: &Config) -> Result<bool, HgError> {
217 217 if let Some(formatted) = config.get_option(b"ui", b"formatted")? {
218 218 Ok(formatted)
219 219 } else {
220 220 isatty(config)
221 221 }
222 222 }
223 223
224 224 pub enum RelativePaths {
225 225 Legacy,
226 226 Bool(bool),
227 227 }
228 228
229 229 pub fn relative_paths(config: &Config) -> Result<RelativePaths, HgError> {
230 230 Ok(match config.get(b"ui", b"relative-paths") {
231 231 None | Some(b"legacy") => RelativePaths::Legacy,
232 232 _ => RelativePaths::Bool(config.get_bool(b"ui", b"relative-paths")?),
233 233 })
234 234 }
235 235
236 236 fn isatty(config: &Config) -> Result<bool, HgError> {
237 237 Ok(if config.get_bool(b"ui", b"nontty")? {
238 238 false
239 239 } else {
240 240 atty::is(atty::Stream::Stdout)
241 241 })
242 242 }
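Put together, `formatted` and `isatty` implement a simple precedence: an explicit `ui.formatted` wins; otherwise output counts as formatted only when stdout is a tty and `ui.nontty` is unset. A sketch of that decision with the inputs made explicit (`formatted_decision` is an illustrative name):

    /// Sketch only: the precedence implemented by formatted() and isatty() above.
    fn formatted_decision(
        ui_formatted: Option<bool>,
        nontty: bool,
        stdout_is_tty: bool,
    ) -> bool {
        match ui_formatted {
            Some(explicit) => explicit,
            None => !nontty && stdout_is_tty,
        }
    }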
243 243
244 244 /// Return the formatted bytestring corresponding to a pattern file warning,
245 245 /// as expected by the CLI.
246 246 pub(crate) fn format_pattern_file_warning(
247 247 warning: &PatternFileWarning,
248 248 repo: &Repo,
249 249 ) -> Vec<u8> {
250 250 match warning {
251 251 PatternFileWarning::InvalidSyntax(path, syntax) => format_bytes!(
252 252 b"{}: ignoring invalid syntax '{}'\n",
253 253 get_bytes_from_path(path),
254 &*syntax
254 syntax
255 255 ),
256 256 PatternFileWarning::NoSuchFile(path) => {
257 257 let path = if let Ok(relative) =
258 258 path.strip_prefix(repo.working_directory_path())
259 259 {
260 260 relative
261 261 } else {
262 &*path
262 path
263 263 };
264 264 format_bytes!(
265 265 b"skipping unreadable pattern file '{}': \
266 266 No such file or directory\n",
267 267 get_bytes_from_path(path),
268 268 )
269 269 }
270 270 }
271 271 }
272 272
273 273 /// Print with `Ui` the formatted bytestring corresponding to a
274 274 /// sparse/narrow warning, as expected by the CLI.
275 275 pub(crate) fn print_narrow_sparse_warnings(
276 276 narrow_warnings: &[sparse::SparseWarning],
277 277 sparse_warnings: &[sparse::SparseWarning],
278 278 ui: &Ui,
279 279 repo: &Repo,
280 280 ) -> Result<(), CommandError> {
281 281 for warning in narrow_warnings.iter().chain(sparse_warnings) {
282 282 match &warning {
283 283 sparse::SparseWarning::RootWarning { context, line } => {
284 284 let msg = format_bytes!(
285 285 b"warning: {} profile cannot use paths \"
286 286 starting with /, ignoring {}\n",
287 287 context,
288 288 line
289 289 );
290 290 ui.write_stderr(&msg)?;
291 291 }
292 292 sparse::SparseWarning::ProfileNotFound { profile, rev } => {
293 293 let msg = format_bytes!(
294 294 b"warning: sparse profile '{}' not found \"
295 295 in rev {} - ignoring it\n",
296 296 profile,
297 297 rev
298 298 );
299 299 ui.write_stderr(&msg)?;
300 300 }
301 301 sparse::SparseWarning::Pattern(e) => {
302 302 ui.write_stderr(&format_pattern_file_warning(e, repo))?;
303 303 }
304 304 }
305 305 }
306 306 Ok(())
307 307 }