# HG changeset patch # User Raphaël Gomès # Date 2022-11-02 11:05:34 # Node ID 363923bd51cdd83a04571707e2d2f4c213dffef7 # Parent ca19335e86e586d4efa801317af5b8b48b9796e8 dirstate-v2: hash the source of the ignore patterns as well Fixes the test introduced in the last changeset. This caused the hash to change, which means that the check in the test had to be adapted. Since this hash is only done as a caching mechanism, invalidation does not pose any backwards compatibility issues. diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt --- a/mercurial/helptext/internals/dirstate-v2.txt +++ b/mercurial/helptext/internals/dirstate-v2.txt @@ -283,8 +283,16 @@ We define: in inclusion order. This definition is recursive, as included files can themselves include more files. -This hash is defined as the SHA-1 of the concatenation (in sorted -order) of the "expanded contents" of each "root" ignore file. +* "filepath" as the bytes of the ignore file path + relative to the root of the repository if inside the repository, + or the untouched path as defined in the configuration. + +This hash is defined as the SHA-1 of the following line format: + +<filepath> <sha1 of the "expanded contents">\n + +for each "root" ignore file. (in sorted order) + (Note that computing this does not require actually concatenating into a single contiguous byte sequence. 
Instead a SHA-1 hasher object can be created diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs +++ b/rust/hg-core/src/dirstate_tree/status.rs @@ -10,6 +10,7 @@ use crate::dirstate_tree::on_disk::Dirst use crate::matchers::get_ignore_function; use crate::matchers::Matcher; use crate::utils::files::get_bytes_from_os_string; +use crate::utils::files::get_bytes_from_path; use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::BadMatch; @@ -66,7 +67,7 @@ pub fn status<'dirstate>( let (ignore_fn, warnings) = get_ignore_function( ignore_files, &root_dir, - &mut |_pattern_bytes| {}, + &mut |_source, _pattern_bytes| {}, )?; (ignore_fn, warnings, None) } @@ -75,7 +76,24 @@ pub fn status<'dirstate>( let (ignore_fn, warnings) = get_ignore_function( ignore_files, &root_dir, - &mut |pattern_bytes| hasher.update(pattern_bytes), + &mut |source, pattern_bytes| { + // If inside the repo, use the relative version to + // make it deterministic inside tests. + // The performance hit should be negligible. 
+ let source = source + .strip_prefix(&root_dir) + .unwrap_or(source); + let source = get_bytes_from_path(source); + + let mut subhasher = Sha1::new(); + subhasher.update(pattern_bytes); + let patterns_hash = subhasher.finalize(); + + hasher.update(source); + hasher.update(b" "); + hasher.update(patterns_hash); + hasher.update(b"\n"); + }, )?; let new_hash = *hasher.finalize().as_ref(); let changed = new_hash != dmap.ignore_patterns_hash; diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs --- a/rust/hg-core/src/filepatterns.rs +++ b/rust/hg-core/src/filepatterns.rs @@ -412,11 +412,11 @@ pub fn parse_pattern_file_contents( pub fn read_pattern_file( file_path: &Path, warn: bool, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> Result<(Vec, Vec), PatternError> { match std::fs::read(file_path) { Ok(contents) => { - inspect_pattern_bytes(&contents); + inspect_pattern_bytes(file_path, &contents); parse_pattern_file_contents(&contents, file_path, None, warn) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( @@ -455,7 +455,7 @@ pub type PatternResult = Result PatternResult<(Vec, Vec)> { let (patterns, mut warnings) = read_pattern_file(pattern_file, true, inspect_pattern_bytes)?; diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs --- a/rust/hg-core/src/matchers.rs +++ b/rust/hg-core/src/matchers.rs @@ -838,7 +838,7 @@ fn build_match<'a, 'b>( pub fn get_ignore_matcher<'a>( mut all_pattern_files: Vec, root_dir: &Path, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> PatternResult<(IncludeMatcher<'a>, Vec)> { let mut all_patterns = vec![]; let mut all_warnings = vec![]; @@ -871,7 +871,7 @@ pub fn get_ignore_matcher<'a>( pub fn get_ignore_function<'a>( all_pattern_files: Vec, root_dir: &Path, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), 
) -> PatternResult<(IgnoreFnType<'a>, Vec)> { let res = get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes); diff --git a/rust/rhg/src/commands/debugignorerhg.rs b/rust/rhg/src/commands/debugignorerhg.rs --- a/rust/rhg/src/commands/debugignorerhg.rs +++ b/rust/rhg/src/commands/debugignorerhg.rs @@ -25,7 +25,7 @@ pub fn run(invocation: &crate::CliInvoca let (ignore_matcher, warnings) = get_ignore_matcher( vec![ignore_file], &repo.working_directory_path().to_owned(), - &mut |_pattern_bytes| (), + &mut |_source, _pattern_bytes| (), ) .map_err(|e| StatusError::from(e))?; diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t --- a/tests/test-hgignore.t +++ b/tests/test-hgignore.t @@ -421,18 +421,24 @@ Windows paths are accepted on input Check the hash of ignore patterns written in the dirstate This is an optimization that is only relevant when using the Rust extensions + $ cat_filename_and_hash () { + > for i in "$@"; do + > printf "$i " + > cat "$i" | "$TESTDIR"/f --raw-sha1 | sed 's/^raw-sha1=//' + > done + > } $ hg status > /dev/null - $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 - sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff + $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 + sha1=c0beb296395d48ced8e14f39009c4ea6e409bfe6 $ hg debugstate --docket | grep ignore - ignore pattern hash: 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff + ignore pattern hash: c0beb296395d48ced8e14f39009c4ea6e409bfe6 $ echo rel > .hg/testhgignorerel $ hg status > /dev/null - $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 - sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e + $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 + 
sha1=b8e63d3428ec38abc68baa27631516d5ec46b7fa $ hg debugstate --docket | grep ignore - ignore pattern hash: dea19cc7119213f24b6b582a4bae7b0cb063e34e + ignore pattern hash: b8e63d3428ec38abc68baa27631516d5ec46b7fa $ cd .. Check that the hash depends on the source of the hgignore patterns @@ -460,6 +466,6 @@ Check that the hash depends on the sourc $ hg status M dir1/.hgignore M dir2/.hgignore - ? dir1/subdir/ignored1 (missing-correct-output !) + ? dir1/subdir/ignored1 #endif