##// END OF EJS Templates
rust: apply clippy lints...
Raphaël Gomès -
r52600:ec717174 default
parent child Browse files
Show More
@@ -1,1937 +1,1932
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::path::PathBuf;
3 use std::path::PathBuf;
4
4
5 use super::on_disk;
5 use super::on_disk;
6 use super::on_disk::DirstateV2ParseError;
6 use super::on_disk::DirstateV2ParseError;
7 use super::owning::OwningDirstateMap;
7 use super::owning::OwningDirstateMap;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::pack_entry;
9 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::packed_entry_size;
10 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::parse_dirstate_entries;
11 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::CopyMapIter;
12 use crate::dirstate::CopyMapIter;
13 use crate::dirstate::DirstateV2Data;
13 use crate::dirstate::DirstateV2Data;
14 use crate::dirstate::ParentFileData;
14 use crate::dirstate::ParentFileData;
15 use crate::dirstate::StateMapIter;
15 use crate::dirstate::StateMapIter;
16 use crate::dirstate::TruncatedTimestamp;
16 use crate::dirstate::TruncatedTimestamp;
17 use crate::matchers::Matcher;
17 use crate::matchers::Matcher;
18 use crate::utils::filter_map_results;
18 use crate::utils::filter_map_results;
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 use crate::DirstateEntry;
20 use crate::DirstateEntry;
21 use crate::DirstateError;
21 use crate::DirstateError;
22 use crate::DirstateMapError;
22 use crate::DirstateMapError;
23 use crate::DirstateParents;
23 use crate::DirstateParents;
24 use crate::DirstateStatus;
24 use crate::DirstateStatus;
25 use crate::FastHashbrownMap as FastHashMap;
25 use crate::FastHashbrownMap as FastHashMap;
26 use crate::PatternFileWarning;
26 use crate::PatternFileWarning;
27 use crate::StatusError;
27 use crate::StatusError;
28 use crate::StatusOptions;
28 use crate::StatusOptions;
29
29
30 /// Append to an existing data file if the amount of unreachable data (not used
30 /// Append to an existing data file if the amount of unreachable data (not used
31 /// anymore) is less than this fraction of the total amount of existing data.
31 /// anymore) is less than this fraction of the total amount of existing data.
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33
33
34 #[derive(Debug, PartialEq, Eq)]
34 #[derive(Debug, PartialEq, Eq)]
35 /// Version of the on-disk format
35 /// Version of the on-disk format
36 pub enum DirstateVersion {
36 pub enum DirstateVersion {
37 V1,
37 V1,
38 V2,
38 V2,
39 }
39 }
40
40
41 #[derive(Debug, PartialEq, Eq)]
41 #[derive(Debug, PartialEq, Eq)]
42 pub enum DirstateMapWriteMode {
42 pub enum DirstateMapWriteMode {
43 Auto,
43 Auto,
44 ForceNewDataFile,
44 ForceNewDataFile,
45 ForceAppend,
45 ForceAppend,
46 }
46 }
47
47
48 #[derive(Debug)]
48 #[derive(Debug)]
49 pub struct DirstateMap<'on_disk> {
49 pub struct DirstateMap<'on_disk> {
50 /// Contents of the `.hg/dirstate` file
50 /// Contents of the `.hg/dirstate` file
51 pub(super) on_disk: &'on_disk [u8],
51 pub(super) on_disk: &'on_disk [u8],
52
52
53 pub(super) root: ChildNodes<'on_disk>,
53 pub(super) root: ChildNodes<'on_disk>,
54
54
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 pub(super) nodes_with_entry_count: u32,
56 pub(super) nodes_with_entry_count: u32,
57
57
58 /// Number of nodes anywhere in the tree that have
58 /// Number of nodes anywhere in the tree that have
59 /// `.copy_source.is_some()`.
59 /// `.copy_source.is_some()`.
60 pub(super) nodes_with_copy_source_count: u32,
60 pub(super) nodes_with_copy_source_count: u32,
61
61
62 /// See on_disk::Header
62 /// See on_disk::Header
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64
64
65 /// How many bytes of `on_disk` are not used anymore
65 /// How many bytes of `on_disk` are not used anymore
66 pub(super) unreachable_bytes: u32,
66 pub(super) unreachable_bytes: u32,
67
67
68 /// Size of the data used to first load this `DirstateMap`. Used in case
68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 /// we need to write some new metadata, but no new data on disk,
69 /// we need to write some new metadata, but no new data on disk,
70 /// as well as to detect writes that have happened in another process
70 /// as well as to detect writes that have happened in another process
71 /// since first read.
71 /// since first read.
72 pub(super) old_data_size: usize,
72 pub(super) old_data_size: usize,
73
73
74 /// UUID used when first loading this `DirstateMap`. Used to check if
74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 /// the UUID has been changed by another process since first read.
75 /// the UUID has been changed by another process since first read.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 pub(super) old_uuid: Option<Vec<u8>>,
77 pub(super) old_uuid: Option<Vec<u8>>,
78
78
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 /// (v2). Used to detect if the file has changed from another process.
80 /// (v2). Used to detect if the file has changed from another process.
81 /// Since it's always written atomically, we can compare the inode to
81 /// Since it's always written atomically, we can compare the inode to
82 /// check the file identity.
82 /// check the file identity.
83 ///
83 ///
84 /// TODO On non-Unix systems, something like hashing is a possibility?
84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 pub(super) identity: Option<u64>,
85 pub(super) identity: Option<u64>,
86
86
87 pub(super) dirstate_version: DirstateVersion,
87 pub(super) dirstate_version: DirstateVersion,
88
88
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
90 pub(super) write_mode: DirstateMapWriteMode,
90 pub(super) write_mode: DirstateMapWriteMode,
91 }
91 }
92
92
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
94 /// map key would also work: all paths in a given map have the same parent
94 /// map key would also work: all paths in a given map have the same parent
95 /// path, so comparing full paths gives the same result as comparing base
95 /// path, so comparing full paths gives the same result as comparing base
96 /// names. However `HashMap` would waste time always re-hashing the same
96 /// names. However `HashMap` would waste time always re-hashing the same
97 /// string prefix.
97 /// string prefix.
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
99
99
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
102 #[derive(Debug)]
102 #[derive(Debug)]
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
104 InMemory(&'tree HgPathBuf),
104 InMemory(&'tree HgPathBuf),
105 OnDisk(&'on_disk HgPath),
105 OnDisk(&'on_disk HgPath),
106 }
106 }
107
107
108 #[derive(Debug)]
108 #[derive(Debug)]
109 pub(super) enum ChildNodes<'on_disk> {
109 pub(super) enum ChildNodes<'on_disk> {
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
111 OnDisk(&'on_disk [on_disk::Node]),
111 OnDisk(&'on_disk [on_disk::Node]),
112 }
112 }
113
113
114 #[derive(Debug)]
114 #[derive(Debug)]
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
117 OnDisk(&'on_disk [on_disk::Node]),
117 OnDisk(&'on_disk [on_disk::Node]),
118 }
118 }
119
119
120 #[derive(Debug)]
120 #[derive(Debug)]
121 pub(super) enum NodeRef<'tree, 'on_disk> {
121 pub(super) enum NodeRef<'tree, 'on_disk> {
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
123 OnDisk(&'on_disk on_disk::Node),
123 OnDisk(&'on_disk on_disk::Node),
124 }
124 }
125
125
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
128 match *self {
128 match *self {
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
131 }
131 }
132 }
132 }
133 }
133 }
134
134
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
136 type Target = HgPath;
136 type Target = HgPath;
137
137
138 fn deref(&self) -> &HgPath {
138 fn deref(&self) -> &HgPath {
139 match *self {
139 match *self {
140 BorrowedPath::InMemory(in_memory) => in_memory,
140 BorrowedPath::InMemory(in_memory) => in_memory,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
142 }
142 }
143 }
143 }
144 }
144 }
145
145
146 impl Default for ChildNodes<'_> {
146 impl Default for ChildNodes<'_> {
147 fn default() -> Self {
147 fn default() -> Self {
148 ChildNodes::InMemory(Default::default())
148 ChildNodes::InMemory(Default::default())
149 }
149 }
150 }
150 }
151
151
152 impl<'on_disk> ChildNodes<'on_disk> {
152 impl<'on_disk> ChildNodes<'on_disk> {
153 pub(super) fn as_ref<'tree>(
153 pub(super) fn as_ref<'tree>(
154 &'tree self,
154 &'tree self,
155 ) -> ChildNodesRef<'tree, 'on_disk> {
155 ) -> ChildNodesRef<'tree, 'on_disk> {
156 match self {
156 match self {
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
159 }
159 }
160 }
160 }
161
161
162 pub(super) fn is_empty(&self) -> bool {
162 pub(super) fn is_empty(&self) -> bool {
163 match self {
163 match self {
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
166 }
166 }
167 }
167 }
168
168
169 fn make_mut(
169 fn make_mut(
170 &mut self,
170 &mut self,
171 on_disk: &'on_disk [u8],
171 on_disk: &'on_disk [u8],
172 unreachable_bytes: &mut u32,
172 unreachable_bytes: &mut u32,
173 ) -> Result<
173 ) -> Result<
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
175 DirstateV2ParseError,
175 DirstateV2ParseError,
176 > {
176 > {
177 match self {
177 match self {
178 ChildNodes::InMemory(nodes) => Ok(nodes),
178 ChildNodes::InMemory(nodes) => Ok(nodes),
179 ChildNodes::OnDisk(nodes) => {
179 ChildNodes::OnDisk(nodes) => {
180 *unreachable_bytes +=
180 *unreachable_bytes +=
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
182 let nodes = nodes
182 let nodes = nodes
183 .iter()
183 .iter()
184 .map(|node| {
184 .map(|node| {
185 Ok((
185 Ok((
186 node.path(on_disk)?,
186 node.path(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
188 ))
188 ))
189 })
189 })
190 .collect::<Result<_, _>>()?;
190 .collect::<Result<_, _>>()?;
191 *self = ChildNodes::InMemory(nodes);
191 *self = ChildNodes::InMemory(nodes);
192 match self {
192 match self {
193 ChildNodes::InMemory(nodes) => Ok(nodes),
193 ChildNodes::InMemory(nodes) => Ok(nodes),
194 ChildNodes::OnDisk(_) => unreachable!(),
194 ChildNodes::OnDisk(_) => unreachable!(),
195 }
195 }
196 }
196 }
197 }
197 }
198 }
198 }
199 }
199 }
200
200
201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
202 pub(super) fn get(
202 pub(super) fn get(
203 &self,
203 &self,
204 base_name: &HgPath,
204 base_name: &HgPath,
205 on_disk: &'on_disk [u8],
205 on_disk: &'on_disk [u8],
206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
207 match self {
207 match self {
208 ChildNodesRef::InMemory(nodes) => Ok(nodes
208 ChildNodesRef::InMemory(nodes) => Ok(nodes
209 .get_key_value(base_name)
209 .get_key_value(base_name)
210 .map(|(k, v)| NodeRef::InMemory(k, v))),
210 .map(|(k, v)| NodeRef::InMemory(k, v))),
211 ChildNodesRef::OnDisk(nodes) => {
211 ChildNodesRef::OnDisk(nodes) => {
212 let mut parse_result = Ok(());
212 let mut parse_result = Ok(());
213 let search_result = nodes.binary_search_by(|node| {
213 let search_result = nodes.binary_search_by(|node| {
214 match node.base_name(on_disk) {
214 match node.base_name(on_disk) {
215 Ok(node_base_name) => node_base_name.cmp(base_name),
215 Ok(node_base_name) => node_base_name.cmp(base_name),
216 Err(e) => {
216 Err(e) => {
217 parse_result = Err(e);
217 parse_result = Err(e);
218 // Dummy comparison result, `search_result` won’t
218 // Dummy comparison result, `search_result` won’t
219 // be used since `parse_result` is an error
219 // be used since `parse_result` is an error
220 std::cmp::Ordering::Equal
220 std::cmp::Ordering::Equal
221 }
221 }
222 }
222 }
223 });
223 });
224 parse_result.map(|()| {
224 parse_result.map(|()| {
225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
226 })
226 })
227 }
227 }
228 }
228 }
229 }
229 }
230
230
231 /// Iterate in undefined order
231 /// Iterate in undefined order
232 pub(super) fn iter(
232 pub(super) fn iter(
233 &self,
233 &self,
234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
235 match self {
235 match self {
236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
238 ),
238 ),
239 ChildNodesRef::OnDisk(nodes) => {
239 ChildNodesRef::OnDisk(nodes) => {
240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
241 }
241 }
242 }
242 }
243 }
243 }
244
244
245 /// Iterate in parallel in undefined order
245 /// Iterate in parallel in undefined order
246 pub(super) fn par_iter(
246 pub(super) fn par_iter(
247 &self,
247 &self,
248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
249 {
249 {
250 use rayon::prelude::*;
250 use rayon::prelude::*;
251 match self {
251 match self {
252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
254 ),
254 ),
255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
256 nodes.par_iter().map(NodeRef::OnDisk),
256 nodes.par_iter().map(NodeRef::OnDisk),
257 ),
257 ),
258 }
258 }
259 }
259 }
260
260
261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
262 match self {
262 match self {
263 ChildNodesRef::InMemory(nodes) => {
263 ChildNodesRef::InMemory(nodes) => {
264 let mut vec: Vec<_> = nodes
264 let mut vec: Vec<_> = nodes
265 .iter()
265 .iter()
266 .map(|(k, v)| NodeRef::InMemory(k, v))
266 .map(|(k, v)| NodeRef::InMemory(k, v))
267 .collect();
267 .collect();
268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
269 match node {
269 match node {
270 NodeRef::InMemory(path, _node) => path.base_name(),
270 NodeRef::InMemory(path, _node) => path.base_name(),
271 NodeRef::OnDisk(_) => unreachable!(),
271 NodeRef::OnDisk(_) => unreachable!(),
272 }
272 }
273 }
273 }
274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
275 // value: https://github.com/rust-lang/rust/issues/34162
275 // value: https://github.com/rust-lang/rust/issues/34162
276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
277 vec
277 vec
278 }
278 }
279 ChildNodesRef::OnDisk(nodes) => {
279 ChildNodesRef::OnDisk(nodes) => {
280 // Nodes on disk are already sorted
280 // Nodes on disk are already sorted
281 nodes.iter().map(NodeRef::OnDisk).collect()
281 nodes.iter().map(NodeRef::OnDisk).collect()
282 }
282 }
283 }
283 }
284 }
284 }
285 }
285 }
286
286
287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
288 pub(super) fn full_path(
288 pub(super) fn full_path(
289 &self,
289 &self,
290 on_disk: &'on_disk [u8],
290 on_disk: &'on_disk [u8],
291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
292 match self {
292 match self {
293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
294 NodeRef::OnDisk(node) => node.full_path(on_disk),
294 NodeRef::OnDisk(node) => node.full_path(on_disk),
295 }
295 }
296 }
296 }
297
297
298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
299 /// HgPath>` detached from `'tree`
299 /// HgPath>` detached from `'tree`
300 pub(super) fn full_path_borrowed(
300 pub(super) fn full_path_borrowed(
301 &self,
301 &self,
302 on_disk: &'on_disk [u8],
302 on_disk: &'on_disk [u8],
303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
304 match self {
304 match self {
305 NodeRef::InMemory(path, _node) => match path.full_path() {
305 NodeRef::InMemory(path, _node) => match path.full_path() {
306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
308 },
308 },
309 NodeRef::OnDisk(node) => {
309 NodeRef::OnDisk(node) => {
310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
311 }
311 }
312 }
312 }
313 }
313 }
314
314
315 pub(super) fn base_name(
315 pub(super) fn base_name(
316 &self,
316 &self,
317 on_disk: &'on_disk [u8],
317 on_disk: &'on_disk [u8],
318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
319 match self {
319 match self {
320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
321 NodeRef::OnDisk(node) => node.base_name(on_disk),
321 NodeRef::OnDisk(node) => node.base_name(on_disk),
322 }
322 }
323 }
323 }
324
324
325 pub(super) fn children(
325 pub(super) fn children(
326 &self,
326 &self,
327 on_disk: &'on_disk [u8],
327 on_disk: &'on_disk [u8],
328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
329 match self {
329 match self {
330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
331 NodeRef::OnDisk(node) => {
331 NodeRef::OnDisk(node) => {
332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
333 }
333 }
334 }
334 }
335 }
335 }
336
336
337 pub(super) fn has_copy_source(&self) -> bool {
337 pub(super) fn has_copy_source(&self) -> bool {
338 match self {
338 match self {
339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
340 NodeRef::OnDisk(node) => node.has_copy_source(),
340 NodeRef::OnDisk(node) => node.has_copy_source(),
341 }
341 }
342 }
342 }
343
343
344 pub(super) fn copy_source(
344 pub(super) fn copy_source(
345 &self,
345 &self,
346 on_disk: &'on_disk [u8],
346 on_disk: &'on_disk [u8],
347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
348 match self {
348 match self {
349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
351 }
351 }
352 }
352 }
353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
354 /// HgPath>` detached from `'tree`
354 /// HgPath>` detached from `'tree`
355 pub(super) fn copy_source_borrowed(
355 pub(super) fn copy_source_borrowed(
356 &self,
356 &self,
357 on_disk: &'on_disk [u8],
357 on_disk: &'on_disk [u8],
358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
359 {
359 {
360 Ok(match self {
360 Ok(match self {
361 NodeRef::InMemory(_path, node) => {
361 NodeRef::InMemory(_path, node) => {
362 node.copy_source.as_ref().map(|source| match source {
362 node.copy_source.as_ref().map(|source| match source {
363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
365 })
365 })
366 }
366 }
367 NodeRef::OnDisk(node) => {
367 NodeRef::OnDisk(node) => {
368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
369 }
369 }
370 })
370 })
371 }
371 }
372
372
373 pub(super) fn entry(
373 pub(super) fn entry(
374 &self,
374 &self,
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 match self {
376 match self {
377 NodeRef::InMemory(_path, node) => {
377 NodeRef::InMemory(_path, node) => {
378 Ok(node.data.as_entry().copied())
378 Ok(node.data.as_entry().copied())
379 }
379 }
380 NodeRef::OnDisk(node) => node.entry(),
380 NodeRef::OnDisk(node) => node.entry(),
381 }
381 }
382 }
382 }
383
383
384 pub(super) fn cached_directory_mtime(
384 pub(super) fn cached_directory_mtime(
385 &self,
385 &self,
386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
387 match self {
387 match self {
388 NodeRef::InMemory(_path, node) => Ok(match node.data {
388 NodeRef::InMemory(_path, node) => Ok(match node.data {
389 NodeData::CachedDirectory { mtime } => Some(mtime),
389 NodeData::CachedDirectory { mtime } => Some(mtime),
390 _ => None,
390 _ => None,
391 }),
391 }),
392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
393 }
393 }
394 }
394 }
395
395
396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
397 match self {
397 match self {
398 NodeRef::InMemory(_path, node) => {
398 NodeRef::InMemory(_path, node) => {
399 node.descendants_with_entry_count
399 node.descendants_with_entry_count
400 }
400 }
401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
402 }
402 }
403 }
403 }
404
404
405 pub(super) fn tracked_descendants_count(&self) -> u32 {
405 pub(super) fn tracked_descendants_count(&self) -> u32 {
406 match self {
406 match self {
407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
409 }
409 }
410 }
410 }
411 }
411 }
412
412
413 /// Represents a file or a directory
413 /// Represents a file or a directory
414 #[derive(Default, Debug)]
414 #[derive(Default, Debug)]
415 pub(super) struct Node<'on_disk> {
415 pub(super) struct Node<'on_disk> {
416 pub(super) data: NodeData,
416 pub(super) data: NodeData,
417
417
418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
419
419
420 pub(super) children: ChildNodes<'on_disk>,
420 pub(super) children: ChildNodes<'on_disk>,
421
421
422 /// How many (non-inclusive) descendants of this node have an entry.
422 /// How many (non-inclusive) descendants of this node have an entry.
423 pub(super) descendants_with_entry_count: u32,
423 pub(super) descendants_with_entry_count: u32,
424
424
425 /// How many (non-inclusive) descendants of this node have an entry whose
425 /// How many (non-inclusive) descendants of this node have an entry whose
426 /// state is "tracked".
426 /// state is "tracked".
427 pub(super) tracked_descendants_count: u32,
427 pub(super) tracked_descendants_count: u32,
428 }
428 }
429
429
430 #[derive(Debug)]
430 #[derive(Debug, Default)]
431 pub(super) enum NodeData {
431 pub(super) enum NodeData {
432 Entry(DirstateEntry),
432 Entry(DirstateEntry),
433 CachedDirectory { mtime: TruncatedTimestamp },
433 CachedDirectory { mtime: TruncatedTimestamp },
434 #[default]
434 None,
435 None,
435 }
436 }
436
437
437 impl Default for NodeData {
438 fn default() -> Self {
439 NodeData::None
440 }
441 }
442
443 impl NodeData {
438 impl NodeData {
444 fn has_entry(&self) -> bool {
439 fn has_entry(&self) -> bool {
445 matches!(self, NodeData::Entry(_))
440 matches!(self, NodeData::Entry(_))
446 }
441 }
447
442
448 fn as_entry(&self) -> Option<&DirstateEntry> {
443 fn as_entry(&self) -> Option<&DirstateEntry> {
449 match self {
444 match self {
450 NodeData::Entry(entry) => Some(entry),
445 NodeData::Entry(entry) => Some(entry),
451 _ => None,
446 _ => None,
452 }
447 }
453 }
448 }
454
449
455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
450 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
456 match self {
451 match self {
457 NodeData::Entry(entry) => Some(entry),
452 NodeData::Entry(entry) => Some(entry),
458 _ => None,
453 _ => None,
459 }
454 }
460 }
455 }
461 }
456 }
462
457
463 impl<'on_disk> DirstateMap<'on_disk> {
458 impl<'on_disk> DirstateMap<'on_disk> {
464 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
459 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
465 Self {
460 Self {
466 on_disk,
461 on_disk,
467 root: ChildNodes::default(),
462 root: ChildNodes::default(),
468 nodes_with_entry_count: 0,
463 nodes_with_entry_count: 0,
469 nodes_with_copy_source_count: 0,
464 nodes_with_copy_source_count: 0,
470 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
465 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
471 unreachable_bytes: 0,
466 unreachable_bytes: 0,
472 old_data_size: 0,
467 old_data_size: 0,
473 old_uuid: None,
468 old_uuid: None,
474 identity: None,
469 identity: None,
475 dirstate_version: DirstateVersion::V1,
470 dirstate_version: DirstateVersion::V1,
476 write_mode: DirstateMapWriteMode::Auto,
471 write_mode: DirstateMapWriteMode::Auto,
477 }
472 }
478 }
473 }
479
474
480 #[logging_timer::time("trace")]
475 #[logging_timer::time("trace")]
481 pub fn new_v2(
476 pub fn new_v2(
482 on_disk: &'on_disk [u8],
477 on_disk: &'on_disk [u8],
483 data_size: usize,
478 data_size: usize,
484 metadata: &[u8],
479 metadata: &[u8],
485 uuid: Vec<u8>,
480 uuid: Vec<u8>,
486 identity: Option<u64>,
481 identity: Option<u64>,
487 ) -> Result<Self, DirstateError> {
482 ) -> Result<Self, DirstateError> {
488 if let Some(data) = on_disk.get(..data_size) {
483 if let Some(data) = on_disk.get(..data_size) {
489 Ok(on_disk::read(data, metadata, uuid, identity)?)
484 Ok(on_disk::read(data, metadata, uuid, identity)?)
490 } else {
485 } else {
491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
486 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
492 }
487 }
493 }
488 }
494
489
495 #[logging_timer::time("trace")]
490 #[logging_timer::time("trace")]
496 pub fn new_v1(
491 pub fn new_v1(
497 on_disk: &'on_disk [u8],
492 on_disk: &'on_disk [u8],
498 identity: Option<u64>,
493 identity: Option<u64>,
499 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
494 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
500 let mut map = Self::empty(on_disk);
495 let mut map = Self::empty(on_disk);
501 if map.on_disk.is_empty() {
496 if map.on_disk.is_empty() {
502 return Ok((map, None));
497 return Ok((map, None));
503 }
498 }
504
499
505 let parents = parse_dirstate_entries(
500 let parents = parse_dirstate_entries(
506 map.on_disk,
501 map.on_disk,
507 |path, entry, copy_source| {
502 |path, entry, copy_source| {
508 let tracked = entry.tracked();
503 let tracked = entry.tracked();
509 let node = Self::get_or_insert_node_inner(
504 let node = Self::get_or_insert_node_inner(
510 map.on_disk,
505 map.on_disk,
511 &mut map.unreachable_bytes,
506 &mut map.unreachable_bytes,
512 &mut map.root,
507 &mut map.root,
513 path,
508 path,
514 WithBasename::to_cow_borrowed,
509 WithBasename::to_cow_borrowed,
515 |ancestor| {
510 |ancestor| {
516 if tracked {
511 if tracked {
517 ancestor.tracked_descendants_count += 1
512 ancestor.tracked_descendants_count += 1
518 }
513 }
519 ancestor.descendants_with_entry_count += 1
514 ancestor.descendants_with_entry_count += 1
520 },
515 },
521 )?;
516 )?;
522 assert!(
517 assert!(
523 !node.data.has_entry(),
518 !node.data.has_entry(),
524 "duplicate dirstate entry in read"
519 "duplicate dirstate entry in read"
525 );
520 );
526 assert!(
521 assert!(
527 node.copy_source.is_none(),
522 node.copy_source.is_none(),
528 "duplicate dirstate entry in read"
523 "duplicate dirstate entry in read"
529 );
524 );
530 node.data = NodeData::Entry(*entry);
525 node.data = NodeData::Entry(*entry);
531 node.copy_source = copy_source.map(Cow::Borrowed);
526 node.copy_source = copy_source.map(Cow::Borrowed);
532 map.nodes_with_entry_count += 1;
527 map.nodes_with_entry_count += 1;
533 if copy_source.is_some() {
528 if copy_source.is_some() {
534 map.nodes_with_copy_source_count += 1
529 map.nodes_with_copy_source_count += 1
535 }
530 }
536 Ok(())
531 Ok(())
537 },
532 },
538 )?;
533 )?;
539 let parents = Some(*parents);
534 let parents = Some(*parents);
540 map.identity = identity;
535 map.identity = identity;
541
536
542 Ok((map, parents))
537 Ok((map, parents))
543 }
538 }
544
539
545 /// Assuming dirstate-v2 format, returns whether the next write should
540 /// Assuming dirstate-v2 format, returns whether the next write should
546 /// append to the existing data file that contains `self.on_disk` (true),
541 /// append to the existing data file that contains `self.on_disk` (true),
547 /// or create a new data file from scratch (false).
542 /// or create a new data file from scratch (false).
548 pub(super) fn write_should_append(&self) -> bool {
543 pub(super) fn write_should_append(&self) -> bool {
549 match self.write_mode {
544 match self.write_mode {
550 DirstateMapWriteMode::ForceAppend => true,
545 DirstateMapWriteMode::ForceAppend => true,
551 DirstateMapWriteMode::ForceNewDataFile => false,
546 DirstateMapWriteMode::ForceNewDataFile => false,
552 DirstateMapWriteMode::Auto => {
547 DirstateMapWriteMode::Auto => {
553 let ratio =
548 let ratio =
554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
549 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
550 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
556 }
551 }
557 }
552 }
558 }
553 }
559
554
560 fn get_node<'tree>(
555 fn get_node<'tree>(
561 &'tree self,
556 &'tree self,
562 path: &HgPath,
557 path: &HgPath,
563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
558 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
564 let mut children = self.root.as_ref();
559 let mut children = self.root.as_ref();
565 let mut components = path.components();
560 let mut components = path.components();
566 let mut component =
561 let mut component =
567 components.next().expect("expected at least one components");
562 components.next().expect("expected at least one components");
568 loop {
563 loop {
569 if let Some(child) = children.get(component, self.on_disk)? {
564 if let Some(child) = children.get(component, self.on_disk)? {
570 if let Some(next_component) = components.next() {
565 if let Some(next_component) = components.next() {
571 component = next_component;
566 component = next_component;
572 children = child.children(self.on_disk)?;
567 children = child.children(self.on_disk)?;
573 } else {
568 } else {
574 return Ok(Some(child));
569 return Ok(Some(child));
575 }
570 }
576 } else {
571 } else {
577 return Ok(None);
572 return Ok(None);
578 }
573 }
579 }
574 }
580 }
575 }
581
576
582 pub fn has_node(
577 pub fn has_node(
583 &self,
578 &self,
584 path: &HgPath,
579 path: &HgPath,
585 ) -> Result<bool, DirstateV2ParseError> {
580 ) -> Result<bool, DirstateV2ParseError> {
586 let node = self.get_node(path)?;
581 let node = self.get_node(path)?;
587 Ok(node.is_some())
582 Ok(node.is_some())
588 }
583 }
589
584
590 /// Returns a mutable reference to the node at `path` if it exists
585 /// Returns a mutable reference to the node at `path` if it exists
591 ///
586 ///
592 /// `each_ancestor` is a callback that is called for each ancestor node
587 /// `each_ancestor` is a callback that is called for each ancestor node
593 /// when descending the tree. It is used to keep the different counters
588 /// when descending the tree. It is used to keep the different counters
594 /// of the `DirstateMap` up-to-date.
589 /// of the `DirstateMap` up-to-date.
595 fn get_node_mut<'tree>(
590 fn get_node_mut<'tree>(
596 &'tree mut self,
591 &'tree mut self,
597 path: &HgPath,
592 path: &HgPath,
598 each_ancestor: impl FnMut(&mut Node),
593 each_ancestor: impl FnMut(&mut Node),
599 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
594 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
600 Self::get_node_mut_inner(
595 Self::get_node_mut_inner(
601 self.on_disk,
596 self.on_disk,
602 &mut self.unreachable_bytes,
597 &mut self.unreachable_bytes,
603 &mut self.root,
598 &mut self.root,
604 path,
599 path,
605 each_ancestor,
600 each_ancestor,
606 )
601 )
607 }
602 }
608
603
609 /// Lower-level version of `get_node_mut`.
604 /// Lower-level version of `get_node_mut`.
610 ///
605 ///
611 /// This takes `root` instead of `&mut self` so that callers can mutate
606 /// This takes `root` instead of `&mut self` so that callers can mutate
612 /// other fields while the returned borrow is still valid.
607 /// other fields while the returned borrow is still valid.
613 ///
608 ///
614 /// `each_ancestor` is a callback that is called for each ancestor node
609 /// `each_ancestor` is a callback that is called for each ancestor node
615 /// when descending the tree. It is used to keep the different counters
610 /// when descending the tree. It is used to keep the different counters
616 /// of the `DirstateMap` up-to-date.
611 /// of the `DirstateMap` up-to-date.
617 fn get_node_mut_inner<'tree>(
612 fn get_node_mut_inner<'tree>(
618 on_disk: &'on_disk [u8],
613 on_disk: &'on_disk [u8],
619 unreachable_bytes: &mut u32,
614 unreachable_bytes: &mut u32,
620 root: &'tree mut ChildNodes<'on_disk>,
615 root: &'tree mut ChildNodes<'on_disk>,
621 path: &HgPath,
616 path: &HgPath,
622 mut each_ancestor: impl FnMut(&mut Node),
617 mut each_ancestor: impl FnMut(&mut Node),
623 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
618 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
624 let mut children = root;
619 let mut children = root;
625 let mut components = path.components();
620 let mut components = path.components();
626 let mut component =
621 let mut component =
627 components.next().expect("expected at least one components");
622 components.next().expect("expected at least one components");
628 loop {
623 loop {
629 if let Some(child) = children
624 if let Some(child) = children
630 .make_mut(on_disk, unreachable_bytes)?
625 .make_mut(on_disk, unreachable_bytes)?
631 .get_mut(component)
626 .get_mut(component)
632 {
627 {
633 if let Some(next_component) = components.next() {
628 if let Some(next_component) = components.next() {
634 each_ancestor(child);
629 each_ancestor(child);
635 component = next_component;
630 component = next_component;
636 children = &mut child.children;
631 children = &mut child.children;
637 } else {
632 } else {
638 return Ok(Some(child));
633 return Ok(Some(child));
639 }
634 }
640 } else {
635 } else {
641 return Ok(None);
636 return Ok(None);
642 }
637 }
643 }
638 }
644 }
639 }
645
640
646 /// Get a mutable reference to the node at `path`, creating it if it does
641 /// Get a mutable reference to the node at `path`, creating it if it does
647 /// not exist.
642 /// not exist.
648 ///
643 ///
649 /// `each_ancestor` is a callback that is called for each ancestor node
644 /// `each_ancestor` is a callback that is called for each ancestor node
650 /// when descending the tree. It is used to keep the different counters
645 /// when descending the tree. It is used to keep the different counters
651 /// of the `DirstateMap` up-to-date.
646 /// of the `DirstateMap` up-to-date.
652 fn get_or_insert_node<'tree, 'path>(
647 fn get_or_insert_node<'tree, 'path>(
653 &'tree mut self,
648 &'tree mut self,
654 path: &'path HgPath,
649 path: &'path HgPath,
655 each_ancestor: impl FnMut(&mut Node),
650 each_ancestor: impl FnMut(&mut Node),
656 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
651 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
657 Self::get_or_insert_node_inner(
652 Self::get_or_insert_node_inner(
658 self.on_disk,
653 self.on_disk,
659 &mut self.unreachable_bytes,
654 &mut self.unreachable_bytes,
660 &mut self.root,
655 &mut self.root,
661 path,
656 path,
662 WithBasename::to_cow_owned,
657 WithBasename::to_cow_owned,
663 each_ancestor,
658 each_ancestor,
664 )
659 )
665 }
660 }
666
661
667 /// Lower-level version of `get_or_insert_node_inner`, which is used when
662 /// Lower-level version of `get_or_insert_node_inner`, which is used when
668 /// parsing disk data to remove allocations for new nodes.
663 /// parsing disk data to remove allocations for new nodes.
669 fn get_or_insert_node_inner<'tree, 'path>(
664 fn get_or_insert_node_inner<'tree, 'path>(
670 on_disk: &'on_disk [u8],
665 on_disk: &'on_disk [u8],
671 unreachable_bytes: &mut u32,
666 unreachable_bytes: &mut u32,
672 root: &'tree mut ChildNodes<'on_disk>,
667 root: &'tree mut ChildNodes<'on_disk>,
673 path: &'path HgPath,
668 path: &'path HgPath,
674 to_cow: impl Fn(
669 to_cow: impl Fn(
675 WithBasename<&'path HgPath>,
670 WithBasename<&'path HgPath>,
676 ) -> WithBasename<Cow<'on_disk, HgPath>>,
671 ) -> WithBasename<Cow<'on_disk, HgPath>>,
677 mut each_ancestor: impl FnMut(&mut Node),
672 mut each_ancestor: impl FnMut(&mut Node),
678 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
673 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
679 let mut child_nodes = root;
674 let mut child_nodes = root;
680 let mut inclusive_ancestor_paths =
675 let mut inclusive_ancestor_paths =
681 WithBasename::inclusive_ancestors_of(path);
676 WithBasename::inclusive_ancestors_of(path);
682 let mut ancestor_path = inclusive_ancestor_paths
677 let mut ancestor_path = inclusive_ancestor_paths
683 .next()
678 .next()
684 .expect("expected at least one inclusive ancestor");
679 .expect("expected at least one inclusive ancestor");
685 loop {
680 loop {
686 let (_, child_node) = child_nodes
681 let (_, child_node) = child_nodes
687 .make_mut(on_disk, unreachable_bytes)?
682 .make_mut(on_disk, unreachable_bytes)?
688 .raw_entry_mut()
683 .raw_entry_mut()
689 .from_key(ancestor_path.base_name())
684 .from_key(ancestor_path.base_name())
690 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
685 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
691 if let Some(next) = inclusive_ancestor_paths.next() {
686 if let Some(next) = inclusive_ancestor_paths.next() {
692 each_ancestor(child_node);
687 each_ancestor(child_node);
693 ancestor_path = next;
688 ancestor_path = next;
694 child_nodes = &mut child_node.children;
689 child_nodes = &mut child_node.children;
695 } else {
690 } else {
696 return Ok(child_node);
691 return Ok(child_node);
697 }
692 }
698 }
693 }
699 }
694 }
700
695
701 #[allow(clippy::too_many_arguments)]
696 #[allow(clippy::too_many_arguments)]
702 fn reset_state(
697 fn reset_state(
703 &mut self,
698 &mut self,
704 filename: &HgPath,
699 filename: &HgPath,
705 old_entry_opt: Option<DirstateEntry>,
700 old_entry_opt: Option<DirstateEntry>,
706 wc_tracked: bool,
701 wc_tracked: bool,
707 p1_tracked: bool,
702 p1_tracked: bool,
708 p2_info: bool,
703 p2_info: bool,
709 has_meaningful_mtime: bool,
704 has_meaningful_mtime: bool,
710 parent_file_data_opt: Option<ParentFileData>,
705 parent_file_data_opt: Option<ParentFileData>,
711 ) -> Result<(), DirstateError> {
706 ) -> Result<(), DirstateError> {
712 let (had_entry, was_tracked) = match old_entry_opt {
707 let (had_entry, was_tracked) = match old_entry_opt {
713 Some(old_entry) => (true, old_entry.tracked()),
708 Some(old_entry) => (true, old_entry.tracked()),
714 None => (false, false),
709 None => (false, false),
715 };
710 };
716 let node = self.get_or_insert_node(filename, |ancestor| {
711 let node = self.get_or_insert_node(filename, |ancestor| {
717 if !had_entry {
712 if !had_entry {
718 ancestor.descendants_with_entry_count += 1;
713 ancestor.descendants_with_entry_count += 1;
719 }
714 }
720 if was_tracked {
715 if was_tracked {
721 if !wc_tracked {
716 if !wc_tracked {
722 ancestor.tracked_descendants_count = ancestor
717 ancestor.tracked_descendants_count = ancestor
723 .tracked_descendants_count
718 .tracked_descendants_count
724 .checked_sub(1)
719 .checked_sub(1)
725 .expect("tracked count to be >= 0");
720 .expect("tracked count to be >= 0");
726 }
721 }
727 } else if wc_tracked {
722 } else if wc_tracked {
728 ancestor.tracked_descendants_count += 1;
723 ancestor.tracked_descendants_count += 1;
729 }
724 }
730 })?;
725 })?;
731
726
732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
727 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
733 DirstateV2Data {
728 DirstateV2Data {
734 wc_tracked,
729 wc_tracked,
735 p1_tracked,
730 p1_tracked,
736 p2_info,
731 p2_info,
737 mode_size: parent_file_data.mode_size,
732 mode_size: parent_file_data.mode_size,
738 mtime: if has_meaningful_mtime {
733 mtime: if has_meaningful_mtime {
739 parent_file_data.mtime
734 parent_file_data.mtime
740 } else {
735 } else {
741 None
736 None
742 },
737 },
743 ..Default::default()
738 ..Default::default()
744 }
739 }
745 } else {
740 } else {
746 DirstateV2Data {
741 DirstateV2Data {
747 wc_tracked,
742 wc_tracked,
748 p1_tracked,
743 p1_tracked,
749 p2_info,
744 p2_info,
750 ..Default::default()
745 ..Default::default()
751 }
746 }
752 };
747 };
753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
748 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
754 if !had_entry {
749 if !had_entry {
755 self.nodes_with_entry_count += 1;
750 self.nodes_with_entry_count += 1;
756 }
751 }
757 Ok(())
752 Ok(())
758 }
753 }
759
754
760 fn set_tracked(
755 fn set_tracked(
761 &mut self,
756 &mut self,
762 filename: &HgPath,
757 filename: &HgPath,
763 old_entry_opt: Option<DirstateEntry>,
758 old_entry_opt: Option<DirstateEntry>,
764 ) -> Result<bool, DirstateV2ParseError> {
759 ) -> Result<bool, DirstateV2ParseError> {
765 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
760 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
766 let had_entry = old_entry_opt.is_some();
761 let had_entry = old_entry_opt.is_some();
767 let tracked_count_increment = u32::from(!was_tracked);
762 let tracked_count_increment = u32::from(!was_tracked);
768 let mut new = false;
763 let mut new = false;
769
764
770 let node = self.get_or_insert_node(filename, |ancestor| {
765 let node = self.get_or_insert_node(filename, |ancestor| {
771 if !had_entry {
766 if !had_entry {
772 ancestor.descendants_with_entry_count += 1;
767 ancestor.descendants_with_entry_count += 1;
773 }
768 }
774
769
775 ancestor.tracked_descendants_count += tracked_count_increment;
770 ancestor.tracked_descendants_count += tracked_count_increment;
776 })?;
771 })?;
777 if let Some(old_entry) = old_entry_opt {
772 if let Some(old_entry) = old_entry_opt {
778 let mut e = old_entry;
773 let mut e = old_entry;
779 if e.tracked() {
774 if e.tracked() {
780 // XXX
775 // XXX
781 // This is probably overkill for more case, but we need this to
776 // This is probably overkill for more case, but we need this to
782 // fully replace the `normallookup` call with `set_tracked`
777 // fully replace the `normallookup` call with `set_tracked`
783 // one. Consider smoothing this in the future.
778 // one. Consider smoothing this in the future.
784 e.set_possibly_dirty();
779 e.set_possibly_dirty();
785 } else {
780 } else {
786 new = true;
781 new = true;
787 e.set_tracked();
782 e.set_tracked();
788 }
783 }
789 node.data = NodeData::Entry(e)
784 node.data = NodeData::Entry(e)
790 } else {
785 } else {
791 node.data = NodeData::Entry(DirstateEntry::new_tracked());
786 node.data = NodeData::Entry(DirstateEntry::new_tracked());
792 self.nodes_with_entry_count += 1;
787 self.nodes_with_entry_count += 1;
793 new = true;
788 new = true;
794 };
789 };
795 Ok(new)
790 Ok(new)
796 }
791 }
797
792
798 /// Set a node as untracked in the dirstate.
793 /// Set a node as untracked in the dirstate.
799 ///
794 ///
800 /// It is the responsibility of the caller to remove the copy source and/or
795 /// It is the responsibility of the caller to remove the copy source and/or
801 /// the entry itself if appropriate.
796 /// the entry itself if appropriate.
802 ///
797 ///
803 /// # Panics
798 /// # Panics
804 ///
799 ///
805 /// Panics if the node does not exist.
800 /// Panics if the node does not exist.
806 fn set_untracked(
801 fn set_untracked(
807 &mut self,
802 &mut self,
808 filename: &HgPath,
803 filename: &HgPath,
809 old_entry: DirstateEntry,
804 old_entry: DirstateEntry,
810 ) -> Result<(), DirstateV2ParseError> {
805 ) -> Result<(), DirstateV2ParseError> {
811 let node = self
806 let node = self
812 .get_node_mut(filename, |ancestor| {
807 .get_node_mut(filename, |ancestor| {
813 ancestor.tracked_descendants_count = ancestor
808 ancestor.tracked_descendants_count = ancestor
814 .tracked_descendants_count
809 .tracked_descendants_count
815 .checked_sub(1)
810 .checked_sub(1)
816 .expect("tracked_descendants_count should be >= 0");
811 .expect("tracked_descendants_count should be >= 0");
817 })?
812 })?
818 .expect("node should exist");
813 .expect("node should exist");
819 let mut new_entry = old_entry;
814 let mut new_entry = old_entry;
820 new_entry.set_untracked();
815 new_entry.set_untracked();
821 node.data = NodeData::Entry(new_entry);
816 node.data = NodeData::Entry(new_entry);
822 Ok(())
817 Ok(())
823 }
818 }
824
819
825 /// Set a node as clean in the dirstate.
820 /// Set a node as clean in the dirstate.
826 ///
821 ///
827 /// It is the responsibility of the caller to remove the copy source.
822 /// It is the responsibility of the caller to remove the copy source.
828 ///
823 ///
829 /// # Panics
824 /// # Panics
830 ///
825 ///
831 /// Panics if the node does not exist.
826 /// Panics if the node does not exist.
832 fn set_clean(
827 fn set_clean(
833 &mut self,
828 &mut self,
834 filename: &HgPath,
829 filename: &HgPath,
835 old_entry: DirstateEntry,
830 old_entry: DirstateEntry,
836 mode: u32,
831 mode: u32,
837 size: u32,
832 size: u32,
838 mtime: TruncatedTimestamp,
833 mtime: TruncatedTimestamp,
839 ) -> Result<(), DirstateError> {
834 ) -> Result<(), DirstateError> {
840 let node = self
835 let node = self
841 .get_node_mut(filename, |ancestor| {
836 .get_node_mut(filename, |ancestor| {
842 if !old_entry.tracked() {
837 if !old_entry.tracked() {
843 ancestor.tracked_descendants_count += 1;
838 ancestor.tracked_descendants_count += 1;
844 }
839 }
845 })?
840 })?
846 .expect("node should exist");
841 .expect("node should exist");
847 let mut new_entry = old_entry;
842 let mut new_entry = old_entry;
848 new_entry.set_clean(mode, size, mtime);
843 new_entry.set_clean(mode, size, mtime);
849 node.data = NodeData::Entry(new_entry);
844 node.data = NodeData::Entry(new_entry);
850 Ok(())
845 Ok(())
851 }
846 }
852
847
853 /// Set a node as possibly dirty in the dirstate.
848 /// Set a node as possibly dirty in the dirstate.
854 ///
849 ///
855 /// # Panics
850 /// # Panics
856 ///
851 ///
857 /// Panics if the node does not exist.
852 /// Panics if the node does not exist.
858 fn set_possibly_dirty(
853 fn set_possibly_dirty(
859 &mut self,
854 &mut self,
860 filename: &HgPath,
855 filename: &HgPath,
861 ) -> Result<(), DirstateError> {
856 ) -> Result<(), DirstateError> {
862 let node = self
857 let node = self
863 .get_node_mut(filename, |_ancestor| {})?
858 .get_node_mut(filename, |_ancestor| {})?
864 .expect("node should exist");
859 .expect("node should exist");
865 let entry = node.data.as_entry_mut().expect("entry should exist");
860 let entry = node.data.as_entry_mut().expect("entry should exist");
866 entry.set_possibly_dirty();
861 entry.set_possibly_dirty();
867 node.data = NodeData::Entry(*entry);
862 node.data = NodeData::Entry(*entry);
868 Ok(())
863 Ok(())
869 }
864 }
870
865
871 /// Clears the cached mtime for the (potential) folder at `path`.
866 /// Clears the cached mtime for the (potential) folder at `path`.
872 pub(super) fn clear_cached_mtime(
867 pub(super) fn clear_cached_mtime(
873 &mut self,
868 &mut self,
874 path: &HgPath,
869 path: &HgPath,
875 ) -> Result<(), DirstateV2ParseError> {
870 ) -> Result<(), DirstateV2ParseError> {
876 let node = match self.get_node_mut(path, |_ancestor| {})? {
871 let node = match self.get_node_mut(path, |_ancestor| {})? {
877 Some(node) => node,
872 Some(node) => node,
878 None => return Ok(()),
873 None => return Ok(()),
879 };
874 };
880 if let NodeData::CachedDirectory { .. } = &node.data {
875 if let NodeData::CachedDirectory { .. } = &node.data {
881 node.data = NodeData::None
876 node.data = NodeData::None
882 }
877 }
883 Ok(())
878 Ok(())
884 }
879 }
885
880
886 /// Sets the cached mtime for the (potential) folder at `path`.
881 /// Sets the cached mtime for the (potential) folder at `path`.
887 pub(super) fn set_cached_mtime(
882 pub(super) fn set_cached_mtime(
888 &mut self,
883 &mut self,
889 path: &HgPath,
884 path: &HgPath,
890 mtime: TruncatedTimestamp,
885 mtime: TruncatedTimestamp,
891 ) -> Result<(), DirstateV2ParseError> {
886 ) -> Result<(), DirstateV2ParseError> {
892 let node = match self.get_node_mut(path, |_ancestor| {})? {
887 let node = match self.get_node_mut(path, |_ancestor| {})? {
893 Some(node) => node,
888 Some(node) => node,
894 None => return Ok(()),
889 None => return Ok(()),
895 };
890 };
896 match &node.data {
891 match &node.data {
897 NodeData::Entry(_) => {} // Don’t overwrite an entry
892 NodeData::Entry(_) => {} // Don’t overwrite an entry
898 NodeData::CachedDirectory { .. } | NodeData::None => {
893 NodeData::CachedDirectory { .. } | NodeData::None => {
899 node.data = NodeData::CachedDirectory { mtime }
894 node.data = NodeData::CachedDirectory { mtime }
900 }
895 }
901 }
896 }
902 Ok(())
897 Ok(())
903 }
898 }
904
899
905 fn iter_nodes<'tree>(
900 fn iter_nodes<'tree>(
906 &'tree self,
901 &'tree self,
907 ) -> impl Iterator<
902 ) -> impl Iterator<
908 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
903 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
909 > + 'tree {
904 > + 'tree {
910 // Depth first tree traversal.
905 // Depth first tree traversal.
911 //
906 //
912 // If we could afford internal iteration and recursion,
907 // If we could afford internal iteration and recursion,
913 // this would look like:
908 // this would look like:
914 //
909 //
915 // ```
910 // ```
916 // fn traverse_children(
911 // fn traverse_children(
917 // children: &ChildNodes,
912 // children: &ChildNodes,
918 // each: &mut impl FnMut(&Node),
913 // each: &mut impl FnMut(&Node),
919 // ) {
914 // ) {
920 // for child in children.values() {
915 // for child in children.values() {
921 // traverse_children(&child.children, each);
916 // traverse_children(&child.children, each);
922 // each(child);
917 // each(child);
923 // }
918 // }
924 // }
919 // }
925 // ```
920 // ```
926 //
921 //
927 // However we want an external iterator and therefore can’t use the
922 // However we want an external iterator and therefore can’t use the
928 // call stack. Use an explicit stack instead:
923 // call stack. Use an explicit stack instead:
929 let mut stack = Vec::new();
924 let mut stack = Vec::new();
930 let mut iter = self.root.as_ref().iter();
925 let mut iter = self.root.as_ref().iter();
931 std::iter::from_fn(move || {
926 std::iter::from_fn(move || {
932 while let Some(child_node) = iter.next() {
927 while let Some(child_node) = iter.next() {
933 let children = match child_node.children(self.on_disk) {
928 let children = match child_node.children(self.on_disk) {
934 Ok(children) => children,
929 Ok(children) => children,
935 Err(error) => return Some(Err(error)),
930 Err(error) => return Some(Err(error)),
936 };
931 };
937 // Pseudo-recursion
932 // Pseudo-recursion
938 let new_iter = children.iter();
933 let new_iter = children.iter();
939 let old_iter = std::mem::replace(&mut iter, new_iter);
934 let old_iter = std::mem::replace(&mut iter, new_iter);
940 stack.push((child_node, old_iter));
935 stack.push((child_node, old_iter));
941 }
936 }
942 // Found the end of a `children.iter()` iterator.
937 // Found the end of a `children.iter()` iterator.
943 if let Some((child_node, next_iter)) = stack.pop() {
938 if let Some((child_node, next_iter)) = stack.pop() {
944 // "Return" from pseudo-recursion by restoring state from the
939 // "Return" from pseudo-recursion by restoring state from the
945 // explicit stack
940 // explicit stack
946 iter = next_iter;
941 iter = next_iter;
947
942
948 Some(Ok(child_node))
943 Some(Ok(child_node))
949 } else {
944 } else {
950 // Reached the bottom of the stack, we’re done
945 // Reached the bottom of the stack, we’re done
951 None
946 None
952 }
947 }
953 })
948 })
954 }
949 }
955
950
956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
951 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
957 if let Cow::Borrowed(path) = path {
952 if let Cow::Borrowed(path) = path {
958 *unreachable_bytes += path.len() as u32
953 *unreachable_bytes += path.len() as u32
959 }
954 }
960 }
955 }
961
956
962 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
957 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
963 self.write_mode = write_mode;
958 self.write_mode = write_mode;
964 }
959 }
965 }
960 }
966
961
967 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
962 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
968
963
969 impl OwningDirstateMap {
964 impl OwningDirstateMap {
970 pub fn clear(&mut self) {
965 pub fn clear(&mut self) {
971 self.with_dmap_mut(|map| {
966 self.with_dmap_mut(|map| {
972 map.root = Default::default();
967 map.root = Default::default();
973 map.nodes_with_entry_count = 0;
968 map.nodes_with_entry_count = 0;
974 map.nodes_with_copy_source_count = 0;
969 map.nodes_with_copy_source_count = 0;
975 map.unreachable_bytes = map.on_disk.len() as u32;
970 map.unreachable_bytes = map.on_disk.len() as u32;
976 });
971 });
977 }
972 }
978
973
979 pub fn set_tracked(
974 pub fn set_tracked(
980 &mut self,
975 &mut self,
981 filename: &HgPath,
976 filename: &HgPath,
982 ) -> Result<bool, DirstateV2ParseError> {
977 ) -> Result<bool, DirstateV2ParseError> {
983 let old_entry_opt = self.get(filename)?;
978 let old_entry_opt = self.get(filename)?;
984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
979 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
985 }
980 }
986
981
987 pub fn set_untracked(
982 pub fn set_untracked(
988 &mut self,
983 &mut self,
989 filename: &HgPath,
984 filename: &HgPath,
990 ) -> Result<bool, DirstateError> {
985 ) -> Result<bool, DirstateError> {
991 let old_entry_opt = self.get(filename)?;
986 let old_entry_opt = self.get(filename)?;
992 match old_entry_opt {
987 match old_entry_opt {
993 None => Ok(false),
988 None => Ok(false),
994 Some(old_entry) => {
989 Some(old_entry) => {
995 if !old_entry.tracked() {
990 if !old_entry.tracked() {
996 // `DirstateMap::set_untracked` is not a noop if
991 // `DirstateMap::set_untracked` is not a noop if
997 // already not tracked as it will decrement the
992 // already not tracked as it will decrement the
998 // tracked counters while going down.
993 // tracked counters while going down.
999 return Ok(true);
994 return Ok(true);
1000 }
995 }
1001 if old_entry.added() {
996 if old_entry.added() {
1002 // Untracking an "added" entry will just result in a
997 // Untracking an "added" entry will just result in a
1003 // worthless entry (and other parts of the code will
998 // worthless entry (and other parts of the code will
1004 // complain about it), just drop it entirely.
999 // complain about it), just drop it entirely.
1005 self.drop_entry_and_copy_source(filename)?;
1000 self.drop_entry_and_copy_source(filename)?;
1006 return Ok(true);
1001 return Ok(true);
1007 }
1002 }
1008 if !old_entry.p2_info() {
1003 if !old_entry.p2_info() {
1009 self.copy_map_remove(filename)?;
1004 self.copy_map_remove(filename)?;
1010 }
1005 }
1011
1006
1012 self.with_dmap_mut(|map| {
1007 self.with_dmap_mut(|map| {
1013 map.set_untracked(filename, old_entry)?;
1008 map.set_untracked(filename, old_entry)?;
1014 Ok(true)
1009 Ok(true)
1015 })
1010 })
1016 }
1011 }
1017 }
1012 }
1018 }
1013 }
1019
1014
1020 pub fn set_clean(
1015 pub fn set_clean(
1021 &mut self,
1016 &mut self,
1022 filename: &HgPath,
1017 filename: &HgPath,
1023 mode: u32,
1018 mode: u32,
1024 size: u32,
1019 size: u32,
1025 mtime: TruncatedTimestamp,
1020 mtime: TruncatedTimestamp,
1026 ) -> Result<(), DirstateError> {
1021 ) -> Result<(), DirstateError> {
1027 let old_entry = match self.get(filename)? {
1022 let old_entry = match self.get(filename)? {
1028 None => {
1023 None => {
1029 return Err(
1024 return Err(
1030 DirstateMapError::PathNotFound(filename.into()).into()
1025 DirstateMapError::PathNotFound(filename.into()).into()
1031 )
1026 )
1032 }
1027 }
1033 Some(e) => e,
1028 Some(e) => e,
1034 };
1029 };
1035 self.copy_map_remove(filename)?;
1030 self.copy_map_remove(filename)?;
1036 self.with_dmap_mut(|map| {
1031 self.with_dmap_mut(|map| {
1037 map.set_clean(filename, old_entry, mode, size, mtime)
1032 map.set_clean(filename, old_entry, mode, size, mtime)
1038 })
1033 })
1039 }
1034 }
1040
1035
1041 pub fn set_possibly_dirty(
1036 pub fn set_possibly_dirty(
1042 &mut self,
1037 &mut self,
1043 filename: &HgPath,
1038 filename: &HgPath,
1044 ) -> Result<(), DirstateError> {
1039 ) -> Result<(), DirstateError> {
1045 if self.get(filename)?.is_none() {
1040 if self.get(filename)?.is_none() {
1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1041 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1047 }
1042 }
1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1043 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1049 }
1044 }
1050
1045
1051 pub fn reset_state(
1046 pub fn reset_state(
1052 &mut self,
1047 &mut self,
1053 filename: &HgPath,
1048 filename: &HgPath,
1054 wc_tracked: bool,
1049 wc_tracked: bool,
1055 p1_tracked: bool,
1050 p1_tracked: bool,
1056 p2_info: bool,
1051 p2_info: bool,
1057 has_meaningful_mtime: bool,
1052 has_meaningful_mtime: bool,
1058 parent_file_data_opt: Option<ParentFileData>,
1053 parent_file_data_opt: Option<ParentFileData>,
1059 ) -> Result<(), DirstateError> {
1054 ) -> Result<(), DirstateError> {
1060 if !(p1_tracked || p2_info || wc_tracked) {
1055 if !(p1_tracked || p2_info || wc_tracked) {
1061 self.drop_entry_and_copy_source(filename)?;
1056 self.drop_entry_and_copy_source(filename)?;
1062 return Ok(());
1057 return Ok(());
1063 }
1058 }
1064 self.copy_map_remove(filename)?;
1059 self.copy_map_remove(filename)?;
1065 let old_entry_opt = self.get(filename)?;
1060 let old_entry_opt = self.get(filename)?;
1066 self.with_dmap_mut(|map| {
1061 self.with_dmap_mut(|map| {
1067 map.reset_state(
1062 map.reset_state(
1068 filename,
1063 filename,
1069 old_entry_opt,
1064 old_entry_opt,
1070 wc_tracked,
1065 wc_tracked,
1071 p1_tracked,
1066 p1_tracked,
1072 p2_info,
1067 p2_info,
1073 has_meaningful_mtime,
1068 has_meaningful_mtime,
1074 parent_file_data_opt,
1069 parent_file_data_opt,
1075 )
1070 )
1076 })
1071 })
1077 }
1072 }
1078
1073
1079 pub fn drop_entry_and_copy_source(
1074 pub fn drop_entry_and_copy_source(
1080 &mut self,
1075 &mut self,
1081 filename: &HgPath,
1076 filename: &HgPath,
1082 ) -> Result<(), DirstateError> {
1077 ) -> Result<(), DirstateError> {
1083 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1078 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1084 struct Dropped {
1079 struct Dropped {
1085 was_tracked: bool,
1080 was_tracked: bool,
1086 had_entry: bool,
1081 had_entry: bool,
1087 had_copy_source: bool,
1082 had_copy_source: bool,
1088 }
1083 }
1089
1084
1090 /// If this returns `Ok(Some((dropped, removed)))`, then
1085 /// If this returns `Ok(Some((dropped, removed)))`, then
1091 ///
1086 ///
1092 /// * `dropped` is about the leaf node that was at `filename`
1087 /// * `dropped` is about the leaf node that was at `filename`
1093 /// * `removed` is whether this particular level of recursion just
1088 /// * `removed` is whether this particular level of recursion just
1094 /// removed a node in `nodes`.
1089 /// removed a node in `nodes`.
1095 fn recur<'on_disk>(
1090 fn recur<'on_disk>(
1096 on_disk: &'on_disk [u8],
1091 on_disk: &'on_disk [u8],
1097 unreachable_bytes: &mut u32,
1092 unreachable_bytes: &mut u32,
1098 nodes: &mut ChildNodes<'on_disk>,
1093 nodes: &mut ChildNodes<'on_disk>,
1099 path: &HgPath,
1094 path: &HgPath,
1100 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1095 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1101 let (first_path_component, rest_of_path) =
1096 let (first_path_component, rest_of_path) =
1102 path.split_first_component();
1097 path.split_first_component();
1103 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1098 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1104 let node = if let Some(node) = nodes.get_mut(first_path_component)
1099 let node = if let Some(node) = nodes.get_mut(first_path_component)
1105 {
1100 {
1106 node
1101 node
1107 } else {
1102 } else {
1108 return Ok(None);
1103 return Ok(None);
1109 };
1104 };
1110 let dropped;
1105 let dropped;
1111 if let Some(rest) = rest_of_path {
1106 if let Some(rest) = rest_of_path {
1112 if let Some((d, removed)) = recur(
1107 if let Some((d, removed)) = recur(
1113 on_disk,
1108 on_disk,
1114 unreachable_bytes,
1109 unreachable_bytes,
1115 &mut node.children,
1110 &mut node.children,
1116 rest,
1111 rest,
1117 )? {
1112 )? {
1118 dropped = d;
1113 dropped = d;
1119 if dropped.had_entry {
1114 if dropped.had_entry {
1120 node.descendants_with_entry_count = node
1115 node.descendants_with_entry_count = node
1121 .descendants_with_entry_count
1116 .descendants_with_entry_count
1122 .checked_sub(1)
1117 .checked_sub(1)
1123 .expect(
1118 .expect(
1124 "descendants_with_entry_count should be >= 0",
1119 "descendants_with_entry_count should be >= 0",
1125 );
1120 );
1126 }
1121 }
1127 if dropped.was_tracked {
1122 if dropped.was_tracked {
1128 node.tracked_descendants_count = node
1123 node.tracked_descendants_count = node
1129 .tracked_descendants_count
1124 .tracked_descendants_count
1130 .checked_sub(1)
1125 .checked_sub(1)
1131 .expect(
1126 .expect(
1132 "tracked_descendants_count should be >= 0",
1127 "tracked_descendants_count should be >= 0",
1133 );
1128 );
1134 }
1129 }
1135
1130
1136 // Directory caches must be invalidated when removing a
1131 // Directory caches must be invalidated when removing a
1137 // child node
1132 // child node
1138 if removed {
1133 if removed {
1139 if let NodeData::CachedDirectory { .. } = &node.data {
1134 if let NodeData::CachedDirectory { .. } = &node.data {
1140 node.data = NodeData::None
1135 node.data = NodeData::None
1141 }
1136 }
1142 }
1137 }
1143 } else {
1138 } else {
1144 return Ok(None);
1139 return Ok(None);
1145 }
1140 }
1146 } else {
1141 } else {
1147 let entry = node.data.as_entry();
1142 let entry = node.data.as_entry();
1148 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1143 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1149 let had_entry = entry.is_some();
1144 let had_entry = entry.is_some();
1150 if had_entry {
1145 if had_entry {
1151 node.data = NodeData::None
1146 node.data = NodeData::None
1152 }
1147 }
1153 let mut had_copy_source = false;
1148 let mut had_copy_source = false;
1154 if let Some(source) = &node.copy_source {
1149 if let Some(source) = &node.copy_source {
1155 DirstateMap::count_dropped_path(
1150 DirstateMap::count_dropped_path(
1156 unreachable_bytes,
1151 unreachable_bytes,
1157 Cow::Borrowed(source),
1152 Cow::Borrowed(source),
1158 );
1153 );
1159 had_copy_source = true;
1154 had_copy_source = true;
1160 node.copy_source = None
1155 node.copy_source = None
1161 }
1156 }
1162 dropped = Dropped {
1157 dropped = Dropped {
1163 was_tracked,
1158 was_tracked,
1164 had_entry,
1159 had_entry,
1165 had_copy_source,
1160 had_copy_source,
1166 };
1161 };
1167 }
1162 }
1168 // After recursion, for both leaf (rest_of_path is None) nodes and
1163 // After recursion, for both leaf (rest_of_path is None) nodes and
1169 // parent nodes, remove a node if it just became empty.
1164 // parent nodes, remove a node if it just became empty.
1170 let remove = !node.data.has_entry()
1165 let remove = !node.data.has_entry()
1171 && node.copy_source.is_none()
1166 && node.copy_source.is_none()
1172 && node.children.is_empty();
1167 && node.children.is_empty();
1173 if remove {
1168 if remove {
1174 let (key, _) =
1169 let (key, _) =
1175 nodes.remove_entry(first_path_component).unwrap();
1170 nodes.remove_entry(first_path_component).unwrap();
1176 DirstateMap::count_dropped_path(
1171 DirstateMap::count_dropped_path(
1177 unreachable_bytes,
1172 unreachable_bytes,
1178 Cow::Borrowed(key.full_path()),
1173 Cow::Borrowed(key.full_path()),
1179 )
1174 )
1180 }
1175 }
1181 Ok(Some((dropped, remove)))
1176 Ok(Some((dropped, remove)))
1182 }
1177 }
1183
1178
1184 self.with_dmap_mut(|map| {
1179 self.with_dmap_mut(|map| {
1185 if let Some((dropped, _removed)) = recur(
1180 if let Some((dropped, _removed)) = recur(
1186 map.on_disk,
1181 map.on_disk,
1187 &mut map.unreachable_bytes,
1182 &mut map.unreachable_bytes,
1188 &mut map.root,
1183 &mut map.root,
1189 filename,
1184 filename,
1190 )? {
1185 )? {
1191 if dropped.had_entry {
1186 if dropped.had_entry {
1192 map.nodes_with_entry_count = map
1187 map.nodes_with_entry_count = map
1193 .nodes_with_entry_count
1188 .nodes_with_entry_count
1194 .checked_sub(1)
1189 .checked_sub(1)
1195 .expect("nodes_with_entry_count should be >= 0");
1190 .expect("nodes_with_entry_count should be >= 0");
1196 }
1191 }
1197 if dropped.had_copy_source {
1192 if dropped.had_copy_source {
1198 map.nodes_with_copy_source_count = map
1193 map.nodes_with_copy_source_count = map
1199 .nodes_with_copy_source_count
1194 .nodes_with_copy_source_count
1200 .checked_sub(1)
1195 .checked_sub(1)
1201 .expect("nodes_with_copy_source_count should be >= 0");
1196 .expect("nodes_with_copy_source_count should be >= 0");
1202 }
1197 }
1203 } else {
1198 } else {
1204 debug_assert!(!was_tracked);
1199 debug_assert!(!was_tracked);
1205 }
1200 }
1206 Ok(())
1201 Ok(())
1207 })
1202 })
1208 }
1203 }
1209
1204
1210 pub fn has_tracked_dir(
1205 pub fn has_tracked_dir(
1211 &mut self,
1206 &mut self,
1212 directory: &HgPath,
1207 directory: &HgPath,
1213 ) -> Result<bool, DirstateError> {
1208 ) -> Result<bool, DirstateError> {
1214 self.with_dmap_mut(|map| {
1209 self.with_dmap_mut(|map| {
1215 if let Some(node) = map.get_node(directory)? {
1210 if let Some(node) = map.get_node(directory)? {
1216 // A node without a `DirstateEntry` was created to hold child
1211 // A node without a `DirstateEntry` was created to hold child
1217 // nodes, and is therefore a directory.
1212 // nodes, and is therefore a directory.
1218 let is_dir = node.entry()?.is_none();
1213 let is_dir = node.entry()?.is_none();
1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1214 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 } else {
1215 } else {
1221 Ok(false)
1216 Ok(false)
1222 }
1217 }
1223 })
1218 })
1224 }
1219 }
1225
1220
1226 pub fn has_dir(
1221 pub fn has_dir(
1227 &mut self,
1222 &mut self,
1228 directory: &HgPath,
1223 directory: &HgPath,
1229 ) -> Result<bool, DirstateError> {
1224 ) -> Result<bool, DirstateError> {
1230 self.with_dmap_mut(|map| {
1225 self.with_dmap_mut(|map| {
1231 if let Some(node) = map.get_node(directory)? {
1226 if let Some(node) = map.get_node(directory)? {
1232 // A node without a `DirstateEntry` was created to hold child
1227 // A node without a `DirstateEntry` was created to hold child
1233 // nodes, and is therefore a directory.
1228 // nodes, and is therefore a directory.
1234 let is_dir = node.entry()?.is_none();
1229 let is_dir = node.entry()?.is_none();
1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1230 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 } else {
1231 } else {
1237 Ok(false)
1232 Ok(false)
1238 }
1233 }
1239 })
1234 })
1240 }
1235 }
1241
1236
1242 #[logging_timer::time("trace")]
1237 #[logging_timer::time("trace")]
1243 pub fn pack_v1(
1238 pub fn pack_v1(
1244 &self,
1239 &self,
1245 parents: DirstateParents,
1240 parents: DirstateParents,
1246 ) -> Result<Vec<u8>, DirstateError> {
1241 ) -> Result<Vec<u8>, DirstateError> {
1247 let map = self.get_map();
1242 let map = self.get_map();
1248 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1243 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1249 // reallocations
1244 // reallocations
1250 let mut size = parents.as_bytes().len();
1245 let mut size = parents.as_bytes().len();
1251 for node in map.iter_nodes() {
1246 for node in map.iter_nodes() {
1252 let node = node?;
1247 let node = node?;
1253 if node.entry()?.is_some() {
1248 if node.entry()?.is_some() {
1254 size += packed_entry_size(
1249 size += packed_entry_size(
1255 node.full_path(map.on_disk)?,
1250 node.full_path(map.on_disk)?,
1256 node.copy_source(map.on_disk)?,
1251 node.copy_source(map.on_disk)?,
1257 );
1252 );
1258 }
1253 }
1259 }
1254 }
1260
1255
1261 let mut packed = Vec::with_capacity(size);
1256 let mut packed = Vec::with_capacity(size);
1262 packed.extend(parents.as_bytes());
1257 packed.extend(parents.as_bytes());
1263
1258
1264 for node in map.iter_nodes() {
1259 for node in map.iter_nodes() {
1265 let node = node?;
1260 let node = node?;
1266 if let Some(entry) = node.entry()? {
1261 if let Some(entry) = node.entry()? {
1267 pack_entry(
1262 pack_entry(
1268 node.full_path(map.on_disk)?,
1263 node.full_path(map.on_disk)?,
1269 &entry,
1264 &entry,
1270 node.copy_source(map.on_disk)?,
1265 node.copy_source(map.on_disk)?,
1271 &mut packed,
1266 &mut packed,
1272 );
1267 );
1273 }
1268 }
1274 }
1269 }
1275 Ok(packed)
1270 Ok(packed)
1276 }
1271 }
1277
1272
1278 /// Returns new data and metadata together with whether that data should be
1273 /// Returns new data and metadata together with whether that data should be
1279 /// appended to the existing data file whose content is at
1274 /// appended to the existing data file whose content is at
1280 /// `map.on_disk` (true), instead of written to a new data file
1275 /// `map.on_disk` (true), instead of written to a new data file
1281 /// (false), and the previous size of data on disk.
1276 /// (false), and the previous size of data on disk.
1282 #[logging_timer::time("trace")]
1277 #[logging_timer::time("trace")]
1283 pub fn pack_v2(
1278 pub fn pack_v2(
1284 &self,
1279 &self,
1285 write_mode: DirstateMapWriteMode,
1280 write_mode: DirstateMapWriteMode,
1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1281 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1287 {
1282 {
1288 let map = self.get_map();
1283 let map = self.get_map();
1289 on_disk::write(map, write_mode)
1284 on_disk::write(map, write_mode)
1290 }
1285 }
1291
1286
1292 /// `callback` allows the caller to process and do something with the
1287 /// `callback` allows the caller to process and do something with the
1293 /// results of the status. This is needed to do so efficiently (i.e.
1288 /// results of the status. This is needed to do so efficiently (i.e.
1294 /// without cloning the `DirstateStatus` object with its paths) because
1289 /// without cloning the `DirstateStatus` object with its paths) because
1295 /// we need to borrow from `Self`.
1290 /// we need to borrow from `Self`.
1296 pub fn with_status<R>(
1291 pub fn with_status<R>(
1297 &mut self,
1292 &mut self,
1298 matcher: &(dyn Matcher + Sync),
1293 matcher: &(dyn Matcher + Sync),
1299 root_dir: PathBuf,
1294 root_dir: PathBuf,
1300 ignore_files: Vec<PathBuf>,
1295 ignore_files: Vec<PathBuf>,
1301 options: StatusOptions,
1296 options: StatusOptions,
1302 callback: impl for<'r> FnOnce(
1297 callback: impl for<'r> FnOnce(
1303 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1298 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1304 ) -> R,
1299 ) -> R,
1305 ) -> R {
1300 ) -> R {
1306 self.with_dmap_mut(|map| {
1301 self.with_dmap_mut(|map| {
1307 callback(super::status::status(
1302 callback(super::status::status(
1308 map,
1303 map,
1309 matcher,
1304 matcher,
1310 root_dir,
1305 root_dir,
1311 ignore_files,
1306 ignore_files,
1312 options,
1307 options,
1313 ))
1308 ))
1314 })
1309 })
1315 }
1310 }
1316
1311
1317 pub fn copy_map_len(&self) -> usize {
1312 pub fn copy_map_len(&self) -> usize {
1318 let map = self.get_map();
1313 let map = self.get_map();
1319 map.nodes_with_copy_source_count as usize
1314 map.nodes_with_copy_source_count as usize
1320 }
1315 }
1321
1316
1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1317 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 let map = self.get_map();
1318 let map = self.get_map();
1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1319 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1320 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 Some((node.full_path(map.on_disk)?, source))
1321 Some((node.full_path(map.on_disk)?, source))
1327 } else {
1322 } else {
1328 None
1323 None
1329 })
1324 })
1330 }))
1325 }))
1331 }
1326 }
1332
1327
1333 pub fn copy_map_contains_key(
1328 pub fn copy_map_contains_key(
1334 &self,
1329 &self,
1335 key: &HgPath,
1330 key: &HgPath,
1336 ) -> Result<bool, DirstateV2ParseError> {
1331 ) -> Result<bool, DirstateV2ParseError> {
1337 let map = self.get_map();
1332 let map = self.get_map();
1338 Ok(if let Some(node) = map.get_node(key)? {
1333 Ok(if let Some(node) = map.get_node(key)? {
1339 node.has_copy_source()
1334 node.has_copy_source()
1340 } else {
1335 } else {
1341 false
1336 false
1342 })
1337 })
1343 }
1338 }
1344
1339
1345 pub fn copy_map_get(
1340 pub fn copy_map_get(
1346 &self,
1341 &self,
1347 key: &HgPath,
1342 key: &HgPath,
1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1343 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 let map = self.get_map();
1344 let map = self.get_map();
1350 if let Some(node) = map.get_node(key)? {
1345 if let Some(node) = map.get_node(key)? {
1351 if let Some(source) = node.copy_source(map.on_disk)? {
1346 if let Some(source) = node.copy_source(map.on_disk)? {
1352 return Ok(Some(source));
1347 return Ok(Some(source));
1353 }
1348 }
1354 }
1349 }
1355 Ok(None)
1350 Ok(None)
1356 }
1351 }
1357
1352
1358 pub fn copy_map_remove(
1353 pub fn copy_map_remove(
1359 &mut self,
1354 &mut self,
1360 key: &HgPath,
1355 key: &HgPath,
1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1356 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1362 self.with_dmap_mut(|map| {
1357 self.with_dmap_mut(|map| {
1363 let count = &mut map.nodes_with_copy_source_count;
1358 let count = &mut map.nodes_with_copy_source_count;
1364 let unreachable_bytes = &mut map.unreachable_bytes;
1359 let unreachable_bytes = &mut map.unreachable_bytes;
1365 Ok(DirstateMap::get_node_mut_inner(
1360 Ok(DirstateMap::get_node_mut_inner(
1366 map.on_disk,
1361 map.on_disk,
1367 unreachable_bytes,
1362 unreachable_bytes,
1368 &mut map.root,
1363 &mut map.root,
1369 key,
1364 key,
1370 |_ancestor| {},
1365 |_ancestor| {},
1371 )?
1366 )?
1372 .and_then(|node| {
1367 .and_then(|node| {
1373 if let Some(source) = &node.copy_source {
1368 if let Some(source) = &node.copy_source {
1374 *count = count
1369 *count = count
1375 .checked_sub(1)
1370 .checked_sub(1)
1376 .expect("nodes_with_copy_source_count should be >= 0");
1371 .expect("nodes_with_copy_source_count should be >= 0");
1377 DirstateMap::count_dropped_path(
1372 DirstateMap::count_dropped_path(
1378 unreachable_bytes,
1373 unreachable_bytes,
1379 Cow::Borrowed(source),
1374 Cow::Borrowed(source),
1380 );
1375 );
1381 }
1376 }
1382 node.copy_source.take().map(Cow::into_owned)
1377 node.copy_source.take().map(Cow::into_owned)
1383 }))
1378 }))
1384 })
1379 })
1385 }
1380 }
1386
1381
1387 pub fn copy_map_insert(
1382 pub fn copy_map_insert(
1388 &mut self,
1383 &mut self,
1389 key: &HgPath,
1384 key: &HgPath,
1390 value: &HgPath,
1385 value: &HgPath,
1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1386 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1392 self.with_dmap_mut(|map| {
1387 self.with_dmap_mut(|map| {
1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1388 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1394 let had_copy_source = node.copy_source.is_none();
1389 let had_copy_source = node.copy_source.is_none();
1395 let old = node
1390 let old = node
1396 .copy_source
1391 .copy_source
1397 .replace(value.to_owned().into())
1392 .replace(value.to_owned().into())
1398 .map(Cow::into_owned);
1393 .map(Cow::into_owned);
1399 if had_copy_source {
1394 if had_copy_source {
1400 map.nodes_with_copy_source_count += 1
1395 map.nodes_with_copy_source_count += 1
1401 }
1396 }
1402 Ok(old)
1397 Ok(old)
1403 })
1398 })
1404 }
1399 }
1405
1400
1406 pub fn len(&self) -> usize {
1401 pub fn len(&self) -> usize {
1407 let map = self.get_map();
1402 let map = self.get_map();
1408 map.nodes_with_entry_count as usize
1403 map.nodes_with_entry_count as usize
1409 }
1404 }
1410
1405
1411 pub fn is_empty(&self) -> bool {
1406 pub fn is_empty(&self) -> bool {
1412 self.len() == 0
1407 self.len() == 0
1413 }
1408 }
1414
1409
1415 pub fn contains_key(
1410 pub fn contains_key(
1416 &self,
1411 &self,
1417 key: &HgPath,
1412 key: &HgPath,
1418 ) -> Result<bool, DirstateV2ParseError> {
1413 ) -> Result<bool, DirstateV2ParseError> {
1419 Ok(self.get(key)?.is_some())
1414 Ok(self.get(key)?.is_some())
1420 }
1415 }
1421
1416
1422 pub fn get(
1417 pub fn get(
1423 &self,
1418 &self,
1424 key: &HgPath,
1419 key: &HgPath,
1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1420 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1426 let map = self.get_map();
1421 let map = self.get_map();
1427 Ok(if let Some(node) = map.get_node(key)? {
1422 Ok(if let Some(node) = map.get_node(key)? {
1428 node.entry()?
1423 node.entry()?
1429 } else {
1424 } else {
1430 None
1425 None
1431 })
1426 })
1432 }
1427 }
1433
1428
1434 pub fn iter(&self) -> StateMapIter<'_> {
1429 pub fn iter(&self) -> StateMapIter<'_> {
1435 let map = self.get_map();
1430 let map = self.get_map();
1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1431 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1437 Ok(if let Some(entry) = node.entry()? {
1432 Ok(if let Some(entry) = node.entry()? {
1438 Some((node.full_path(map.on_disk)?, entry))
1433 Some((node.full_path(map.on_disk)?, entry))
1439 } else {
1434 } else {
1440 None
1435 None
1441 })
1436 })
1442 }))
1437 }))
1443 }
1438 }
1444
1439
1445 pub fn iter_tracked_dirs(
1440 pub fn iter_tracked_dirs(
1446 &mut self,
1441 &mut self,
1447 ) -> Result<
1442 ) -> Result<
1448 Box<
1443 Box<
1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1444 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1450 + Send
1445 + Send
1451 + '_,
1446 + '_,
1452 >,
1447 >,
1453 DirstateError,
1448 DirstateError,
1454 > {
1449 > {
1455 let map = self.get_map();
1450 let map = self.get_map();
1456 let on_disk = map.on_disk;
1451 let on_disk = map.on_disk;
1457 Ok(Box::new(filter_map_results(
1452 Ok(Box::new(filter_map_results(
1458 map.iter_nodes(),
1453 map.iter_nodes(),
1459 move |node| {
1454 move |node| {
1460 Ok(if node.tracked_descendants_count() > 0 {
1455 Ok(if node.tracked_descendants_count() > 0 {
1461 Some(node.full_path(on_disk)?)
1456 Some(node.full_path(on_disk)?)
1462 } else {
1457 } else {
1463 None
1458 None
1464 })
1459 })
1465 },
1460 },
1466 )))
1461 )))
1467 }
1462 }
1468
1463
1469 /// Only public because it needs to be exposed to the Python layer.
1464 /// Only public because it needs to be exposed to the Python layer.
1470 /// It is not the full `setparents` logic, only the parts that mutate the
1465 /// It is not the full `setparents` logic, only the parts that mutate the
1471 /// entries.
1466 /// entries.
1472 pub fn setparents_fixup(
1467 pub fn setparents_fixup(
1473 &mut self,
1468 &mut self,
1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1469 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1475 // XXX
1470 // XXX
1476 // All the copying and re-querying is quite inefficient, but this is
1471 // All the copying and re-querying is quite inefficient, but this is
1477 // still a lot better than doing it from Python.
1472 // still a lot better than doing it from Python.
1478 //
1473 //
1479 // The better solution is to develop a mechanism for `iter_mut`,
1474 // The better solution is to develop a mechanism for `iter_mut`,
1480 // which will be a lot more involved: we're dealing with a lazy,
1475 // which will be a lot more involved: we're dealing with a lazy,
1481 // append-mostly, tree-like data structure. This will do for now.
1476 // append-mostly, tree-like data structure. This will do for now.
1482 let mut copies = vec![];
1477 let mut copies = vec![];
1483 let mut files_with_p2_info = vec![];
1478 let mut files_with_p2_info = vec![];
1484 for res in self.iter() {
1479 for res in self.iter() {
1485 let (path, entry) = res?;
1480 let (path, entry) = res?;
1486 if entry.p2_info() {
1481 if entry.p2_info() {
1487 files_with_p2_info.push(path.to_owned())
1482 files_with_p2_info.push(path.to_owned())
1488 }
1483 }
1489 }
1484 }
1490 self.with_dmap_mut(|map| {
1485 self.with_dmap_mut(|map| {
1491 for path in files_with_p2_info.iter() {
1486 for path in files_with_p2_info.iter() {
1492 let node = map.get_or_insert_node(path, |_| {})?;
1487 let node = map.get_or_insert_node(path, |_| {})?;
1493 let entry =
1488 let entry =
1494 node.data.as_entry_mut().expect("entry should exist");
1489 node.data.as_entry_mut().expect("entry should exist");
1495 entry.drop_merge_data();
1490 entry.drop_merge_data();
1496 if let Some(source) = node.copy_source.take().as_deref() {
1491 if let Some(source) = node.copy_source.take().as_deref() {
1497 copies.push((path.to_owned(), source.to_owned()));
1492 copies.push((path.to_owned(), source.to_owned()));
1498 }
1493 }
1499 }
1494 }
1500 Ok(copies)
1495 Ok(copies)
1501 })
1496 })
1502 }
1497 }
1503
1498
1504 pub fn debug_iter(
1499 pub fn debug_iter(
1505 &self,
1500 &self,
1506 all: bool,
1501 all: bool,
1507 ) -> Box<
1502 ) -> Box<
1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1503 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1509 + Send
1504 + Send
1510 + '_,
1505 + '_,
1511 > {
1506 > {
1512 let map = self.get_map();
1507 let map = self.get_map();
1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1508 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1514 let debug_tuple = if let Some(entry) = node.entry()? {
1509 let debug_tuple = if let Some(entry) = node.entry()? {
1515 entry.debug_tuple()
1510 entry.debug_tuple()
1516 } else if !all {
1511 } else if !all {
1517 return Ok(None);
1512 return Ok(None);
1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1513 } else if let Some(mtime) = node.cached_directory_mtime()? {
1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1514 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1520 } else {
1515 } else {
1521 (b' ', 0, -1, -1)
1516 (b' ', 0, -1, -1)
1522 };
1517 };
1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1518 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1524 }))
1519 }))
1525 }
1520 }
1526 }
1521 }
1527 #[cfg(test)]
1522 #[cfg(test)]
1528 mod tests {
1523 mod tests {
1529 use super::*;
1524 use super::*;
1530
1525
1531 /// Shortcut to return tracked descendants of a path.
1526 /// Shortcut to return tracked descendants of a path.
1532 /// Panics if the path does not exist.
1527 /// Panics if the path does not exist.
1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1528 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1534 let path = dbg!(HgPath::new(path));
1529 let path = dbg!(HgPath::new(path));
1535 let node = map.get_map().get_node(path);
1530 let node = map.get_map().get_node(path);
1536 node.unwrap().unwrap().tracked_descendants_count()
1531 node.unwrap().unwrap().tracked_descendants_count()
1537 }
1532 }
1538
1533
1539 /// Shortcut to return descendants with an entry.
1534 /// Shortcut to return descendants with an entry.
1540 /// Panics if the path does not exist.
1535 /// Panics if the path does not exist.
1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1536 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1542 let path = dbg!(HgPath::new(path));
1537 let path = dbg!(HgPath::new(path));
1543 let node = map.get_map().get_node(path);
1538 let node = map.get_map().get_node(path);
1544 node.unwrap().unwrap().descendants_with_entry_count()
1539 node.unwrap().unwrap().descendants_with_entry_count()
1545 }
1540 }
1546
1541
1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1542 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1548 let path = dbg!(HgPath::new(path));
1543 let path = dbg!(HgPath::new(path));
1549 let node = map.get_map().get_node(path);
1544 let node = map.get_map().get_node(path);
1550 assert!(node.unwrap().is_none());
1545 assert!(node.unwrap().is_none());
1551 }
1546 }
1552
1547
1553 /// Shortcut for path creation in tests
1548 /// Shortcut for path creation in tests
1554 fn p(b: &[u8]) -> &HgPath {
1549 fn p(b: &[u8]) -> &HgPath {
1555 HgPath::new(b)
1550 HgPath::new(b)
1556 }
1551 }
1557
1552
1558 /// Test the very simple case a single tracked file
1553 /// Test the very simple case a single tracked file
1559 #[test]
1554 #[test]
1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1555 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1556 let mut map = OwningDirstateMap::new_empty(vec![]);
1562 assert_eq!(map.len(), 0);
1557 assert_eq!(map.len(), 0);
1563
1558
1564 map.set_tracked(p(b"some/nested/path"))?;
1559 map.set_tracked(p(b"some/nested/path"))?;
1565
1560
1566 assert_eq!(map.len(), 1);
1561 assert_eq!(map.len(), 1);
1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1562 assert_eq!(tracked_descendants(&map, b"some"), 1);
1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1563 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1564 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1570
1565
1571 map.set_untracked(p(b"some/nested/path"))?;
1566 map.set_untracked(p(b"some/nested/path"))?;
1572 assert_eq!(map.len(), 0);
1567 assert_eq!(map.len(), 0);
1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1568 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1574
1569
1575 Ok(())
1570 Ok(())
1576 }
1571 }
1577
1572
1578 /// Test the simple case of all tracked, but multiple files
1573 /// Test the simple case of all tracked, but multiple files
1579 #[test]
1574 #[test]
1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1575 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1576 let mut map = OwningDirstateMap::new_empty(vec![]);
1582
1577
1583 map.set_tracked(p(b"some/nested/path"))?;
1578 map.set_tracked(p(b"some/nested/path"))?;
1584 map.set_tracked(p(b"some/nested/file"))?;
1579 map.set_tracked(p(b"some/nested/file"))?;
1585 // one layer without any files to test deletion cascade
1580 // one layer without any files to test deletion cascade
1586 map.set_tracked(p(b"some/other/nested/path"))?;
1581 map.set_tracked(p(b"some/other/nested/path"))?;
1587 map.set_tracked(p(b"root_file"))?;
1582 map.set_tracked(p(b"root_file"))?;
1588 map.set_tracked(p(b"some/file"))?;
1583 map.set_tracked(p(b"some/file"))?;
1589 map.set_tracked(p(b"some/file2"))?;
1584 map.set_tracked(p(b"some/file2"))?;
1590 map.set_tracked(p(b"some/file3"))?;
1585 map.set_tracked(p(b"some/file3"))?;
1591
1586
1592 assert_eq!(map.len(), 7);
1587 assert_eq!(map.len(), 7);
1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1588 assert_eq!(tracked_descendants(&map, b"some"), 6);
1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1589 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1590 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1591 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1592 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1598
1593
1599 map.set_untracked(p(b"some/nested/path"))?;
1594 map.set_untracked(p(b"some/nested/path"))?;
1600 assert_eq!(map.len(), 6);
1595 assert_eq!(map.len(), 6);
1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1596 assert_eq!(tracked_descendants(&map, b"some"), 5);
1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1598 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1599 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605
1600
1606 map.set_untracked(p(b"some/nested/file"))?;
1601 map.set_untracked(p(b"some/nested/file"))?;
1607 assert_eq!(map.len(), 5);
1602 assert_eq!(map.len(), 5);
1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1603 assert_eq!(tracked_descendants(&map, b"some"), 4);
1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1611 assert_does_not_exist(&map, b"some_nested");
1606 assert_does_not_exist(&map, b"some_nested");
1612
1607
1613 map.set_untracked(p(b"some/other/nested/path"))?;
1608 map.set_untracked(p(b"some/other/nested/path"))?;
1614 assert_eq!(map.len(), 4);
1609 assert_eq!(map.len(), 4);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1610 assert_eq!(tracked_descendants(&map, b"some"), 3);
1616 assert_does_not_exist(&map, b"some/other");
1611 assert_does_not_exist(&map, b"some/other");
1617
1612
1618 map.set_untracked(p(b"root_file"))?;
1613 map.set_untracked(p(b"root_file"))?;
1619 assert_eq!(map.len(), 3);
1614 assert_eq!(map.len(), 3);
1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1621 assert_does_not_exist(&map, b"root_file");
1616 assert_does_not_exist(&map, b"root_file");
1622
1617
1623 map.set_untracked(p(b"some/file"))?;
1618 map.set_untracked(p(b"some/file"))?;
1624 assert_eq!(map.len(), 2);
1619 assert_eq!(map.len(), 2);
1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1620 assert_eq!(tracked_descendants(&map, b"some"), 2);
1626 assert_does_not_exist(&map, b"some/file");
1621 assert_does_not_exist(&map, b"some/file");
1627
1622
1628 map.set_untracked(p(b"some/file2"))?;
1623 map.set_untracked(p(b"some/file2"))?;
1629 assert_eq!(map.len(), 1);
1624 assert_eq!(map.len(), 1);
1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1625 assert_eq!(tracked_descendants(&map, b"some"), 1);
1631 assert_does_not_exist(&map, b"some/file2");
1626 assert_does_not_exist(&map, b"some/file2");
1632
1627
1633 map.set_untracked(p(b"some/file3"))?;
1628 map.set_untracked(p(b"some/file3"))?;
1634 assert_eq!(map.len(), 0);
1629 assert_eq!(map.len(), 0);
1635 assert_does_not_exist(&map, b"some/file3");
1630 assert_does_not_exist(&map, b"some/file3");
1636
1631
1637 Ok(())
1632 Ok(())
1638 }
1633 }
1639
1634
1640 /// Check with a mix of tracked and non-tracked items
1635 /// Check with a mix of tracked and non-tracked items
1641 #[test]
1636 #[test]
1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1637 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1638 let mut map = OwningDirstateMap::new_empty(vec![]);
1644
1639
1645 // A file that was just added
1640 // A file that was just added
1646 map.set_tracked(p(b"some/nested/path"))?;
1641 map.set_tracked(p(b"some/nested/path"))?;
1647 // This has no information, the dirstate should ignore it
1642 // This has no information, the dirstate should ignore it
1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1643 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1649 assert_does_not_exist(&map, b"some/file");
1644 assert_does_not_exist(&map, b"some/file");
1650
1645
1651 // A file that was removed
1646 // A file that was removed
1652 map.reset_state(
1647 map.reset_state(
1653 p(b"some/nested/file"),
1648 p(b"some/nested/file"),
1654 false,
1649 false,
1655 true,
1650 true,
1656 false,
1651 false,
1657 false,
1652 false,
1658 None,
1653 None,
1659 )?;
1654 )?;
1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1655 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1661 // Only present in p2
1656 // Only present in p2
1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1657 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1658 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1664 // A file that was merged
1659 // A file that was merged
1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1660 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1661 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1667 // A file that is added, with info from p2
1662 // A file that is added, with info from p2
1668 // XXX is that actually possible?
1663 // XXX is that actually possible?
1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1664 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1665 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1671 // A clean file
1666 // A clean file
1672 // One layer without any files to test deletion cascade
1667 // One layer without any files to test deletion cascade
1673 map.reset_state(
1668 map.reset_state(
1674 p(b"some/other/nested/path"),
1669 p(b"some/other/nested/path"),
1675 true,
1670 true,
1676 true,
1671 true,
1677 false,
1672 false,
1678 false,
1673 false,
1679 None,
1674 None,
1680 )?;
1675 )?;
1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1676 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1682
1677
1683 assert_eq!(map.len(), 6);
1678 assert_eq!(map.len(), 6);
1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1679 assert_eq!(tracked_descendants(&map, b"some"), 3);
1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1680 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1681 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1682 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1683 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1689 assert_eq!(
1684 assert_eq!(
1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1685 descendants_with_an_entry(&map, b"some/other/nested/path"),
1691 0
1686 0
1692 );
1687 );
1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1688 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1689 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1695
1690
1696 // might as well check this
1691 // might as well check this
1697 map.set_untracked(p(b"path/does/not/exist"))?;
1692 map.set_untracked(p(b"path/does/not/exist"))?;
1698 assert_eq!(map.len(), 6);
1693 assert_eq!(map.len(), 6);
1699
1694
1700 map.set_untracked(p(b"some/other/nested/path"))?;
1695 map.set_untracked(p(b"some/other/nested/path"))?;
1701 // It is set untracked but not deleted since it held other information
1696 // It is set untracked but not deleted since it held other information
1702 assert_eq!(map.len(), 6);
1697 assert_eq!(map.len(), 6);
1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1698 assert_eq!(tracked_descendants(&map, b"some"), 2);
1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1699 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1700 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1701 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1702 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1703 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1709
1704
1710 map.set_untracked(p(b"some/nested/path"))?;
1705 map.set_untracked(p(b"some/nested/path"))?;
1711 // It is set untracked *and* deleted since it was only added
1706 // It is set untracked *and* deleted since it was only added
1712 assert_eq!(map.len(), 5);
1707 assert_eq!(map.len(), 5);
1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1708 assert_eq!(tracked_descendants(&map, b"some"), 1);
1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1709 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1710 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1711 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1717 assert_does_not_exist(&map, b"some/nested/path");
1712 assert_does_not_exist(&map, b"some/nested/path");
1718
1713
1719 map.set_untracked(p(b"root_file"))?;
1714 map.set_untracked(p(b"root_file"))?;
1720 // Untracked but not deleted
1715 // Untracked but not deleted
1721 assert_eq!(map.len(), 5);
1716 assert_eq!(map.len(), 5);
1722 assert!(map.get(p(b"root_file"))?.is_some());
1717 assert!(map.get(p(b"root_file"))?.is_some());
1723
1718
1724 map.set_untracked(p(b"some/file2"))?;
1719 map.set_untracked(p(b"some/file2"))?;
1725 assert_eq!(map.len(), 5);
1720 assert_eq!(map.len(), 5);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1721 assert_eq!(tracked_descendants(&map, b"some"), 0);
1727 assert!(map.get(p(b"some/file2"))?.is_some());
1722 assert!(map.get(p(b"some/file2"))?.is_some());
1728
1723
1729 map.set_untracked(p(b"some/file3"))?;
1724 map.set_untracked(p(b"some/file3"))?;
1730 assert_eq!(map.len(), 5);
1725 assert_eq!(map.len(), 5);
1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1732 assert!(map.get(p(b"some/file3"))?.is_some());
1727 assert!(map.get(p(b"some/file3"))?.is_some());
1733
1728
1734 Ok(())
1729 Ok(())
1735 }
1730 }
1736
1731
1737 /// Check that copies counter is correctly updated
1732 /// Check that copies counter is correctly updated
1738 #[test]
1733 #[test]
1739 fn test_copy_source() -> Result<(), DirstateError> {
1734 fn test_copy_source() -> Result<(), DirstateError> {
1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1735 let mut map = OwningDirstateMap::new_empty(vec![]);
1741
1736
1742 // Clean file
1737 // Clean file
1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1738 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1744 // Merged file
1739 // Merged file
1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1740 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1746 // Removed file
1741 // Removed file
1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1742 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1748 // Added file
1743 // Added file
1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1744 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1750 // Add copy
1745 // Add copy
1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1746 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1752 assert_eq!(map.copy_map_len(), 1);
1747 assert_eq!(map.copy_map_len(), 1);
1753
1748
1754 // Copy override
1749 // Copy override
1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1750 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1756 assert_eq!(map.copy_map_len(), 1);
1751 assert_eq!(map.copy_map_len(), 1);
1757
1752
1758 // Multiple copies
1753 // Multiple copies
1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1754 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1760 assert_eq!(map.copy_map_len(), 2);
1755 assert_eq!(map.copy_map_len(), 2);
1761
1756
1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1757 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1763 assert_eq!(map.copy_map_len(), 3);
1758 assert_eq!(map.copy_map_len(), 3);
1764
1759
1765 // Added, so the entry is completely removed
1760 // Added, so the entry is completely removed
1766 map.set_untracked(p(b"files/added"))?;
1761 map.set_untracked(p(b"files/added"))?;
1767 assert_does_not_exist(&map, b"files/added");
1762 assert_does_not_exist(&map, b"files/added");
1768 assert_eq!(map.copy_map_len(), 2);
1763 assert_eq!(map.copy_map_len(), 2);
1769
1764
1770 // Removed, so the entry is kept around, so is its copy
1765 // Removed, so the entry is kept around, so is its copy
1771 map.set_untracked(p(b"removed"))?;
1766 map.set_untracked(p(b"removed"))?;
1772 assert!(map.get(p(b"removed"))?.is_some());
1767 assert!(map.get(p(b"removed"))?.is_some());
1773 assert_eq!(map.copy_map_len(), 2);
1768 assert_eq!(map.copy_map_len(), 2);
1774
1769
1775 // Clean, so the entry is kept around, but not its copy
1770 // Clean, so the entry is kept around, but not its copy
1776 map.set_untracked(p(b"files/clean"))?;
1771 map.set_untracked(p(b"files/clean"))?;
1777 assert!(map.get(p(b"files/clean"))?.is_some());
1772 assert!(map.get(p(b"files/clean"))?.is_some());
1778 assert_eq!(map.copy_map_len(), 1);
1773 assert_eq!(map.copy_map_len(), 1);
1779
1774
1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1775 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1781 assert_eq!(map.copy_map_len(), 2);
1776 assert_eq!(map.copy_map_len(), 2);
1782
1777
1783 // Info from p2, so its copy source info is kept around
1778 // Info from p2, so its copy source info is kept around
1784 map.set_untracked(p(b"files/from_p2"))?;
1779 map.set_untracked(p(b"files/from_p2"))?;
1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1780 assert!(map.get(p(b"files/from_p2"))?.is_some());
1786 assert_eq!(map.copy_map_len(), 2);
1781 assert_eq!(map.copy_map_len(), 2);
1787
1782
1788 Ok(())
1783 Ok(())
1789 }
1784 }
1790
1785
1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1786 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1792 /// does not actually come from the disk, but it's opaque to the code being
1787 /// does not actually come from the disk, but it's opaque to the code being
1793 /// tested.
1788 /// tested.
1794 #[test]
1789 #[test]
1795 fn test_on_disk() -> Result<(), DirstateError> {
1790 fn test_on_disk() -> Result<(), DirstateError> {
1796 // First let's create some data to put "on disk"
1791 // First let's create some data to put "on disk"
1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1792 let mut map = OwningDirstateMap::new_empty(vec![]);
1798
1793
1799 // A file that was just added
1794 // A file that was just added
1800 map.set_tracked(p(b"some/nested/added"))?;
1795 map.set_tracked(p(b"some/nested/added"))?;
1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1796 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1802
1797
1803 // A file that was removed
1798 // A file that was removed
1804 map.reset_state(
1799 map.reset_state(
1805 p(b"some/nested/removed"),
1800 p(b"some/nested/removed"),
1806 false,
1801 false,
1807 true,
1802 true,
1808 false,
1803 false,
1809 false,
1804 false,
1810 None,
1805 None,
1811 )?;
1806 )?;
1812 // Only present in p2
1807 // Only present in p2
1813 map.reset_state(
1808 map.reset_state(
1814 p(b"other/p2_info_only"),
1809 p(b"other/p2_info_only"),
1815 false,
1810 false,
1816 false,
1811 false,
1817 true,
1812 true,
1818 false,
1813 false,
1819 None,
1814 None,
1820 )?;
1815 )?;
1821 map.copy_map_insert(
1816 map.copy_map_insert(
1822 p(b"other/p2_info_only"),
1817 p(b"other/p2_info_only"),
1823 p(b"other/p2_info_copy_source"),
1818 p(b"other/p2_info_copy_source"),
1824 )?;
1819 )?;
1825 // A file that was merged
1820 // A file that was merged
1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1821 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1827 // A file that is added, with info from p2
1822 // A file that is added, with info from p2
1828 // XXX is that actually possible?
1823 // XXX is that actually possible?
1829 map.reset_state(
1824 map.reset_state(
1830 p(b"other/added_with_p2"),
1825 p(b"other/added_with_p2"),
1831 true,
1826 true,
1832 false,
1827 false,
1833 true,
1828 true,
1834 false,
1829 false,
1835 None,
1830 None,
1836 )?;
1831 )?;
1837 // One layer without any files to test deletion cascade
1832 // One layer without any files to test deletion cascade
1838 // A clean file
1833 // A clean file
1839 map.reset_state(
1834 map.reset_state(
1840 p(b"some/other/nested/clean"),
1835 p(b"some/other/nested/clean"),
1841 true,
1836 true,
1842 true,
1837 true,
1843 false,
1838 false,
1844 false,
1839 false,
1845 None,
1840 None,
1846 )?;
1841 )?;
1847
1842
1848 let (packed, metadata, _should_append, _old_data_size) =
1843 let (packed, metadata, _should_append, _old_data_size) =
1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1844 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1850 let packed_len = packed.len();
1845 let packed_len = packed.len();
1851 assert!(packed_len > 0);
1846 assert!(packed_len > 0);
1852
1847
1853 // Recreate "from disk"
1848 // Recreate "from disk"
1854 let mut map = OwningDirstateMap::new_v2(
1849 let mut map = OwningDirstateMap::new_v2(
1855 packed,
1850 packed,
1856 packed_len,
1851 packed_len,
1857 metadata.as_bytes(),
1852 metadata.as_bytes(),
1858 vec![],
1853 vec![],
1859 None,
1854 None,
1860 )?;
1855 )?;
1861
1856
1862 // Check that everything is accounted for
1857 // Check that everything is accounted for
1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1858 assert!(map.contains_key(p(b"some/nested/added"))?);
1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1859 assert!(map.contains_key(p(b"some/nested/removed"))?);
1865 assert!(map.contains_key(p(b"merged"))?);
1860 assert!(map.contains_key(p(b"merged"))?);
1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1861 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1862 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1863 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1869 assert_eq!(
1864 assert_eq!(
1870 map.copy_map_get(p(b"some/nested/added"))?,
1865 map.copy_map_get(p(b"some/nested/added"))?,
1871 Some(p(b"added_copy_source"))
1866 Some(p(b"added_copy_source"))
1872 );
1867 );
1873 assert_eq!(
1868 assert_eq!(
1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1869 map.copy_map_get(p(b"other/p2_info_only"))?,
1875 Some(p(b"other/p2_info_copy_source"))
1870 Some(p(b"other/p2_info_copy_source"))
1876 );
1871 );
1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1872 assert_eq!(tracked_descendants(&map, b"some"), 2);
1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1873 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1874 assert_eq!(tracked_descendants(&map, b"other"), 1);
1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1875 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1876 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1877 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1878 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1879 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1880 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1881 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1887 assert_eq!(map.len(), 6);
1882 assert_eq!(map.len(), 6);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1883 assert_eq!(map.get_map().unreachable_bytes, 0);
1889 assert_eq!(map.copy_map_len(), 2);
1884 assert_eq!(map.copy_map_len(), 2);
1890
1885
1891 // Shouldn't change anything since it's already not tracked
1886 // Shouldn't change anything since it's already not tracked
1892 map.set_untracked(p(b"some/nested/removed"))?;
1887 map.set_untracked(p(b"some/nested/removed"))?;
1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1894
1889
1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1890 if let ChildNodes::InMemory(_) = map.get_map().root {
1896 panic!("root should not have been mutated")
1891 panic!("root should not have been mutated")
1897 }
1892 }
1898 // We haven't mutated enough (nothing, actually), we should still be in
1893 // We haven't mutated enough (nothing, actually), we should still be in
1899 // the append strategy
1894 // the append strategy
1900 assert!(map.get_map().write_should_append());
1895 assert!(map.get_map().write_should_append());
1901
1896
1902 // But this mutates the structure, so there should be unreachable_bytes
1897 // But this mutates the structure, so there should be unreachable_bytes
1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1898 assert!(map.set_untracked(p(b"some/nested/added"))?);
1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1899 let unreachable_bytes = map.get_map().unreachable_bytes;
1905 assert!(unreachable_bytes > 0);
1900 assert!(unreachable_bytes > 0);
1906
1901
1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1902 if let ChildNodes::OnDisk(_) = map.get_map().root {
1908 panic!("root should have been mutated")
1903 panic!("root should have been mutated")
1909 }
1904 }
1910
1905
1911 // This should not mutate the structure either, since `root` has
1906 // This should not mutate the structure either, since `root` has
1912 // already been mutated along with its direct children.
1907 // already been mutated along with its direct children.
1913 map.set_untracked(p(b"merged"))?;
1908 map.set_untracked(p(b"merged"))?;
1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1909 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1915
1910
1916 if let NodeRef::InMemory(_, _) =
1911 if let NodeRef::InMemory(_, _) =
1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1912 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1918 {
1913 {
1919 panic!("'other/added_with_p2' should not have been mutated")
1914 panic!("'other/added_with_p2' should not have been mutated")
1920 }
1915 }
1921 // But this should, since it's in a different path
1916 // But this should, since it's in a different path
1922 // than `<root>some/nested/add`
1917 // than `<root>some/nested/add`
1923 map.set_untracked(p(b"other/added_with_p2"))?;
1918 map.set_untracked(p(b"other/added_with_p2"))?;
1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1919 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1925
1920
1926 if let NodeRef::OnDisk(_) =
1921 if let NodeRef::OnDisk(_) =
1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1922 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1928 {
1923 {
1929 panic!("'other/added_with_p2' should have been mutated")
1924 panic!("'other/added_with_p2' should have been mutated")
1930 }
1925 }
1931
1926
1932 // We have rewritten most of the tree, we should create a new file
1927 // We have rewritten most of the tree, we should create a new file
1933 assert!(!map.get_map().write_should_append());
1928 assert!(!map.get_map().write_should_append());
1934
1929
1935 Ok(())
1930 Ok(())
1936 }
1931 }
1937 }
1932 }
@@ -1,915 +1,913
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::{
7 use crate::dirstate_tree::dirstate_map::{
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 };
9 };
10 use crate::dirstate_tree::path_with_basename::WithBasename;
10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::utils::hg_path::HgPath;
12 use crate::utils::hg_path::HgPath;
13 use crate::DirstateEntry;
13 use crate::DirstateEntry;
14 use crate::DirstateError;
14 use crate::DirstateError;
15 use crate::DirstateParents;
15 use crate::DirstateParents;
16 use bitflags::bitflags;
16 use bitflags::bitflags;
17 use bytes_cast::unaligned::{U16Be, U32Be};
17 use bytes_cast::unaligned::{U16Be, U32Be};
18 use bytes_cast::BytesCast;
18 use bytes_cast::BytesCast;
19 use format_bytes::format_bytes;
19 use format_bytes::format_bytes;
20 use rand::Rng;
20 use rand::Rng;
21 use std::borrow::Cow;
21 use std::borrow::Cow;
22 use std::fmt::Write;
22 use std::fmt::Write;
23
23
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 /// This a redundant sanity check more than an actual "magic number" since
25 /// This a redundant sanity check more than an actual "magic number" since
26 /// `.hg/requires` already governs which format should be used.
26 /// `.hg/requires` already governs which format should be used.
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28
28
29 /// Keep space for 256-bit hashes
29 /// Keep space for 256-bit hashes
30 const STORED_NODE_ID_BYTES: usize = 32;
30 const STORED_NODE_ID_BYTES: usize = 32;
31
31
32 /// … even though only 160 bits are used for now, with SHA-1
32 /// … even though only 160 bits are used for now, with SHA-1
33 const USED_NODE_ID_BYTES: usize = 20;
33 const USED_NODE_ID_BYTES: usize = 20;
34
34
35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
37
37
38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
39 const TREE_METADATA_SIZE: usize = 44;
39 const TREE_METADATA_SIZE: usize = 44;
40 const NODE_SIZE: usize = 44;
40 const NODE_SIZE: usize = 44;
41
41
42 /// Make sure that size-affecting changes are made knowingly
42 /// Make sure that size-affecting changes are made knowingly
43 #[allow(unused)]
43 #[allow(unused)]
44 fn static_assert_size_of() {
44 fn static_assert_size_of() {
45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
48 }
48 }
49
49
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 #[derive(BytesCast)]
51 #[derive(BytesCast)]
52 #[repr(C)]
52 #[repr(C)]
53 struct DocketHeader {
53 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
57
58 metadata: TreeMetadata,
58 metadata: TreeMetadata,
59
59
60 /// Counted in bytes
60 /// Counted in bytes
61 data_size: Size,
61 data_size: Size,
62
62
63 uuid_size: u8,
63 uuid_size: u8,
64 }
64 }
65
65
66 pub struct Docket<'on_disk> {
66 pub struct Docket<'on_disk> {
67 header: &'on_disk DocketHeader,
67 header: &'on_disk DocketHeader,
68 pub uuid: &'on_disk [u8],
68 pub uuid: &'on_disk [u8],
69 }
69 }
70
70
71 /// Fields are documented in the *Tree metadata in the docket file*
71 /// Fields are documented in the *Tree metadata in the docket file*
72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
73 #[derive(BytesCast)]
73 #[derive(BytesCast)]
74 #[repr(C)]
74 #[repr(C)]
75 pub struct TreeMetadata {
75 pub struct TreeMetadata {
76 root_nodes: ChildNodes,
76 root_nodes: ChildNodes,
77 nodes_with_entry_count: Size,
77 nodes_with_entry_count: Size,
78 nodes_with_copy_source_count: Size,
78 nodes_with_copy_source_count: Size,
79 unreachable_bytes: Size,
79 unreachable_bytes: Size,
80 unused: [u8; 4],
80 unused: [u8; 4],
81
81
82 /// See *Optional hash of ignore patterns* section of
82 /// See *Optional hash of ignore patterns* section of
83 /// `mercurial/helptext/internals/dirstate-v2.txt`
83 /// `mercurial/helptext/internals/dirstate-v2.txt`
84 ignore_patterns_hash: IgnorePatternsHash,
84 ignore_patterns_hash: IgnorePatternsHash,
85 }
85 }
86
86
87 /// Fields are documented in the *The data file format*
87 /// Fields are documented in the *The data file format*
88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
89 #[derive(BytesCast, Debug)]
89 #[derive(BytesCast, Debug)]
90 #[repr(C)]
90 #[repr(C)]
91 pub(super) struct Node {
91 pub(super) struct Node {
92 full_path: PathSlice,
92 full_path: PathSlice,
93
93
94 /// In bytes from `self.full_path.start`
94 /// In bytes from `self.full_path.start`
95 base_name_start: PathSize,
95 base_name_start: PathSize,
96
96
97 copy_source: OptPathSlice,
97 copy_source: OptPathSlice,
98 children: ChildNodes,
98 children: ChildNodes,
99 pub(super) descendants_with_entry_count: Size,
99 pub(super) descendants_with_entry_count: Size,
100 pub(super) tracked_descendants_count: Size,
100 pub(super) tracked_descendants_count: Size,
101 flags: U16Be,
101 flags: U16Be,
102 size: U32Be,
102 size: U32Be,
103 mtime: PackedTruncatedTimestamp,
103 mtime: PackedTruncatedTimestamp,
104 }
104 }
105
105
106 bitflags! {
106 bitflags! {
107 #[repr(C)]
107 #[repr(C)]
108 struct Flags: u16 {
108 struct Flags: u16 {
109 const WDIR_TRACKED = 1 << 0;
109 const WDIR_TRACKED = 1 << 0;
110 const P1_TRACKED = 1 << 1;
110 const P1_TRACKED = 1 << 1;
111 const P2_INFO = 1 << 2;
111 const P2_INFO = 1 << 2;
112 const MODE_EXEC_PERM = 1 << 3;
112 const MODE_EXEC_PERM = 1 << 3;
113 const MODE_IS_SYMLINK = 1 << 4;
113 const MODE_IS_SYMLINK = 1 << 4;
114 const HAS_FALLBACK_EXEC = 1 << 5;
114 const HAS_FALLBACK_EXEC = 1 << 5;
115 const FALLBACK_EXEC = 1 << 6;
115 const FALLBACK_EXEC = 1 << 6;
116 const HAS_FALLBACK_SYMLINK = 1 << 7;
116 const HAS_FALLBACK_SYMLINK = 1 << 7;
117 const FALLBACK_SYMLINK = 1 << 8;
117 const FALLBACK_SYMLINK = 1 << 8;
118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
119 const HAS_MODE_AND_SIZE = 1 <<10;
119 const HAS_MODE_AND_SIZE = 1 <<10;
120 const HAS_MTIME = 1 <<11;
120 const HAS_MTIME = 1 <<11;
121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
122 const DIRECTORY = 1 <<13;
122 const DIRECTORY = 1 <<13;
123 const ALL_UNKNOWN_RECORDED = 1 <<14;
123 const ALL_UNKNOWN_RECORDED = 1 <<14;
124 const ALL_IGNORED_RECORDED = 1 <<15;
124 const ALL_IGNORED_RECORDED = 1 <<15;
125 }
125 }
126 }
126 }
127
127
128 /// Duration since the Unix epoch
128 /// Duration since the Unix epoch
129 #[derive(BytesCast, Copy, Clone, Debug)]
129 #[derive(BytesCast, Copy, Clone, Debug)]
130 #[repr(C)]
130 #[repr(C)]
131 struct PackedTruncatedTimestamp {
131 struct PackedTruncatedTimestamp {
132 truncated_seconds: U32Be,
132 truncated_seconds: U32Be,
133 nanoseconds: U32Be,
133 nanoseconds: U32Be,
134 }
134 }
135
135
136 /// Counted in bytes from the start of the file
136 /// Counted in bytes from the start of the file
137 ///
137 ///
138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
139 type Offset = U32Be;
139 type Offset = U32Be;
140
140
141 /// Counted in number of items
141 /// Counted in number of items
142 ///
142 ///
143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
144 type Size = U32Be;
144 type Size = U32Be;
145
145
146 /// Counted in bytes
146 /// Counted in bytes
147 ///
147 ///
148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
149 type PathSize = U16Be;
149 type PathSize = U16Be;
150
150
151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
152 /// of either some other node or of the repository root.
152 /// of either some other node or of the repository root.
153 ///
153 ///
154 /// Always sorted by ascending `full_path`, to allow binary search.
154 /// Always sorted by ascending `full_path`, to allow binary search.
155 /// Since nodes with the same parent nodes also have the same parent path,
155 /// Since nodes with the same parent nodes also have the same parent path,
156 /// only the `base_name`s need to be compared during binary search.
156 /// only the `base_name`s need to be compared during binary search.
157 #[derive(BytesCast, Copy, Clone, Debug)]
157 #[derive(BytesCast, Copy, Clone, Debug)]
158 #[repr(C)]
158 #[repr(C)]
159 struct ChildNodes {
159 struct ChildNodes {
160 start: Offset,
160 start: Offset,
161 len: Size,
161 len: Size,
162 }
162 }
163
163
164 /// A `HgPath` of `len` bytes
164 /// A `HgPath` of `len` bytes
165 #[derive(BytesCast, Copy, Clone, Debug)]
165 #[derive(BytesCast, Copy, Clone, Debug)]
166 #[repr(C)]
166 #[repr(C)]
167 struct PathSlice {
167 struct PathSlice {
168 start: Offset,
168 start: Offset,
169 len: PathSize,
169 len: PathSize,
170 }
170 }
171
171
172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
173 type OptPathSlice = PathSlice;
173 type OptPathSlice = PathSlice;
174
174
175 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
175 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
176 ///
176 ///
177 /// This should only happen if Mercurial is buggy or a repository is corrupted.
177 /// This should only happen if Mercurial is buggy or a repository is corrupted.
178 #[derive(Debug)]
178 #[derive(Debug)]
179 pub struct DirstateV2ParseError {
179 pub struct DirstateV2ParseError {
180 message: String,
180 message: String,
181 }
181 }
182
182
183 impl DirstateV2ParseError {
183 impl DirstateV2ParseError {
184 pub fn new<S: Into<String>>(message: S) -> Self {
184 pub fn new<S: Into<String>>(message: S) -> Self {
185 Self {
185 Self {
186 message: message.into(),
186 message: message.into(),
187 }
187 }
188 }
188 }
189 }
189 }
190
190
191 impl From<DirstateV2ParseError> for HgError {
191 impl From<DirstateV2ParseError> for HgError {
192 fn from(e: DirstateV2ParseError) -> Self {
192 fn from(e: DirstateV2ParseError) -> Self {
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
194 }
194 }
195 }
195 }
196
196
197 impl From<DirstateV2ParseError> for crate::DirstateError {
197 impl From<DirstateV2ParseError> for crate::DirstateError {
198 fn from(error: DirstateV2ParseError) -> Self {
198 fn from(error: DirstateV2ParseError) -> Self {
199 HgError::from(error).into()
199 HgError::from(error).into()
200 }
200 }
201 }
201 }
202
202
203 impl TreeMetadata {
203 impl TreeMetadata {
204 pub fn as_bytes(&self) -> &[u8] {
204 pub fn as_bytes(&self) -> &[u8] {
205 BytesCast::as_bytes(self)
205 BytesCast::as_bytes(self)
206 }
206 }
207 }
207 }
208
208
209 impl<'on_disk> Docket<'on_disk> {
209 impl<'on_disk> Docket<'on_disk> {
210 /// Generate the identifier for a new data file
210 /// Generate the identifier for a new data file
211 ///
211 ///
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
213 /// See `mercurial/revlogutils/docket.py`
213 /// See `mercurial/revlogutils/docket.py`
214 pub fn new_uid() -> String {
214 pub fn new_uid() -> String {
215 const ID_LENGTH: usize = 8;
215 const ID_LENGTH: usize = 8;
216 let mut id = String::with_capacity(ID_LENGTH);
216 let mut id = String::with_capacity(ID_LENGTH);
217 let mut rng = rand::thread_rng();
217 let mut rng = rand::thread_rng();
218 for _ in 0..ID_LENGTH {
218 for _ in 0..ID_LENGTH {
219 // One random hexadecimal digit.
219 // One random hexadecimal digit.
220 // `unwrap` never panics because `impl Write for String`
220 // `unwrap` never panics because `impl Write for String`
221 // never returns an error.
221 // never returns an error.
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
223 }
223 }
224 id
224 id
225 }
225 }
226
226
227 pub fn serialize(
227 pub fn serialize(
228 parents: DirstateParents,
228 parents: DirstateParents,
229 tree_metadata: TreeMetadata,
229 tree_metadata: TreeMetadata,
230 data_size: u64,
230 data_size: u64,
231 uuid: &[u8],
231 uuid: &[u8],
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
233 let header = DocketHeader {
233 let header = DocketHeader {
234 marker: *V2_FORMAT_MARKER,
234 marker: *V2_FORMAT_MARKER,
235 parent_1: parents.p1.pad_to_256_bits(),
235 parent_1: parents.p1.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
237 metadata: tree_metadata,
237 metadata: tree_metadata,
238 data_size: u32::try_from(data_size)?.into(),
238 data_size: u32::try_from(data_size)?.into(),
239 uuid_size: uuid.len().try_into()?,
239 uuid_size: uuid.len().try_into()?,
240 };
240 };
241 let header = header.as_bytes();
241 let header = header.as_bytes();
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
243 docket.extend_from_slice(header);
243 docket.extend_from_slice(header);
244 docket.extend_from_slice(uuid);
244 docket.extend_from_slice(uuid);
245 Ok(docket)
245 Ok(docket)
246 }
246 }
247
247
248 pub fn parents(&self) -> DirstateParents {
248 pub fn parents(&self) -> DirstateParents {
249 use crate::Node;
249 use crate::Node;
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
251 .unwrap();
251 .unwrap();
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 .unwrap();
253 .unwrap();
254 DirstateParents { p1, p2 }
254 DirstateParents { p1, p2 }
255 }
255 }
256
256
257 pub fn tree_metadata(&self) -> &[u8] {
257 pub fn tree_metadata(&self) -> &[u8] {
258 self.header.metadata.as_bytes()
258 self.header.metadata.as_bytes()
259 }
259 }
260
260
261 pub fn data_size(&self) -> usize {
261 pub fn data_size(&self) -> usize {
262 // This `unwrap` could only panic on a 16-bit CPU
262 // This `unwrap` could only panic on a 16-bit CPU
263 self.header.data_size.get().try_into().unwrap()
263 self.header.data_size.get().try_into().unwrap()
264 }
264 }
265
265
266 pub fn data_filename(&self) -> String {
266 pub fn data_filename(&self) -> String {
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 }
268 }
269 }
269 }
270
270
271 pub fn read_docket(
271 pub fn read_docket(
272 on_disk: &[u8],
272 on_disk: &[u8],
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 })?;
276 })?;
277 let uuid_size = header.uuid_size as usize;
277 let uuid_size = header.uuid_size as usize;
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 Ok(Docket { header, uuid })
279 Ok(Docket { header, uuid })
280 } else {
280 } else {
281 Err(DirstateV2ParseError::new(
281 Err(DirstateV2ParseError::new(
282 "invalid format marker or uuid size",
282 "invalid format marker or uuid size",
283 ))
283 ))
284 }
284 }
285 }
285 }
286
286
287 pub(super) fn read<'on_disk>(
287 pub(super) fn read<'on_disk>(
288 on_disk: &'on_disk [u8],
288 on_disk: &'on_disk [u8],
289 metadata: &[u8],
289 metadata: &[u8],
290 uuid: Vec<u8>,
290 uuid: Vec<u8>,
291 identity: Option<u64>,
291 identity: Option<u64>,
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
293 if on_disk.is_empty() {
293 if on_disk.is_empty() {
294 let mut map = DirstateMap::empty(on_disk);
294 let mut map = DirstateMap::empty(on_disk);
295 map.dirstate_version = DirstateVersion::V2;
295 map.dirstate_version = DirstateVersion::V2;
296 return Ok(map);
296 return Ok(map);
297 }
297 }
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
300 })?;
300 })?;
301 let dirstate_map = DirstateMap {
301 let dirstate_map = DirstateMap {
302 on_disk,
302 on_disk,
303 root: dirstate_map::ChildNodes::OnDisk(
303 root: dirstate_map::ChildNodes::OnDisk(
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
305 e.message = format!("{}, when reading root notes", e.message);
305 e.message = format!("{}, when reading root notes", e.message);
306 e
306 e
307 })?,
307 })?,
308 ),
308 ),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
311 ignore_patterns_hash: meta.ignore_patterns_hash,
311 ignore_patterns_hash: meta.ignore_patterns_hash,
312 unreachable_bytes: meta.unreachable_bytes.get(),
312 unreachable_bytes: meta.unreachable_bytes.get(),
313 old_data_size: on_disk.len(),
313 old_data_size: on_disk.len(),
314 old_uuid: Some(uuid),
314 old_uuid: Some(uuid),
315 identity,
315 identity,
316 dirstate_version: DirstateVersion::V2,
316 dirstate_version: DirstateVersion::V2,
317 write_mode: DirstateMapWriteMode::Auto,
317 write_mode: DirstateMapWriteMode::Auto,
318 };
318 };
319 Ok(dirstate_map)
319 Ok(dirstate_map)
320 }
320 }
321
321
322 impl Node {
322 impl Node {
323 pub(super) fn full_path<'on_disk>(
323 pub(super) fn full_path<'on_disk>(
324 &self,
324 &self,
325 on_disk: &'on_disk [u8],
325 on_disk: &'on_disk [u8],
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 read_hg_path(on_disk, self.full_path)
327 read_hg_path(on_disk, self.full_path)
328 }
328 }
329
329
330 pub(super) fn base_name_start(
330 pub(super) fn base_name_start(
331 &self,
331 &self,
332 ) -> Result<usize, DirstateV2ParseError> {
332 ) -> Result<usize, DirstateV2ParseError> {
333 let start = self.base_name_start.get();
333 let start = self.base_name_start.get();
334 if start < self.full_path.len.get() {
334 if start < self.full_path.len.get() {
335 let start = usize::try_from(start)
335 let start = usize::from(start);
336 // u32 -> usize, could only panic on a 16-bit CPU
337 .expect("dirstate-v2 base_name_start out of bounds");
338 Ok(start)
336 Ok(start)
339 } else {
337 } else {
340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
338 Err(DirstateV2ParseError::new("not enough bytes for base name"))
341 }
339 }
342 }
340 }
343
341
344 pub(super) fn base_name<'on_disk>(
342 pub(super) fn base_name<'on_disk>(
345 &self,
343 &self,
346 on_disk: &'on_disk [u8],
344 on_disk: &'on_disk [u8],
347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
345 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
348 let full_path = self.full_path(on_disk)?;
346 let full_path = self.full_path(on_disk)?;
349 let base_name_start = self.base_name_start()?;
347 let base_name_start = self.base_name_start()?;
350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
348 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
351 }
349 }
352
350
353 pub(super) fn path<'on_disk>(
351 pub(super) fn path<'on_disk>(
354 &self,
352 &self,
355 on_disk: &'on_disk [u8],
353 on_disk: &'on_disk [u8],
356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
354 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
357 Ok(WithBasename::from_raw_parts(
355 Ok(WithBasename::from_raw_parts(
358 Cow::Borrowed(self.full_path(on_disk)?),
356 Cow::Borrowed(self.full_path(on_disk)?),
359 self.base_name_start()?,
357 self.base_name_start()?,
360 ))
358 ))
361 }
359 }
362
360
363 pub(super) fn has_copy_source(&self) -> bool {
361 pub(super) fn has_copy_source(&self) -> bool {
364 self.copy_source.start.get() != 0
362 self.copy_source.start.get() != 0
365 }
363 }
366
364
367 pub(super) fn copy_source<'on_disk>(
365 pub(super) fn copy_source<'on_disk>(
368 &self,
366 &self,
369 on_disk: &'on_disk [u8],
367 on_disk: &'on_disk [u8],
370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
368 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
371 Ok(if self.has_copy_source() {
369 Ok(if self.has_copy_source() {
372 Some(read_hg_path(on_disk, self.copy_source)?)
370 Some(read_hg_path(on_disk, self.copy_source)?)
373 } else {
371 } else {
374 None
372 None
375 })
373 })
376 }
374 }
377
375
378 fn flags(&self) -> Flags {
376 fn flags(&self) -> Flags {
379 Flags::from_bits_truncate(self.flags.get())
377 Flags::from_bits_truncate(self.flags.get())
380 }
378 }
381
379
382 fn has_entry(&self) -> bool {
380 fn has_entry(&self) -> bool {
383 self.flags().intersects(
381 self.flags().intersects(
384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
382 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
385 )
383 )
386 }
384 }
387
385
388 pub(super) fn node_data(
386 pub(super) fn node_data(
389 &self,
387 &self,
390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
388 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
391 if self.has_entry() {
389 if self.has_entry() {
392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
390 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
393 } else if let Some(mtime) = self.cached_directory_mtime()? {
391 } else if let Some(mtime) = self.cached_directory_mtime()? {
394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
392 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
395 } else {
393 } else {
396 Ok(dirstate_map::NodeData::None)
394 Ok(dirstate_map::NodeData::None)
397 }
395 }
398 }
396 }
399
397
400 pub(super) fn cached_directory_mtime(
398 pub(super) fn cached_directory_mtime(
401 &self,
399 &self,
402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
400 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
403 // For now we do not have code to handle the absence of
401 // For now we do not have code to handle the absence of
404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
402 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
405 // unset.
403 // unset.
406 if self.flags().contains(Flags::DIRECTORY)
404 if self.flags().contains(Flags::DIRECTORY)
407 && self.flags().contains(Flags::HAS_MTIME)
405 && self.flags().contains(Flags::HAS_MTIME)
408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
406 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
409 {
407 {
410 Ok(Some(self.mtime()?))
408 Ok(Some(self.mtime()?))
411 } else {
409 } else {
412 Ok(None)
410 Ok(None)
413 }
411 }
414 }
412 }
415
413
416 fn synthesize_unix_mode(&self) -> u32 {
414 fn synthesize_unix_mode(&self) -> u32 {
417 // Some platforms' libc don't have the same type (MacOS uses i32 here)
415 // Some platforms' libc don't have the same type (MacOS uses i32 here)
418 #[allow(clippy::unnecessary_cast)]
416 #[allow(clippy::unnecessary_cast)]
419 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
417 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
420 libc::S_IFLNK as u32
418 libc::S_IFLNK as u32
421 } else {
419 } else {
422 libc::S_IFREG as u32
420 libc::S_IFREG as u32
423 };
421 };
424 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
422 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
425 0o755
423 0o755
426 } else {
424 } else {
427 0o644
425 0o644
428 };
426 };
429 file_type | permissions
427 file_type | permissions
430 }
428 }
431
429
432 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
430 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
433 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
431 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
434 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
432 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
435 m.second_ambiguous = true;
433 m.second_ambiguous = true;
436 }
434 }
437 Ok(m)
435 Ok(m)
438 }
436 }
439
437
440 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
438 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
441 // TODO: convert through raw bits instead?
439 // TODO: convert through raw bits instead?
442 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
440 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
443 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
441 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
444 let p2_info = self.flags().contains(Flags::P2_INFO);
442 let p2_info = self.flags().contains(Flags::P2_INFO);
445 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
443 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
446 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
444 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
447 {
445 {
448 Some((self.synthesize_unix_mode(), self.size.into()))
446 Some((self.synthesize_unix_mode(), self.size.into()))
449 } else {
447 } else {
450 None
448 None
451 };
449 };
452 let mtime = if self.flags().contains(Flags::HAS_MTIME)
450 let mtime = if self.flags().contains(Flags::HAS_MTIME)
453 && !self.flags().contains(Flags::DIRECTORY)
451 && !self.flags().contains(Flags::DIRECTORY)
454 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
452 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
455 {
453 {
456 Some(self.mtime()?)
454 Some(self.mtime()?)
457 } else {
455 } else {
458 None
456 None
459 };
457 };
460 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
458 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
461 {
459 {
462 Some(self.flags().contains(Flags::FALLBACK_EXEC))
460 Some(self.flags().contains(Flags::FALLBACK_EXEC))
463 } else {
461 } else {
464 None
462 None
465 };
463 };
466 let fallback_symlink =
464 let fallback_symlink =
467 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
465 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
468 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
466 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
469 } else {
467 } else {
470 None
468 None
471 };
469 };
472 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
470 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
473 wc_tracked,
471 wc_tracked,
474 p1_tracked,
472 p1_tracked,
475 p2_info,
473 p2_info,
476 mode_size,
474 mode_size,
477 mtime,
475 mtime,
478 fallback_exec,
476 fallback_exec,
479 fallback_symlink,
477 fallback_symlink,
480 }))
478 }))
481 }
479 }
482
480
483 pub(super) fn entry(
481 pub(super) fn entry(
484 &self,
482 &self,
485 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
483 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
486 if self.has_entry() {
484 if self.has_entry() {
487 Ok(Some(self.assume_entry()?))
485 Ok(Some(self.assume_entry()?))
488 } else {
486 } else {
489 Ok(None)
487 Ok(None)
490 }
488 }
491 }
489 }
492
490
493 pub(super) fn children<'on_disk>(
491 pub(super) fn children<'on_disk>(
494 &self,
492 &self,
495 on_disk: &'on_disk [u8],
493 on_disk: &'on_disk [u8],
496 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
494 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
497 read_nodes(on_disk, self.children)
495 read_nodes(on_disk, self.children)
498 }
496 }
499
497
500 pub(super) fn to_in_memory_node<'on_disk>(
498 pub(super) fn to_in_memory_node<'on_disk>(
501 &self,
499 &self,
502 on_disk: &'on_disk [u8],
500 on_disk: &'on_disk [u8],
503 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
501 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
504 Ok(dirstate_map::Node {
502 Ok(dirstate_map::Node {
505 children: dirstate_map::ChildNodes::OnDisk(
503 children: dirstate_map::ChildNodes::OnDisk(
506 self.children(on_disk)?,
504 self.children(on_disk)?,
507 ),
505 ),
508 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
506 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
509 data: self.node_data()?,
507 data: self.node_data()?,
510 descendants_with_entry_count: self
508 descendants_with_entry_count: self
511 .descendants_with_entry_count
509 .descendants_with_entry_count
512 .get(),
510 .get(),
513 tracked_descendants_count: self.tracked_descendants_count.get(),
511 tracked_descendants_count: self.tracked_descendants_count.get(),
514 })
512 })
515 }
513 }
516
514
517 fn from_dirstate_entry(
515 fn from_dirstate_entry(
518 entry: &DirstateEntry,
516 entry: &DirstateEntry,
519 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
517 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
520 let DirstateV2Data {
518 let DirstateV2Data {
521 wc_tracked,
519 wc_tracked,
522 p1_tracked,
520 p1_tracked,
523 p2_info,
521 p2_info,
524 mode_size: mode_size_opt,
522 mode_size: mode_size_opt,
525 mtime: mtime_opt,
523 mtime: mtime_opt,
526 fallback_exec,
524 fallback_exec,
527 fallback_symlink,
525 fallback_symlink,
528 } = entry.v2_data();
526 } = entry.v2_data();
529 // TODO: convert through raw flag bits instead?
527 // TODO: convert through raw flag bits instead?
530 let mut flags = Flags::empty();
528 let mut flags = Flags::empty();
531 flags.set(Flags::WDIR_TRACKED, wc_tracked);
529 flags.set(Flags::WDIR_TRACKED, wc_tracked);
532 flags.set(Flags::P1_TRACKED, p1_tracked);
530 flags.set(Flags::P1_TRACKED, p1_tracked);
533 flags.set(Flags::P2_INFO, p2_info);
531 flags.set(Flags::P2_INFO, p2_info);
534 // Some platforms' libc don't have the same type (MacOS uses i32 here)
532 // Some platforms' libc don't have the same type (MacOS uses i32 here)
535 #[allow(clippy::unnecessary_cast)]
533 #[allow(clippy::unnecessary_cast)]
536 let size = if let Some((m, s)) = mode_size_opt {
534 let size = if let Some((m, s)) = mode_size_opt {
537 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
535 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
538 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
536 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
539 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
537 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
540 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
538 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
541 flags.insert(Flags::HAS_MODE_AND_SIZE);
539 flags.insert(Flags::HAS_MODE_AND_SIZE);
542 s.into()
540 s.into()
543 } else {
541 } else {
544 0.into()
542 0.into()
545 };
543 };
546 let mtime = if let Some(m) = mtime_opt {
544 let mtime = if let Some(m) = mtime_opt {
547 flags.insert(Flags::HAS_MTIME);
545 flags.insert(Flags::HAS_MTIME);
548 if m.second_ambiguous {
546 if m.second_ambiguous {
549 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
547 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
550 };
548 };
551 m.into()
549 m.into()
552 } else {
550 } else {
553 PackedTruncatedTimestamp::null()
551 PackedTruncatedTimestamp::null()
554 };
552 };
555 if let Some(f_exec) = fallback_exec {
553 if let Some(f_exec) = fallback_exec {
556 flags.insert(Flags::HAS_FALLBACK_EXEC);
554 flags.insert(Flags::HAS_FALLBACK_EXEC);
557 if f_exec {
555 if f_exec {
558 flags.insert(Flags::FALLBACK_EXEC);
556 flags.insert(Flags::FALLBACK_EXEC);
559 }
557 }
560 }
558 }
561 if let Some(f_symlink) = fallback_symlink {
559 if let Some(f_symlink) = fallback_symlink {
562 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
560 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
563 if f_symlink {
561 if f_symlink {
564 flags.insert(Flags::FALLBACK_SYMLINK);
562 flags.insert(Flags::FALLBACK_SYMLINK);
565 }
563 }
566 }
564 }
567 (flags, size, mtime)
565 (flags, size, mtime)
568 }
566 }
569 }
567 }
570
568
571 fn read_hg_path(
569 fn read_hg_path(
572 on_disk: &[u8],
570 on_disk: &[u8],
573 slice: PathSlice,
571 slice: PathSlice,
574 ) -> Result<&HgPath, DirstateV2ParseError> {
572 ) -> Result<&HgPath, DirstateV2ParseError> {
575 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
573 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
576 }
574 }
577
575
578 fn read_nodes(
576 fn read_nodes(
579 on_disk: &[u8],
577 on_disk: &[u8],
580 slice: ChildNodes,
578 slice: ChildNodes,
581 ) -> Result<&[Node], DirstateV2ParseError> {
579 ) -> Result<&[Node], DirstateV2ParseError> {
582 read_slice(on_disk, slice.start, slice.len.get())
580 read_slice(on_disk, slice.start, slice.len.get())
583 }
581 }
584
582
585 fn read_slice<T, Len>(
583 fn read_slice<T, Len>(
586 on_disk: &[u8],
584 on_disk: &[u8],
587 start: Offset,
585 start: Offset,
588 len: Len,
586 len: Len,
589 ) -> Result<&[T], DirstateV2ParseError>
587 ) -> Result<&[T], DirstateV2ParseError>
590 where
588 where
591 T: BytesCast,
589 T: BytesCast,
592 Len: TryInto<usize>,
590 Len: TryInto<usize>,
593 {
591 {
594 // Either `usize::MAX` would result in "out of bounds" error since a single
592 // Either `usize::MAX` would result in "out of bounds" error since a single
595 // `&[u8]` cannot occupy the entire addess space.
593 // `&[u8]` cannot occupy the entire addess space.
596 let start = start.get().try_into().unwrap_or(std::usize::MAX);
594 let start = start.get().try_into().unwrap_or(usize::MAX);
597 let len = len.try_into().unwrap_or(std::usize::MAX);
595 let len = len.try_into().unwrap_or(usize::MAX);
598 let bytes = match on_disk.get(start..) {
596 let bytes = match on_disk.get(start..) {
599 Some(bytes) => bytes,
597 Some(bytes) => bytes,
600 None => {
598 None => {
601 return Err(DirstateV2ParseError::new(
599 return Err(DirstateV2ParseError::new(
602 "not enough bytes from disk",
600 "not enough bytes from disk",
603 ))
601 ))
604 }
602 }
605 };
603 };
606 T::slice_from_bytes(bytes, len)
604 T::slice_from_bytes(bytes, len)
607 .map_err(|e| {
605 .map_err(|e| {
608 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
606 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
609 })
607 })
610 .map(|(slice, _rest)| slice)
608 .map(|(slice, _rest)| slice)
611 }
609 }
612
610
613 /// Returns new data and metadata, together with whether that data should be
611 /// Returns new data and metadata, together with whether that data should be
614 /// appended to the existing data file whose content is at
612 /// appended to the existing data file whose content is at
615 /// `dirstate_map.on_disk` (true), instead of written to a new data file
613 /// `dirstate_map.on_disk` (true), instead of written to a new data file
616 /// (false), and the previous size of data on disk.
614 /// (false), and the previous size of data on disk.
617 pub(super) fn write(
615 pub(super) fn write(
618 dirstate_map: &DirstateMap,
616 dirstate_map: &DirstateMap,
619 write_mode: DirstateMapWriteMode,
617 write_mode: DirstateMapWriteMode,
620 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
618 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
621 let append = match write_mode {
619 let append = match write_mode {
622 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
620 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
623 DirstateMapWriteMode::ForceNewDataFile => false,
621 DirstateMapWriteMode::ForceNewDataFile => false,
624 DirstateMapWriteMode::ForceAppend => true,
622 DirstateMapWriteMode::ForceAppend => true,
625 };
623 };
626 if append {
624 if append {
627 log::trace!("appending to the dirstate data file");
625 log::trace!("appending to the dirstate data file");
628 } else {
626 } else {
629 log::trace!("creating new dirstate data file");
627 log::trace!("creating new dirstate data file");
630 }
628 }
631
629
632 // This ignores the space for paths, and for nodes without an entry.
630 // This ignores the space for paths, and for nodes without an entry.
633 // TODO: better estimate? Skip the `Vec` and write to a file directly?
631 // TODO: better estimate? Skip the `Vec` and write to a file directly?
634 let size_guess = std::mem::size_of::<Node>()
632 let size_guess = std::mem::size_of::<Node>()
635 * dirstate_map.nodes_with_entry_count as usize;
633 * dirstate_map.nodes_with_entry_count as usize;
636
634
637 let mut writer = Writer {
635 let mut writer = Writer {
638 dirstate_map,
636 dirstate_map,
639 append,
637 append,
640 out: Vec::with_capacity(size_guess),
638 out: Vec::with_capacity(size_guess),
641 };
639 };
642
640
643 let root_nodes = dirstate_map.root.as_ref();
641 let root_nodes = dirstate_map.root.as_ref();
644 for node in root_nodes.iter() {
642 for node in root_nodes.iter() {
645 // Catch some corruptions before we write to disk
643 // Catch some corruptions before we write to disk
646 let full_path = node.full_path(dirstate_map.on_disk)?;
644 let full_path = node.full_path(dirstate_map.on_disk)?;
647 let base_name = node.base_name(dirstate_map.on_disk)?;
645 let base_name = node.base_name(dirstate_map.on_disk)?;
648 if full_path != base_name {
646 if full_path != base_name {
649 let explanation = format!(
647 let explanation = format!(
650 "Dirstate root node '{}' is not at the root",
648 "Dirstate root node '{}' is not at the root",
651 full_path
649 full_path
652 );
650 );
653 return Err(HgError::corrupted(explanation).into());
651 return Err(HgError::corrupted(explanation).into());
654 }
652 }
655 }
653 }
656 let root_nodes = writer.write_nodes(root_nodes)?;
654 let root_nodes = writer.write_nodes(root_nodes)?;
657
655
658 let unreachable_bytes = if append {
656 let unreachable_bytes = if append {
659 dirstate_map.unreachable_bytes
657 dirstate_map.unreachable_bytes
660 } else {
658 } else {
661 0
659 0
662 };
660 };
663 let meta = TreeMetadata {
661 let meta = TreeMetadata {
664 root_nodes,
662 root_nodes,
665 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
663 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
666 nodes_with_copy_source_count: dirstate_map
664 nodes_with_copy_source_count: dirstate_map
667 .nodes_with_copy_source_count
665 .nodes_with_copy_source_count
668 .into(),
666 .into(),
669 unreachable_bytes: unreachable_bytes.into(),
667 unreachable_bytes: unreachable_bytes.into(),
670 unused: [0; 4],
668 unused: [0; 4],
671 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
669 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
672 };
670 };
673 Ok((writer.out, meta, append, dirstate_map.old_data_size))
671 Ok((writer.out, meta, append, dirstate_map.old_data_size))
674 }
672 }
675
673
676 struct Writer<'dmap, 'on_disk> {
674 struct Writer<'dmap, 'on_disk> {
677 dirstate_map: &'dmap DirstateMap<'on_disk>,
675 dirstate_map: &'dmap DirstateMap<'on_disk>,
678 append: bool,
676 append: bool,
679 out: Vec<u8>,
677 out: Vec<u8>,
680 }
678 }
681
679
682 impl Writer<'_, '_> {
680 impl Writer<'_, '_> {
683 fn write_nodes(
681 fn write_nodes(
684 &mut self,
682 &mut self,
685 nodes: dirstate_map::ChildNodesRef,
683 nodes: dirstate_map::ChildNodesRef,
686 ) -> Result<ChildNodes, DirstateError> {
684 ) -> Result<ChildNodes, DirstateError> {
687 // Reuse already-written nodes if possible
685 // Reuse already-written nodes if possible
688 if self.append {
686 if self.append {
689 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
687 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
690 let start = self.on_disk_offset_of(nodes_slice).expect(
688 let start = self.on_disk_offset_of(nodes_slice).expect(
691 "dirstate-v2 OnDisk nodes not found within on_disk",
689 "dirstate-v2 OnDisk nodes not found within on_disk",
692 );
690 );
693 let len = child_nodes_len_from_usize(nodes_slice.len());
691 let len = child_nodes_len_from_usize(nodes_slice.len());
694 return Ok(ChildNodes { start, len });
692 return Ok(ChildNodes { start, len });
695 }
693 }
696 }
694 }
697
695
698 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
696 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
699 // undefined iteration order. Sort to enable binary search in the
697 // undefined iteration order. Sort to enable binary search in the
700 // written file.
698 // written file.
701 let nodes = nodes.sorted();
699 let nodes = nodes.sorted();
702 let nodes_len = nodes.len();
700 let nodes_len = nodes.len();
703
701
704 // First accumulate serialized nodes in a `Vec`
702 // First accumulate serialized nodes in a `Vec`
705 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
703 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
706 for node in nodes {
704 for node in nodes {
707 let children = node.children(self.dirstate_map.on_disk)?;
705 let children = node.children(self.dirstate_map.on_disk)?;
708 let full_path = node.full_path(self.dirstate_map.on_disk)?;
706 let full_path = node.full_path(self.dirstate_map.on_disk)?;
709 self.check_children(&children, full_path)?;
707 self.check_children(&children, full_path)?;
710
708
711 let children = self.write_nodes(children)?;
709 let children = self.write_nodes(children)?;
712 let full_path = self.write_path(full_path.as_bytes());
710 let full_path = self.write_path(full_path.as_bytes());
713 let copy_source = if let Some(source) =
711 let copy_source = if let Some(source) =
714 node.copy_source(self.dirstate_map.on_disk)?
712 node.copy_source(self.dirstate_map.on_disk)?
715 {
713 {
716 self.write_path(source.as_bytes())
714 self.write_path(source.as_bytes())
717 } else {
715 } else {
718 PathSlice {
716 PathSlice {
719 start: 0.into(),
717 start: 0.into(),
720 len: 0.into(),
718 len: 0.into(),
721 }
719 }
722 };
720 };
723 on_disk_nodes.push(match node {
721 on_disk_nodes.push(match node {
724 NodeRef::InMemory(path, node) => {
722 NodeRef::InMemory(path, node) => {
725 let (flags, size, mtime) = match &node.data {
723 let (flags, size, mtime) = match &node.data {
726 dirstate_map::NodeData::Entry(entry) => {
724 dirstate_map::NodeData::Entry(entry) => {
727 Node::from_dirstate_entry(entry)
725 Node::from_dirstate_entry(entry)
728 }
726 }
729 dirstate_map::NodeData::CachedDirectory { mtime } => {
727 dirstate_map::NodeData::CachedDirectory { mtime } => {
730 // we currently never set a mtime if unknown file
728 // we currently never set a mtime if unknown file
731 // are present.
729 // are present.
732 // So if we have a mtime for a directory, we know
730 // So if we have a mtime for a directory, we know
733 // they are no unknown
731 // they are no unknown
734 // files and we
732 // files and we
735 // blindly set ALL_UNKNOWN_RECORDED.
733 // blindly set ALL_UNKNOWN_RECORDED.
736 //
734 //
737 // We never set ALL_IGNORED_RECORDED since we
735 // We never set ALL_IGNORED_RECORDED since we
738 // don't track that case
736 // don't track that case
739 // currently.
737 // currently.
740 let mut flags = Flags::DIRECTORY
738 let mut flags = Flags::DIRECTORY
741 | Flags::HAS_MTIME
739 | Flags::HAS_MTIME
742 | Flags::ALL_UNKNOWN_RECORDED;
740 | Flags::ALL_UNKNOWN_RECORDED;
743 if mtime.second_ambiguous {
741 if mtime.second_ambiguous {
744 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
742 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
745 }
743 }
746 (flags, 0.into(), (*mtime).into())
744 (flags, 0.into(), (*mtime).into())
747 }
745 }
748 dirstate_map::NodeData::None => (
746 dirstate_map::NodeData::None => (
749 Flags::DIRECTORY,
747 Flags::DIRECTORY,
750 0.into(),
748 0.into(),
751 PackedTruncatedTimestamp::null(),
749 PackedTruncatedTimestamp::null(),
752 ),
750 ),
753 };
751 };
754 Node {
752 Node {
755 children,
753 children,
756 copy_source,
754 copy_source,
757 full_path,
755 full_path,
758 base_name_start: u16::try_from(path.base_name_start())
756 base_name_start: u16::try_from(path.base_name_start())
759 // Could only panic for paths over 64 KiB
757 // Could only panic for paths over 64 KiB
760 .expect("dirstate-v2 path length overflow")
758 .expect("dirstate-v2 path length overflow")
761 .into(),
759 .into(),
762 descendants_with_entry_count: node
760 descendants_with_entry_count: node
763 .descendants_with_entry_count
761 .descendants_with_entry_count
764 .into(),
762 .into(),
765 tracked_descendants_count: node
763 tracked_descendants_count: node
766 .tracked_descendants_count
764 .tracked_descendants_count
767 .into(),
765 .into(),
768 flags: flags.bits().into(),
766 flags: flags.bits().into(),
769 size,
767 size,
770 mtime,
768 mtime,
771 }
769 }
772 }
770 }
773 NodeRef::OnDisk(node) => Node {
771 NodeRef::OnDisk(node) => Node {
774 children,
772 children,
775 copy_source,
773 copy_source,
776 full_path,
774 full_path,
777 ..*node
775 ..*node
778 },
776 },
779 })
777 })
780 }
778 }
781 // … so we can write them contiguously, after writing everything else
779 // … so we can write them contiguously, after writing everything else
782 // they refer to.
780 // they refer to.
783 let start = self.current_offset();
781 let start = self.current_offset();
784 let len = child_nodes_len_from_usize(nodes_len);
782 let len = child_nodes_len_from_usize(nodes_len);
785 self.out.extend(on_disk_nodes.as_bytes());
783 self.out.extend(on_disk_nodes.as_bytes());
786 Ok(ChildNodes { start, len })
784 Ok(ChildNodes { start, len })
787 }
785 }
788
786
789 /// Catch some dirstate corruptions before writing them to disk
787 /// Catch some dirstate corruptions before writing them to disk
790 fn check_children(
788 fn check_children(
791 &mut self,
789 &mut self,
792 children: &dirstate_map::ChildNodesRef,
790 children: &dirstate_map::ChildNodesRef,
793 full_path: &HgPath,
791 full_path: &HgPath,
794 ) -> Result<(), DirstateError> {
792 ) -> Result<(), DirstateError> {
795 for child in children.iter() {
793 for child in children.iter() {
796 let child_full_path =
794 let child_full_path =
797 child.full_path(self.dirstate_map.on_disk)?;
795 child.full_path(self.dirstate_map.on_disk)?;
798
796
799 let prefix_length = child_full_path.len()
797 let prefix_length = child_full_path.len()
800 // remove the filename
798 // remove the filename
801 - child.base_name(self.dirstate_map.on_disk)?.len()
799 - child.base_name(self.dirstate_map.on_disk)?.len()
802 // remove the slash
800 // remove the slash
803 - 1;
801 - 1;
804
802
805 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
803 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
806
804
807 if child_prefix != full_path.as_bytes() {
805 if child_prefix != full_path.as_bytes() {
808 let explanation = format!(
806 let explanation = format!(
809 "dirstate child node's path '{}' \
807 "dirstate child node's path '{}' \
810 does not start with its parent's path '{}'",
808 does not start with its parent's path '{}'",
811 child_full_path, full_path,
809 child_full_path, full_path,
812 );
810 );
813
811
814 return Err(HgError::corrupted(explanation).into());
812 return Err(HgError::corrupted(explanation).into());
815 }
813 }
816 }
814 }
817 Ok(())
815 Ok(())
818 }
816 }
819
817
820 /// If the given slice of items is within `on_disk`, returns its offset
818 /// If the given slice of items is within `on_disk`, returns its offset
821 /// from the start of `on_disk`.
819 /// from the start of `on_disk`.
822 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
820 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
823 where
821 where
824 T: BytesCast,
822 T: BytesCast,
825 {
823 {
826 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
824 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
827 let start = slice.as_ptr() as usize;
825 let start = slice.as_ptr() as usize;
828 let end = start + slice.len();
826 let end = start + slice.len();
829 start..=end
827 start..=end
830 }
828 }
831 let slice_addresses = address_range(slice.as_bytes());
829 let slice_addresses = address_range(slice.as_bytes());
832 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
830 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
833 if on_disk_addresses.contains(slice_addresses.start())
831 if on_disk_addresses.contains(slice_addresses.start())
834 && on_disk_addresses.contains(slice_addresses.end())
832 && on_disk_addresses.contains(slice_addresses.end())
835 {
833 {
836 let offset = slice_addresses.start() - on_disk_addresses.start();
834 let offset = slice_addresses.start() - on_disk_addresses.start();
837 Some(offset_from_usize(offset))
835 Some(offset_from_usize(offset))
838 } else {
836 } else {
839 None
837 None
840 }
838 }
841 }
839 }
842
840
843 fn current_offset(&mut self) -> Offset {
841 fn current_offset(&mut self) -> Offset {
844 let mut offset = self.out.len();
842 let mut offset = self.out.len();
845 if self.append {
843 if self.append {
846 offset += self.dirstate_map.on_disk.len()
844 offset += self.dirstate_map.on_disk.len()
847 }
845 }
848 offset_from_usize(offset)
846 offset_from_usize(offset)
849 }
847 }
850
848
851 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
849 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
852 let len = path_len_from_usize(slice.len());
850 let len = path_len_from_usize(slice.len());
853 // Reuse an already-written path if possible
851 // Reuse an already-written path if possible
854 if self.append {
852 if self.append {
855 if let Some(start) = self.on_disk_offset_of(slice) {
853 if let Some(start) = self.on_disk_offset_of(slice) {
856 return PathSlice { start, len };
854 return PathSlice { start, len };
857 }
855 }
858 }
856 }
859 let start = self.current_offset();
857 let start = self.current_offset();
860 self.out.extend(slice.as_bytes());
858 self.out.extend(slice.as_bytes());
861 PathSlice { start, len }
859 PathSlice { start, len }
862 }
860 }
863 }
861 }
864
862
865 fn offset_from_usize(x: usize) -> Offset {
863 fn offset_from_usize(x: usize) -> Offset {
866 u32::try_from(x)
864 u32::try_from(x)
867 // Could only panic for a dirstate file larger than 4 GiB
865 // Could only panic for a dirstate file larger than 4 GiB
868 .expect("dirstate-v2 offset overflow")
866 .expect("dirstate-v2 offset overflow")
869 .into()
867 .into()
870 }
868 }
871
869
872 fn child_nodes_len_from_usize(x: usize) -> Size {
870 fn child_nodes_len_from_usize(x: usize) -> Size {
873 u32::try_from(x)
871 u32::try_from(x)
874 // Could only panic with over 4 billion nodes
872 // Could only panic with over 4 billion nodes
875 .expect("dirstate-v2 slice length overflow")
873 .expect("dirstate-v2 slice length overflow")
876 .into()
874 .into()
877 }
875 }
878
876
879 fn path_len_from_usize(x: usize) -> PathSize {
877 fn path_len_from_usize(x: usize) -> PathSize {
880 u16::try_from(x)
878 u16::try_from(x)
881 // Could only panic for paths over 64 KiB
879 // Could only panic for paths over 64 KiB
882 .expect("dirstate-v2 path length overflow")
880 .expect("dirstate-v2 path length overflow")
883 .into()
881 .into()
884 }
882 }
885
883
886 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
884 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
887 fn from(timestamp: TruncatedTimestamp) -> Self {
885 fn from(timestamp: TruncatedTimestamp) -> Self {
888 Self {
886 Self {
889 truncated_seconds: timestamp.truncated_seconds().into(),
887 truncated_seconds: timestamp.truncated_seconds().into(),
890 nanoseconds: timestamp.nanoseconds().into(),
888 nanoseconds: timestamp.nanoseconds().into(),
891 }
889 }
892 }
890 }
893 }
891 }
894
892
895 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
893 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
896 type Error = DirstateV2ParseError;
894 type Error = DirstateV2ParseError;
897
895
898 fn try_from(
896 fn try_from(
899 timestamp: PackedTruncatedTimestamp,
897 timestamp: PackedTruncatedTimestamp,
900 ) -> Result<Self, Self::Error> {
898 ) -> Result<Self, Self::Error> {
901 Self::from_already_truncated(
899 Self::from_already_truncated(
902 timestamp.truncated_seconds.get(),
900 timestamp.truncated_seconds.get(),
903 timestamp.nanoseconds.get(),
901 timestamp.nanoseconds.get(),
904 false,
902 false,
905 )
903 )
906 }
904 }
907 }
905 }
908 impl PackedTruncatedTimestamp {
906 impl PackedTruncatedTimestamp {
909 fn null() -> Self {
907 fn null() -> Self {
910 Self {
908 Self {
911 truncated_seconds: 0.into(),
909 truncated_seconds: 0.into(),
912 nanoseconds: 0.into(),
910 nanoseconds: 0.into(),
913 }
911 }
914 }
912 }
915 }
913 }
@@ -1,2450 +1,2454
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use format_bytes::format_bytes;
10 use format_bytes::format_bytes;
11 use once_cell::sync::OnceCell;
11 use once_cell::sync::OnceCell;
12
12
13 use crate::{
13 use crate::{
14 dirstate::dirs_multiset::DirsChildrenMultiset,
14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 filepatterns::{
15 filepatterns::{
16 build_single_regex, filter_subincludes, get_patterns_from_file,
16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 PatternFileWarning, PatternResult,
17 PatternFileWarning, PatternResult,
18 },
18 },
19 utils::{
19 utils::{
20 files::{dir_ancestors, find_dirs},
20 files::{dir_ancestors, find_dirs},
21 hg_path::{HgPath, HgPathBuf, HgPathError},
21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 Escaped,
22 Escaped,
23 },
23 },
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 };
25 };
26
26
27 use crate::dirstate::status::IgnoreFnType;
27 use crate::dirstate::status::IgnoreFnType;
28 use crate::filepatterns::normalize_path_bytes;
28 use crate::filepatterns::normalize_path_bytes;
29 use std::collections::HashSet;
29 use std::collections::HashSet;
30 use std::fmt::{Display, Error, Formatter};
30 use std::fmt::{Display, Error, Formatter};
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32 use std::{borrow::ToOwned, collections::BTreeSet};
32 use std::{borrow::ToOwned, collections::BTreeSet};
33
33
34 #[derive(Debug, PartialEq)]
34 #[derive(Debug, PartialEq)]
35 pub enum VisitChildrenSet {
35 pub enum VisitChildrenSet {
36 /// Don't visit anything
36 /// Don't visit anything
37 Empty,
37 Empty,
38 /// Visit this directory and probably its children
38 /// Visit this directory and probably its children
39 This,
39 This,
40 /// Only visit the children (both files and directories) if they
40 /// Only visit the children (both files and directories) if they
41 /// are mentioned in this set. (empty set corresponds to [Empty])
41 /// are mentioned in this set. (empty set corresponds to [Empty])
42 /// TODO Should we implement a `NonEmptyHashSet`?
42 /// TODO Should we implement a `NonEmptyHashSet`?
43 Set(HashSet<HgPathBuf>),
43 Set(HashSet<HgPathBuf>),
44 /// Visit this directory and all subdirectories
44 /// Visit this directory and all subdirectories
45 /// (you can stop asking about the children set)
45 /// (you can stop asking about the children set)
46 Recursive,
46 Recursive,
47 }
47 }
48
48
49 pub trait Matcher: core::fmt::Debug {
49 pub trait Matcher: core::fmt::Debug {
50 /// Explicitly listed files
50 /// Explicitly listed files
51 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
51 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
52 /// Returns whether `filename` is in `file_set`
52 /// Returns whether `filename` is in `file_set`
53 fn exact_match(&self, filename: &HgPath) -> bool;
53 fn exact_match(&self, filename: &HgPath) -> bool;
54 /// Returns whether `filename` is matched by this matcher
54 /// Returns whether `filename` is matched by this matcher
55 fn matches(&self, filename: &HgPath) -> bool;
55 fn matches(&self, filename: &HgPath) -> bool;
56 /// Decides whether a directory should be visited based on whether it
56 /// Decides whether a directory should be visited based on whether it
57 /// has potential matches in it or one of its subdirectories, and
57 /// has potential matches in it or one of its subdirectories, and
58 /// potentially lists which subdirectories of that directory should be
58 /// potentially lists which subdirectories of that directory should be
59 /// visited. This is based on the match's primary, included, and excluded
59 /// visited. This is based on the match's primary, included, and excluded
60 /// patterns.
60 /// patterns.
61 ///
61 ///
62 /// # Example
62 /// # Example
63 ///
63 ///
64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 /// return the following values (assuming the implementation of
65 /// return the following values (assuming the implementation of
66 /// visit_children_set is capable of recognizing this; some implementations
66 /// visit_children_set is capable of recognizing this; some implementations
67 /// are not).
67 /// are not).
68 ///
68 ///
69 /// ```text
69 /// ```text
70 /// ```ignore
70 /// ```ignore
71 /// '' -> {'foo', 'qux'}
71 /// '' -> {'foo', 'qux'}
72 /// 'baz' -> set()
72 /// 'baz' -> set()
73 /// 'foo' -> {'bar'}
73 /// 'foo' -> {'bar'}
74 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 /// // matchers is applied to the entire matcher, we have to downgrade this
75 /// // matchers is applied to the entire matcher, we have to downgrade this
76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 /// // `RootFilesIn'-kind matcher being mixed in.
77 /// // `RootFilesIn'-kind matcher being mixed in.
78 /// 'foo/bar' -> 'this'
78 /// 'foo/bar' -> 'this'
79 /// 'qux' -> 'this'
79 /// 'qux' -> 'this'
80 /// ```
80 /// ```
81 /// # Important
81 /// # Important
82 ///
82 ///
83 /// Most matchers do not know if they're representing files or
83 /// Most matchers do not know if they're representing files or
84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 /// it may return `VisitChildrenSet::This`.
88 /// it may return `VisitChildrenSet::This`.
89 /// Do not rely on the return being a `HashSet` indicating that there are
89 /// Do not rely on the return being a `HashSet` indicating that there are
90 /// no files in this dir to investigate (or equivalently that if there are
90 /// no files in this dir to investigate (or equivalently that if there are
91 /// files to investigate in 'dir' that it will always return
91 /// files to investigate in 'dir' that it will always return
92 /// `VisitChildrenSet::This`).
92 /// `VisitChildrenSet::This`).
93 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
93 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
94 /// Matcher will match everything and `files_set()` will be empty:
94 /// Matcher will match everything and `files_set()` will be empty:
95 /// optimization might be possible.
95 /// optimization might be possible.
96 fn matches_everything(&self) -> bool;
96 fn matches_everything(&self) -> bool;
97 /// Matcher will match exactly the files in `files_set()`: optimization
97 /// Matcher will match exactly the files in `files_set()`: optimization
98 /// might be possible.
98 /// might be possible.
99 fn is_exact(&self) -> bool;
99 fn is_exact(&self) -> bool;
100 }
100 }
101
101
/// Matches everything.
///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
115
115
116 impl Matcher for AlwaysMatcher {
116 impl Matcher for AlwaysMatcher {
117 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
117 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
118 None
118 None
119 }
119 }
120 fn exact_match(&self, _filename: &HgPath) -> bool {
120 fn exact_match(&self, _filename: &HgPath) -> bool {
121 false
121 false
122 }
122 }
123 fn matches(&self, _filename: &HgPath) -> bool {
123 fn matches(&self, _filename: &HgPath) -> bool {
124 true
124 true
125 }
125 }
126 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
126 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
127 VisitChildrenSet::Recursive
127 VisitChildrenSet::Recursive
128 }
128 }
129 fn matches_everything(&self) -> bool {
129 fn matches_everything(&self) -> bool {
130 true
130 true
131 }
131 }
132 fn is_exact(&self) -> bool {
132 fn is_exact(&self) -> bool {
133 false
133 false
134 }
134 }
135 }
135 }
136
136
/// Matches nothing. The logical opposite of [`AlwaysMatcher`].
#[derive(Debug)]
pub struct NeverMatcher;
140
140
141 impl Matcher for NeverMatcher {
141 impl Matcher for NeverMatcher {
142 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
142 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
143 None
143 None
144 }
144 }
145 fn exact_match(&self, _filename: &HgPath) -> bool {
145 fn exact_match(&self, _filename: &HgPath) -> bool {
146 false
146 false
147 }
147 }
148 fn matches(&self, _filename: &HgPath) -> bool {
148 fn matches(&self, _filename: &HgPath) -> bool {
149 false
149 false
150 }
150 }
151 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
151 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
152 VisitChildrenSet::Empty
152 VisitChildrenSet::Empty
153 }
153 }
154 fn matches_everything(&self) -> bool {
154 fn matches_everything(&self) -> bool {
155 false
155 false
156 }
156 }
157 fn is_exact(&self) -> bool {
157 fn is_exact(&self) -> bool {
158 true
158 true
159 }
159 }
160 }
160 }
161
161
162 /// Matches the input files exactly. They are interpreted as paths, not
162 /// Matches the input files exactly. They are interpreted as paths, not
163 /// patterns.
163 /// patterns.
164 ///
164 ///
165 ///```
165 ///```
166 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
166 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
167 ///
167 ///
168 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
168 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
169 /// let matcher = FileMatcher::new(files).unwrap();
169 /// let matcher = FileMatcher::new(files).unwrap();
170 ///
170 ///
171 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
171 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
172 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
172 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
173 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
173 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
174 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
174 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
175 /// ```
175 /// ```
176 #[derive(Debug)]
176 #[derive(Debug)]
177 pub struct FileMatcher {
177 pub struct FileMatcher {
178 files: HashSet<HgPathBuf>,
178 files: HashSet<HgPathBuf>,
179 dirs: DirsMultiset,
179 dirs: DirsMultiset,
180 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
180 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
181 }
181 }
182
182
183 impl FileMatcher {
183 impl FileMatcher {
184 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
184 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
185 let dirs = DirsMultiset::from_manifest(&files)?;
185 let dirs = DirsMultiset::from_manifest(&files)?;
186 Ok(Self {
186 Ok(Self {
187 files: HashSet::from_iter(files),
187 files: HashSet::from_iter(files),
188 dirs,
188 dirs,
189 sorted_visitchildrenset_candidates: OnceCell::new(),
189 sorted_visitchildrenset_candidates: OnceCell::new(),
190 })
190 })
191 }
191 }
192 fn inner_matches(&self, filename: &HgPath) -> bool {
192 fn inner_matches(&self, filename: &HgPath) -> bool {
193 self.files.contains(filename.as_ref())
193 self.files.contains(filename.as_ref())
194 }
194 }
195 }
195 }
196
196
197 impl Matcher for FileMatcher {
197 impl Matcher for FileMatcher {
198 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
198 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
199 Some(&self.files)
199 Some(&self.files)
200 }
200 }
201 fn exact_match(&self, filename: &HgPath) -> bool {
201 fn exact_match(&self, filename: &HgPath) -> bool {
202 self.inner_matches(filename)
202 self.inner_matches(filename)
203 }
203 }
204 fn matches(&self, filename: &HgPath) -> bool {
204 fn matches(&self, filename: &HgPath) -> bool {
205 self.inner_matches(filename)
205 self.inner_matches(filename)
206 }
206 }
207 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
207 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
208 if self.files.is_empty() || !self.dirs.contains(directory) {
208 if self.files.is_empty() || !self.dirs.contains(directory) {
209 return VisitChildrenSet::Empty;
209 return VisitChildrenSet::Empty;
210 }
210 }
211
211
212 let compute_candidates = || -> BTreeSet<HgPathBuf> {
212 let compute_candidates = || -> BTreeSet<HgPathBuf> {
213 let mut candidates: BTreeSet<HgPathBuf> =
213 let mut candidates: BTreeSet<HgPathBuf> =
214 self.dirs.iter().cloned().collect();
214 self.dirs.iter().cloned().collect();
215 candidates.extend(self.files.iter().cloned());
215 candidates.extend(self.files.iter().cloned());
216 candidates.remove(HgPath::new(b""));
216 candidates.remove(HgPath::new(b""));
217 candidates
217 candidates
218 };
218 };
219 let candidates =
219 let candidates =
220 if directory.as_ref().is_empty() {
220 if directory.as_ref().is_empty() {
221 compute_candidates()
221 compute_candidates()
222 } else {
222 } else {
223 let sorted_candidates = self
223 let sorted_candidates = self
224 .sorted_visitchildrenset_candidates
224 .sorted_visitchildrenset_candidates
225 .get_or_init(compute_candidates);
225 .get_or_init(compute_candidates);
226 let directory_bytes = directory.as_ref().as_bytes();
226 let directory_bytes = directory.as_ref().as_bytes();
227 let start: HgPathBuf =
227 let start: HgPathBuf =
228 format_bytes!(b"{}/", directory_bytes).into();
228 format_bytes!(b"{}/", directory_bytes).into();
229 let start_len = start.len();
229 let start_len = start.len();
230 // `0` sorts after `/`
230 // `0` sorts after `/`
231 let end = format_bytes!(b"{}0", directory_bytes).into();
231 let end = format_bytes!(b"{}0", directory_bytes).into();
232 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
232 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
233 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
233 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
234 ))
234 ))
235 };
235 };
236
236
237 // `self.dirs` includes all of the directories, recursively, so if
237 // `self.dirs` includes all of the directories, recursively, so if
238 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
238 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
239 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
239 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
240 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
240 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
241 // subdir will be in there without a slash.
241 // subdir will be in there without a slash.
242 VisitChildrenSet::Set(
242 VisitChildrenSet::Set(
243 candidates
243 candidates
244 .into_iter()
244 .into_iter()
245 .filter_map(|c| {
245 .filter_map(|c| {
246 if c.bytes().all(|b| *b != b'/') {
246 if c.bytes().all(|b| *b != b'/') {
247 Some(c)
247 Some(c)
248 } else {
248 } else {
249 None
249 None
250 }
250 }
251 })
251 })
252 .collect(),
252 .collect(),
253 )
253 )
254 }
254 }
255 fn matches_everything(&self) -> bool {
255 fn matches_everything(&self) -> bool {
256 false
256 false
257 }
257 }
258 fn is_exact(&self) -> bool {
258 fn is_exact(&self) -> bool {
259 true
259 true
260 }
260 }
261 }
261 }
262
262
263 /// Matches a set of (kind, pat, source) against a 'root' directory.
263 /// Matches a set of (kind, pat, source) against a 'root' directory.
264 /// (Currently the 'root' directory is effectively always empty)
264 /// (Currently the 'root' directory is effectively always empty)
265 /// ```
265 /// ```
266 /// use hg::{
266 /// use hg::{
267 /// matchers::{PatternMatcher, Matcher},
267 /// matchers::{PatternMatcher, Matcher},
268 /// IgnorePattern,
268 /// IgnorePattern,
269 /// PatternSyntax,
269 /// PatternSyntax,
270 /// utils::hg_path::{HgPath, HgPathBuf}
270 /// utils::hg_path::{HgPath, HgPathBuf}
271 /// };
271 /// };
272 /// use std::collections::HashSet;
272 /// use std::collections::HashSet;
273 /// use std::path::Path;
273 /// use std::path::Path;
274 /// ///
274 /// ///
275 /// let ignore_patterns : Vec<IgnorePattern> =
275 /// let ignore_patterns : Vec<IgnorePattern> =
276 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
276 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
278 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
278 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
279 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
279 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
280 /// ];
280 /// ];
281 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
281 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
282 /// ///
282 /// ///
283 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
283 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
284 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
284 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
285 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
285 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
286 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
286 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
287 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
287 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
288 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
288 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
289 /// assert_eq!(matcher.file_set().unwrap(),
289 /// assert_eq!(matcher.file_set().unwrap(),
290 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
290 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
291 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
291 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
293 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
293 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
294 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
294 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
295 /// ```
295 /// ```
296 pub struct PatternMatcher<'a> {
296 pub struct PatternMatcher<'a> {
297 patterns: Vec<u8>,
297 patterns: Vec<u8>,
298 match_fn: IgnoreFnType<'a>,
298 match_fn: IgnoreFnType<'a>,
299 /// Whether all the patterns match a prefix (i.e. recursively)
299 /// Whether all the patterns match a prefix (i.e. recursively)
300 prefix: bool,
300 prefix: bool,
301 files: HashSet<HgPathBuf>,
301 files: HashSet<HgPathBuf>,
302 dirs_explicit: HashSet<HgPathBuf>,
302 dirs_explicit: HashSet<HgPathBuf>,
303 dirs: DirsMultiset,
303 dirs: DirsMultiset,
304 }
304 }
305
305
306 impl core::fmt::Debug for PatternMatcher<'_> {
306 impl core::fmt::Debug for PatternMatcher<'_> {
307 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
307 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
308 f.debug_struct("PatternMatcher")
308 f.debug_struct("PatternMatcher")
309 .field("patterns", &String::from_utf8_lossy(&self.patterns))
309 .field("patterns", &String::from_utf8_lossy(&self.patterns))
310 .field("prefix", &self.prefix)
310 .field("prefix", &self.prefix)
311 .field("files", &self.files)
311 .field("files", &self.files)
312 .field("dirs", &self.dirs)
312 .field("dirs", &self.dirs)
313 .finish()
313 .finish()
314 }
314 }
315 }
315 }
316
316
317 impl<'a> PatternMatcher<'a> {
317 impl<'a> PatternMatcher<'a> {
318 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
318 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
319 let RootsDirsAndParents {
319 let RootsDirsAndParents {
320 roots,
320 roots,
321 dirs: dirs_explicit,
321 dirs: dirs_explicit,
322 parents,
322 parents,
323 } = roots_dirs_and_parents(&ignore_patterns)?;
323 } = roots_dirs_and_parents(&ignore_patterns)?;
324 let files = roots;
324 let files = roots;
325 let dirs = parents;
325 let dirs = parents;
326 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
326 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
327
327
328 let prefix = ignore_patterns.iter().all(|k| {
328 let prefix = ignore_patterns.iter().all(|k| {
329 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
329 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
330 });
330 });
331 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
331 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
332
332
333 Ok(Self {
333 Ok(Self {
334 patterns,
334 patterns,
335 match_fn,
335 match_fn,
336 prefix,
336 prefix,
337 files,
337 files,
338 dirs,
338 dirs,
339 dirs_explicit,
339 dirs_explicit,
340 })
340 })
341 }
341 }
342 }
342 }
343
343
344 impl<'a> Matcher for PatternMatcher<'a> {
344 impl<'a> Matcher for PatternMatcher<'a> {
345 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
345 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
346 Some(&self.files)
346 Some(&self.files)
347 }
347 }
348
348
349 fn exact_match(&self, filename: &HgPath) -> bool {
349 fn exact_match(&self, filename: &HgPath) -> bool {
350 self.files.contains(filename)
350 self.files.contains(filename)
351 }
351 }
352
352
353 fn matches(&self, filename: &HgPath) -> bool {
353 fn matches(&self, filename: &HgPath) -> bool {
354 if self.files.contains(filename) {
354 if self.files.contains(filename) {
355 return true;
355 return true;
356 }
356 }
357 (self.match_fn)(filename)
357 (self.match_fn)(filename)
358 }
358 }
359
359
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
361 if self.prefix && self.files.contains(directory) {
361 if self.prefix && self.files.contains(directory) {
362 return VisitChildrenSet::Recursive;
362 return VisitChildrenSet::Recursive;
363 }
363 }
364 if self.dirs.contains(directory) {
364 if self.dirs.contains(directory) {
365 return VisitChildrenSet::This;
365 return VisitChildrenSet::This;
366 }
366 }
367 if dir_ancestors(directory).any(|parent_dir| {
367 if dir_ancestors(directory).any(|parent_dir| {
368 self.files.contains(parent_dir)
368 self.files.contains(parent_dir)
369 || self.dirs_explicit.contains(parent_dir)
369 || self.dirs_explicit.contains(parent_dir)
370 }) {
370 }) {
371 VisitChildrenSet::This
371 VisitChildrenSet::This
372 } else {
372 } else {
373 VisitChildrenSet::Empty
373 VisitChildrenSet::Empty
374 }
374 }
375 }
375 }
376
376
377 fn matches_everything(&self) -> bool {
377 fn matches_everything(&self) -> bool {
378 false
378 false
379 }
379 }
380
380
381 fn is_exact(&self) -> bool {
381 fn is_exact(&self) -> bool {
382 false
382 false
383 }
383 }
384 }
384 }
385
385
386 /// Matches files that are included in the ignore rules.
386 /// Matches files that are included in the ignore rules.
387 /// ```
387 /// ```
388 /// use hg::{
388 /// use hg::{
389 /// matchers::{IncludeMatcher, Matcher},
389 /// matchers::{IncludeMatcher, Matcher},
390 /// IgnorePattern,
390 /// IgnorePattern,
391 /// PatternSyntax,
391 /// PatternSyntax,
392 /// utils::hg_path::HgPath
392 /// utils::hg_path::HgPath
393 /// };
393 /// };
394 /// use std::path::Path;
394 /// use std::path::Path;
395 /// ///
395 /// ///
396 /// let ignore_patterns =
396 /// let ignore_patterns =
397 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
397 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
398 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
398 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
399 /// ///
399 /// ///
400 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
400 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
401 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
401 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
402 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
402 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
403 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
403 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
404 /// ///
404 /// ///
405 /// let ignore_patterns =
405 /// let ignore_patterns =
406 /// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
406 /// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
407 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
407 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
408 /// ///
408 /// ///
409 /// assert!(!matcher.matches(HgPath::new(b"file")));
409 /// assert!(!matcher.matches(HgPath::new(b"file")));
410 /// assert!(!matcher.matches(HgPath::new(b"dir/file")));
410 /// assert!(!matcher.matches(HgPath::new(b"dir/file")));
411 /// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
411 /// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
412 /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
412 /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
413 /// ```
413 /// ```
414 pub struct IncludeMatcher<'a> {
414 pub struct IncludeMatcher<'a> {
415 patterns: Vec<u8>,
415 patterns: Vec<u8>,
416 match_fn: IgnoreFnType<'a>,
416 match_fn: IgnoreFnType<'a>,
417 /// Whether all the patterns match a prefix (i.e. recursively)
417 /// Whether all the patterns match a prefix (i.e. recursively)
418 prefix: bool,
418 prefix: bool,
419 roots: HashSet<HgPathBuf>,
419 roots: HashSet<HgPathBuf>,
420 dirs: HashSet<HgPathBuf>,
420 dirs: HashSet<HgPathBuf>,
421 parents: DirsMultiset,
421 parents: DirsMultiset,
422 }
422 }
423
423
424 impl core::fmt::Debug for IncludeMatcher<'_> {
424 impl core::fmt::Debug for IncludeMatcher<'_> {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426 f.debug_struct("IncludeMatcher")
426 f.debug_struct("IncludeMatcher")
427 .field("patterns", &String::from_utf8_lossy(&self.patterns))
427 .field("patterns", &String::from_utf8_lossy(&self.patterns))
428 .field("prefix", &self.prefix)
428 .field("prefix", &self.prefix)
429 .field("roots", &self.roots)
429 .field("roots", &self.roots)
430 .field("dirs", &self.dirs)
430 .field("dirs", &self.dirs)
431 .field("parents", &self.parents)
431 .field("parents", &self.parents)
432 .finish()
432 .finish()
433 }
433 }
434 }
434 }
435
435
436 impl<'a> Matcher for IncludeMatcher<'a> {
436 impl<'a> Matcher for IncludeMatcher<'a> {
437 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
437 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
438 None
438 None
439 }
439 }
440
440
441 fn exact_match(&self, _filename: &HgPath) -> bool {
441 fn exact_match(&self, _filename: &HgPath) -> bool {
442 false
442 false
443 }
443 }
444
444
445 fn matches(&self, filename: &HgPath) -> bool {
445 fn matches(&self, filename: &HgPath) -> bool {
446 (self.match_fn)(filename)
446 (self.match_fn)(filename)
447 }
447 }
448
448
449 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
449 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
450 let dir = directory;
450 let dir = directory;
451 if self.prefix && self.roots.contains(dir) {
451 if self.prefix && self.roots.contains(dir) {
452 return VisitChildrenSet::Recursive;
452 return VisitChildrenSet::Recursive;
453 }
453 }
454 if self.roots.contains(HgPath::new(b""))
454 if self.roots.contains(HgPath::new(b""))
455 || self.roots.contains(dir)
455 || self.roots.contains(dir)
456 || self.dirs.contains(dir)
456 || self.dirs.contains(dir)
457 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
457 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
458 {
458 {
459 return VisitChildrenSet::This;
459 return VisitChildrenSet::This;
460 }
460 }
461
461
462 if self.parents.contains(dir.as_ref()) {
462 if self.parents.contains(dir.as_ref()) {
463 let multiset = self.get_all_parents_children();
463 let multiset = self.get_all_parents_children();
464 if let Some(children) = multiset.get(dir) {
464 if let Some(children) = multiset.get(dir) {
465 return VisitChildrenSet::Set(
465 return VisitChildrenSet::Set(
466 children.iter().map(HgPathBuf::from).collect(),
466 children.iter().map(HgPathBuf::from).collect(),
467 );
467 );
468 }
468 }
469 }
469 }
470 VisitChildrenSet::Empty
470 VisitChildrenSet::Empty
471 }
471 }
472
472
473 fn matches_everything(&self) -> bool {
473 fn matches_everything(&self) -> bool {
474 false
474 false
475 }
475 }
476
476
477 fn is_exact(&self) -> bool {
477 fn is_exact(&self) -> bool {
478 false
478 false
479 }
479 }
480 }
480 }
481
481
482 /// The union of multiple matchers. Will match if any of the matchers match.
482 /// The union of multiple matchers. Will match if any of the matchers match.
483 #[derive(Debug)]
483 #[derive(Debug)]
484 pub struct UnionMatcher {
484 pub struct UnionMatcher {
485 matchers: Vec<Box<dyn Matcher + Sync>>,
485 matchers: Vec<Box<dyn Matcher + Sync>>,
486 }
486 }
487
487
488 impl Matcher for UnionMatcher {
488 impl Matcher for UnionMatcher {
489 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
489 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
490 None
490 None
491 }
491 }
492
492
493 fn exact_match(&self, _filename: &HgPath) -> bool {
493 fn exact_match(&self, _filename: &HgPath) -> bool {
494 false
494 false
495 }
495 }
496
496
497 fn matches(&self, filename: &HgPath) -> bool {
497 fn matches(&self, filename: &HgPath) -> bool {
498 self.matchers.iter().any(|m| m.matches(filename))
498 self.matchers.iter().any(|m| m.matches(filename))
499 }
499 }
500
500
501 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
501 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
502 let mut result = HashSet::new();
502 let mut result = HashSet::new();
503 let mut this = false;
503 let mut this = false;
504 for matcher in self.matchers.iter() {
504 for matcher in self.matchers.iter() {
505 let visit = matcher.visit_children_set(directory);
505 let visit = matcher.visit_children_set(directory);
506 match visit {
506 match visit {
507 VisitChildrenSet::Empty => continue,
507 VisitChildrenSet::Empty => continue,
508 VisitChildrenSet::This => {
508 VisitChildrenSet::This => {
509 this = true;
509 this = true;
510 // Don't break, we might have an 'all' in here.
510 // Don't break, we might have an 'all' in here.
511 continue;
511 continue;
512 }
512 }
513 VisitChildrenSet::Set(set) => {
513 VisitChildrenSet::Set(set) => {
514 result.extend(set);
514 result.extend(set);
515 }
515 }
516 VisitChildrenSet::Recursive => {
516 VisitChildrenSet::Recursive => {
517 return visit;
517 return visit;
518 }
518 }
519 }
519 }
520 }
520 }
521 if this {
521 if this {
522 return VisitChildrenSet::This;
522 return VisitChildrenSet::This;
523 }
523 }
524 if result.is_empty() {
524 if result.is_empty() {
525 VisitChildrenSet::Empty
525 VisitChildrenSet::Empty
526 } else {
526 } else {
527 VisitChildrenSet::Set(result)
527 VisitChildrenSet::Set(result)
528 }
528 }
529 }
529 }
530
530
531 fn matches_everything(&self) -> bool {
531 fn matches_everything(&self) -> bool {
532 // TODO Maybe if all are AlwaysMatcher?
532 // TODO Maybe if all are AlwaysMatcher?
533 false
533 false
534 }
534 }
535
535
536 fn is_exact(&self) -> bool {
536 fn is_exact(&self) -> bool {
537 false
537 false
538 }
538 }
539 }
539 }
540
540
541 impl UnionMatcher {
541 impl UnionMatcher {
542 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
542 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
543 Self { matchers }
543 Self { matchers }
544 }
544 }
545 }
545 }
546
546
547 #[derive(Debug)]
547 #[derive(Debug)]
548 pub struct IntersectionMatcher {
548 pub struct IntersectionMatcher {
549 m1: Box<dyn Matcher + Sync>,
549 m1: Box<dyn Matcher + Sync>,
550 m2: Box<dyn Matcher + Sync>,
550 m2: Box<dyn Matcher + Sync>,
551 files: Option<HashSet<HgPathBuf>>,
551 files: Option<HashSet<HgPathBuf>>,
552 }
552 }
553
553
554 impl Matcher for IntersectionMatcher {
554 impl Matcher for IntersectionMatcher {
555 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
555 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
556 self.files.as_ref()
556 self.files.as_ref()
557 }
557 }
558
558
559 fn exact_match(&self, filename: &HgPath) -> bool {
559 fn exact_match(&self, filename: &HgPath) -> bool {
560 self.files.as_ref().map_or(false, |f| f.contains(filename))
560 self.files.as_ref().map_or(false, |f| f.contains(filename))
561 }
561 }
562
562
563 fn matches(&self, filename: &HgPath) -> bool {
563 fn matches(&self, filename: &HgPath) -> bool {
564 self.m1.matches(filename) && self.m2.matches(filename)
564 self.m1.matches(filename) && self.m2.matches(filename)
565 }
565 }
566
566
567 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
567 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
568 let m1_set = self.m1.visit_children_set(directory);
568 let m1_set = self.m1.visit_children_set(directory);
569 if m1_set == VisitChildrenSet::Empty {
569 if m1_set == VisitChildrenSet::Empty {
570 return VisitChildrenSet::Empty;
570 return VisitChildrenSet::Empty;
571 }
571 }
572 let m2_set = self.m2.visit_children_set(directory);
572 let m2_set = self.m2.visit_children_set(directory);
573 if m2_set == VisitChildrenSet::Empty {
573 if m2_set == VisitChildrenSet::Empty {
574 return VisitChildrenSet::Empty;
574 return VisitChildrenSet::Empty;
575 }
575 }
576
576
577 if m1_set == VisitChildrenSet::Recursive {
577 if m1_set == VisitChildrenSet::Recursive {
578 return m2_set;
578 return m2_set;
579 } else if m2_set == VisitChildrenSet::Recursive {
579 } else if m2_set == VisitChildrenSet::Recursive {
580 return m1_set;
580 return m1_set;
581 }
581 }
582
582
583 match (&m1_set, &m2_set) {
583 match (&m1_set, &m2_set) {
584 (VisitChildrenSet::Recursive, _) => m2_set,
584 (VisitChildrenSet::Recursive, _) => m2_set,
585 (_, VisitChildrenSet::Recursive) => m1_set,
585 (_, VisitChildrenSet::Recursive) => m1_set,
586 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
586 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
587 VisitChildrenSet::This
587 VisitChildrenSet::This
588 }
588 }
589 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
589 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
590 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
590 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
591 if set.is_empty() {
591 if set.is_empty() {
592 VisitChildrenSet::Empty
592 VisitChildrenSet::Empty
593 } else {
593 } else {
594 VisitChildrenSet::Set(set)
594 VisitChildrenSet::Set(set)
595 }
595 }
596 }
596 }
597 _ => unreachable!(),
597 _ => unreachable!(),
598 }
598 }
599 }
599 }
600
600
601 fn matches_everything(&self) -> bool {
601 fn matches_everything(&self) -> bool {
602 self.m1.matches_everything() && self.m2.matches_everything()
602 self.m1.matches_everything() && self.m2.matches_everything()
603 }
603 }
604
604
605 fn is_exact(&self) -> bool {
605 fn is_exact(&self) -> bool {
606 self.m1.is_exact() || self.m2.is_exact()
606 self.m1.is_exact() || self.m2.is_exact()
607 }
607 }
608 }
608 }
609
609
610 impl IntersectionMatcher {
610 impl IntersectionMatcher {
611 pub fn new(
611 pub fn new(
612 mut m1: Box<dyn Matcher + Sync>,
612 mut m1: Box<dyn Matcher + Sync>,
613 mut m2: Box<dyn Matcher + Sync>,
613 mut m2: Box<dyn Matcher + Sync>,
614 ) -> Self {
614 ) -> Self {
615 let files = if m1.is_exact() || m2.is_exact() {
615 let files = if m1.is_exact() || m2.is_exact() {
616 if !m1.is_exact() {
616 if !m1.is_exact() {
617 std::mem::swap(&mut m1, &mut m2);
617 std::mem::swap(&mut m1, &mut m2);
618 }
618 }
619 m1.file_set().map(|m1_files| {
619 m1.file_set().map(|m1_files| {
620 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
620 m1_files
621 .iter()
622 .filter(|&f| m2.matches(f))
623 .cloned()
624 .collect()
621 })
625 })
622 } else {
626 } else {
623 // without exact input file sets, we can't do an exact
627 // without exact input file sets, we can't do an exact
624 // intersection, so we must over-approximate by
628 // intersection, so we must over-approximate by
625 // unioning instead
629 // unioning instead
626 m1.file_set().map(|m1_files| match m2.file_set() {
630 m1.file_set().map(|m1_files| match m2.file_set() {
627 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
631 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
628 None => m1_files.iter().cloned().collect(),
632 None => m1_files.iter().cloned().collect(),
629 })
633 })
630 };
634 };
631 Self { m1, m2, files }
635 Self { m1, m2, files }
632 }
636 }
633 }
637 }
634
638
635 #[derive(Debug)]
639 #[derive(Debug)]
636 pub struct DifferenceMatcher {
640 pub struct DifferenceMatcher {
637 base: Box<dyn Matcher + Sync>,
641 base: Box<dyn Matcher + Sync>,
638 excluded: Box<dyn Matcher + Sync>,
642 excluded: Box<dyn Matcher + Sync>,
639 files: Option<HashSet<HgPathBuf>>,
643 files: Option<HashSet<HgPathBuf>>,
640 }
644 }
641
645
642 impl Matcher for DifferenceMatcher {
646 impl Matcher for DifferenceMatcher {
643 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
647 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
644 self.files.as_ref()
648 self.files.as_ref()
645 }
649 }
646
650
647 fn exact_match(&self, filename: &HgPath) -> bool {
651 fn exact_match(&self, filename: &HgPath) -> bool {
648 self.files.as_ref().map_or(false, |f| f.contains(filename))
652 self.files.as_ref().map_or(false, |f| f.contains(filename))
649 }
653 }
650
654
651 fn matches(&self, filename: &HgPath) -> bool {
655 fn matches(&self, filename: &HgPath) -> bool {
652 self.base.matches(filename) && !self.excluded.matches(filename)
656 self.base.matches(filename) && !self.excluded.matches(filename)
653 }
657 }
654
658
655 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
659 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
656 let excluded_set = self.excluded.visit_children_set(directory);
660 let excluded_set = self.excluded.visit_children_set(directory);
657 if excluded_set == VisitChildrenSet::Recursive {
661 if excluded_set == VisitChildrenSet::Recursive {
658 return VisitChildrenSet::Empty;
662 return VisitChildrenSet::Empty;
659 }
663 }
660 let base_set = self.base.visit_children_set(directory);
664 let base_set = self.base.visit_children_set(directory);
661 // Possible values for base: 'recursive', 'this', set(...), set()
665 // Possible values for base: 'recursive', 'this', set(...), set()
662 // Possible values for excluded: 'this', set(...), set()
666 // Possible values for excluded: 'this', set(...), set()
663 // If excluded has nothing under here that we care about, return base,
667 // If excluded has nothing under here that we care about, return base,
664 // even if it's 'recursive'.
668 // even if it's 'recursive'.
665 if excluded_set == VisitChildrenSet::Empty {
669 if excluded_set == VisitChildrenSet::Empty {
666 return base_set;
670 return base_set;
667 }
671 }
668 match base_set {
672 match base_set {
669 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
673 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
670 // Never return 'recursive' here if excluded_set is any kind of
674 // Never return 'recursive' here if excluded_set is any kind of
671 // non-empty (either 'this' or set(foo)), since excluded might
675 // non-empty (either 'this' or set(foo)), since excluded might
672 // return set() for a subdirectory.
676 // return set() for a subdirectory.
673 VisitChildrenSet::This
677 VisitChildrenSet::This
674 }
678 }
675 set => {
679 set => {
676 // Possible values for base: set(...), set()
680 // Possible values for base: set(...), set()
677 // Possible values for excluded: 'this', set(...)
681 // Possible values for excluded: 'this', set(...)
678 // We ignore excluded set results. They're possibly incorrect:
682 // We ignore excluded set results. They're possibly incorrect:
679 // base = path:dir/subdir
683 // base = path:dir/subdir
680 // excluded=rootfilesin:dir,
684 // excluded=rootfilesin:dir,
681 // visit_children_set(''):
685 // visit_children_set(''):
682 // base returns {'dir'}, excluded returns {'dir'}, if we
686 // base returns {'dir'}, excluded returns {'dir'}, if we
683 // subtracted we'd return set(), which is *not* correct, we
687 // subtracted we'd return set(), which is *not* correct, we
684 // still need to visit 'dir'!
688 // still need to visit 'dir'!
685 set
689 set
686 }
690 }
687 }
691 }
688 }
692 }
689
693
690 fn matches_everything(&self) -> bool {
694 fn matches_everything(&self) -> bool {
691 false
695 false
692 }
696 }
693
697
694 fn is_exact(&self) -> bool {
698 fn is_exact(&self) -> bool {
695 self.base.is_exact()
699 self.base.is_exact()
696 }
700 }
697 }
701 }
698
702
699 impl DifferenceMatcher {
703 impl DifferenceMatcher {
700 pub fn new(
704 pub fn new(
701 base: Box<dyn Matcher + Sync>,
705 base: Box<dyn Matcher + Sync>,
702 excluded: Box<dyn Matcher + Sync>,
706 excluded: Box<dyn Matcher + Sync>,
703 ) -> Self {
707 ) -> Self {
704 let base_is_exact = base.is_exact();
708 let base_is_exact = base.is_exact();
705 let base_files = base.file_set().map(ToOwned::to_owned);
709 let base_files = base.file_set().map(ToOwned::to_owned);
706 let mut new = Self {
710 let mut new = Self {
707 base,
711 base,
708 excluded,
712 excluded,
709 files: None,
713 files: None,
710 };
714 };
711 if base_is_exact {
715 if base_is_exact {
712 new.files = base_files.map(|files| {
716 new.files = base_files.map(|files| {
713 files.iter().cloned().filter(|f| new.matches(f)).collect()
717 files.iter().filter(|&f| new.matches(f)).cloned().collect()
714 });
718 });
715 }
719 }
716 new
720 new
717 }
721 }
718 }
722 }
719
723
720 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
724 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
721 /// contexts.
725 /// contexts.
722 ///
726 ///
723 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
727 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
724 /// from many threads at once is prone to contention, probably within the
728 /// from many threads at once is prone to contention, probably within the
725 /// scratch space needed as the regex DFA is built lazily.
729 /// scratch space needed as the regex DFA is built lazily.
726 ///
730 ///
727 /// We are in the process of raising the issue upstream, but for now
731 /// We are in the process of raising the issue upstream, but for now
728 /// the workaround used here is to store the `Regex` in a lazily populated
732 /// the workaround used here is to store the `Regex` in a lazily populated
729 /// thread-local variable, sharing the initial read-only compilation, but
733 /// thread-local variable, sharing the initial read-only compilation, but
730 /// not the lazy dfa scratch space mentioned above.
734 /// not the lazy dfa scratch space mentioned above.
731 ///
735 ///
732 /// This reduces the contention observed with 16+ threads, but does not
736 /// This reduces the contention observed with 16+ threads, but does not
733 /// completely remove it. Hopefully this can be addressed upstream.
737 /// completely remove it. Hopefully this can be addressed upstream.
734 struct RegexMatcher {
738 struct RegexMatcher {
735 /// Compiled at the start of the status algorithm, used as a base for
739 /// Compiled at the start of the status algorithm, used as a base for
736 /// cloning in each thread-local `self.local`, thus sharing the expensive
740 /// cloning in each thread-local `self.local`, thus sharing the expensive
737 /// first compilation.
741 /// first compilation.
738 base: regex::bytes::Regex,
742 base: regex::bytes::Regex,
739 /// Thread-local variable that holds the `Regex` that is actually queried
743 /// Thread-local variable that holds the `Regex` that is actually queried
740 /// from each thread.
744 /// from each thread.
741 local: thread_local::ThreadLocal<regex::bytes::Regex>,
745 local: thread_local::ThreadLocal<regex::bytes::Regex>,
742 }
746 }
743
747
744 impl RegexMatcher {
748 impl RegexMatcher {
745 /// Returns whether the path matches the stored `Regex`.
749 /// Returns whether the path matches the stored `Regex`.
746 pub fn is_match(&self, path: &HgPath) -> bool {
750 pub fn is_match(&self, path: &HgPath) -> bool {
747 self.local
751 self.local
748 .get_or(|| self.base.clone())
752 .get_or(|| self.base.clone())
749 .is_match(path.as_bytes())
753 .is_match(path.as_bytes())
750 }
754 }
751 }
755 }
752
756
753 /// Return a `RegexBuilder` from a bytes pattern
757 /// Return a `RegexBuilder` from a bytes pattern
754 ///
758 ///
755 /// This works around the fact that even if it works on byte haysacks,
759 /// This works around the fact that even if it works on byte haysacks,
756 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
760 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
757 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
761 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
758 use std::io::Write;
762 use std::io::Write;
759
763
760 // The `regex` crate adds `.*` to the start and end of expressions if there
764 // The `regex` crate adds `.*` to the start and end of expressions if there
761 // are no anchors, so add the start anchor.
765 // are no anchors, so add the start anchor.
762 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
766 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
763 for byte in pattern {
767 for byte in pattern {
764 if *byte > 127 {
768 if *byte > 127 {
765 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
769 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
766 } else {
770 } else {
767 escaped_bytes.push(*byte);
771 escaped_bytes.push(*byte);
768 }
772 }
769 }
773 }
770 escaped_bytes.push(b')');
774 escaped_bytes.push(b')');
771
775
772 // Avoid the cost of UTF8 checking
776 // Avoid the cost of UTF8 checking
773 //
777 //
774 // # Safety
778 // # Safety
775 // This is safe because we escaped all non-ASCII bytes.
779 // This is safe because we escaped all non-ASCII bytes.
776 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
780 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
777 regex::bytes::RegexBuilder::new(&pattern_string)
781 regex::bytes::RegexBuilder::new(&pattern_string)
778 }
782 }
779
783
780 /// Returns a function that matches an `HgPath` against the given regex
784 /// Returns a function that matches an `HgPath` against the given regex
781 /// pattern.
785 /// pattern.
782 ///
786 ///
783 /// This can fail when the pattern is invalid or not supported by the
787 /// This can fail when the pattern is invalid or not supported by the
784 /// underlying engine (the `regex` crate), for instance anything with
788 /// underlying engine (the `regex` crate), for instance anything with
785 /// back-references.
789 /// back-references.
786 #[logging_timer::time("trace")]
790 #[logging_timer::time("trace")]
787 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
791 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
788 let re = re_bytes_builder(pattern)
792 let re = re_bytes_builder(pattern)
789 .unicode(false)
793 .unicode(false)
790 // Big repos with big `.hgignore` will hit the default limit and
794 // Big repos with big `.hgignore` will hit the default limit and
791 // incur a significant performance hit. One repo's `hg status` hit
795 // incur a significant performance hit. One repo's `hg status` hit
792 // multiple *minutes*.
796 // multiple *minutes*.
793 .dfa_size_limit(50 * (1 << 20))
797 .dfa_size_limit(50 * (1 << 20))
794 .build()
798 .build()
795 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
799 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
796
800
797 Ok(RegexMatcher {
801 Ok(RegexMatcher {
798 base: re,
802 base: re,
799 local: Default::default(),
803 local: Default::default(),
800 })
804 })
801 }
805 }
802
806
803 /// Returns the regex pattern and a function that matches an `HgPath` against
807 /// Returns the regex pattern and a function that matches an `HgPath` against
804 /// said regex formed by the given ignore patterns.
808 /// said regex formed by the given ignore patterns.
805 fn build_regex_match<'a>(
809 fn build_regex_match<'a>(
806 ignore_patterns: &[IgnorePattern],
810 ignore_patterns: &[IgnorePattern],
807 glob_suffix: &[u8],
811 glob_suffix: &[u8],
808 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
812 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
809 let mut regexps = vec![];
813 let mut regexps = vec![];
810 let mut exact_set = HashSet::new();
814 let mut exact_set = HashSet::new();
811
815
812 for pattern in ignore_patterns {
816 for pattern in ignore_patterns {
813 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
817 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
814 regexps.push(re);
818 regexps.push(re);
815 } else {
819 } else {
816 let exact = normalize_path_bytes(&pattern.pattern);
820 let exact = normalize_path_bytes(&pattern.pattern);
817 exact_set.insert(HgPathBuf::from_bytes(&exact));
821 exact_set.insert(HgPathBuf::from_bytes(&exact));
818 }
822 }
819 }
823 }
820
824
821 let full_regex = regexps.join(&b'|');
825 let full_regex = regexps.join(&b'|');
822
826
823 // An empty pattern would cause the regex engine to incorrectly match the
827 // An empty pattern would cause the regex engine to incorrectly match the
824 // (empty) root directory
828 // (empty) root directory
825 let func = if !(regexps.is_empty()) {
829 let func = if !(regexps.is_empty()) {
826 let matcher = re_matcher(&full_regex)?;
830 let matcher = re_matcher(&full_regex)?;
827 let func = move |filename: &HgPath| {
831 let func = move |filename: &HgPath| {
828 exact_set.contains(filename) || matcher.is_match(filename)
832 exact_set.contains(filename) || matcher.is_match(filename)
829 };
833 };
830 Box::new(func) as IgnoreFnType
834 Box::new(func) as IgnoreFnType
831 } else {
835 } else {
832 let func = move |filename: &HgPath| exact_set.contains(filename);
836 let func = move |filename: &HgPath| exact_set.contains(filename);
833 Box::new(func) as IgnoreFnType
837 Box::new(func) as IgnoreFnType
834 };
838 };
835
839
836 Ok((full_regex, func))
840 Ok((full_regex, func))
837 }
841 }
838
842
839 /// Returns roots and directories corresponding to each pattern.
843 /// Returns roots and directories corresponding to each pattern.
840 ///
844 ///
841 /// This calculates the roots and directories exactly matching the patterns and
845 /// This calculates the roots and directories exactly matching the patterns and
842 /// returns a tuple of (roots, dirs). It does not return other directories
846 /// returns a tuple of (roots, dirs). It does not return other directories
843 /// which may also need to be considered, like the parent directories.
847 /// which may also need to be considered, like the parent directories.
844 fn roots_and_dirs(
848 fn roots_and_dirs(
845 ignore_patterns: &[IgnorePattern],
849 ignore_patterns: &[IgnorePattern],
846 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
850 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
847 let mut roots = Vec::new();
851 let mut roots = Vec::new();
848 let mut dirs = Vec::new();
852 let mut dirs = Vec::new();
849
853
850 for ignore_pattern in ignore_patterns {
854 for ignore_pattern in ignore_patterns {
851 let IgnorePattern {
855 let IgnorePattern {
852 syntax, pattern, ..
856 syntax, pattern, ..
853 } = ignore_pattern;
857 } = ignore_pattern;
854 match syntax {
858 match syntax {
855 PatternSyntax::RootGlob | PatternSyntax::Glob => {
859 PatternSyntax::RootGlob | PatternSyntax::Glob => {
856 let mut root = HgPathBuf::new();
860 let mut root = HgPathBuf::new();
857 for p in pattern.split(|c| *c == b'/') {
861 for p in pattern.split(|c| *c == b'/') {
858 if p.iter()
862 if p.iter()
859 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
863 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
860 {
864 {
861 break;
865 break;
862 }
866 }
863 root.push(HgPathBuf::from_bytes(p).as_ref());
867 root.push(HgPathBuf::from_bytes(p).as_ref());
864 }
868 }
865 roots.push(root);
869 roots.push(root);
866 }
870 }
867 PatternSyntax::Path
871 PatternSyntax::Path
868 | PatternSyntax::RelPath
872 | PatternSyntax::RelPath
869 | PatternSyntax::FilePath => {
873 | PatternSyntax::FilePath => {
870 let pat = HgPath::new(if pattern == b"." {
874 let pat = HgPath::new(if pattern == b"." {
871 &[] as &[u8]
875 &[] as &[u8]
872 } else {
876 } else {
873 pattern
877 pattern
874 });
878 });
875 roots.push(pat.to_owned());
879 roots.push(pat.to_owned());
876 }
880 }
877 PatternSyntax::RootFilesIn => {
881 PatternSyntax::RootFilesIn => {
878 let pat = if pattern == b"." {
882 let pat = if pattern == b"." {
879 &[] as &[u8]
883 &[] as &[u8]
880 } else {
884 } else {
881 pattern
885 pattern
882 };
886 };
883 dirs.push(HgPathBuf::from_bytes(pat));
887 dirs.push(HgPathBuf::from_bytes(pat));
884 }
888 }
885 _ => {
889 _ => {
886 roots.push(HgPathBuf::new());
890 roots.push(HgPathBuf::new());
887 }
891 }
888 }
892 }
889 }
893 }
890 (roots, dirs)
894 (roots, dirs)
891 }
895 }
892
896
/// Paths extracted from patterns
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: DirsMultiset,
}
903
907
904 /// Extract roots, dirs and parents from patterns.
908 /// Extract roots, dirs and parents from patterns.
905 fn roots_dirs_and_parents(
909 fn roots_dirs_and_parents(
906 ignore_patterns: &[IgnorePattern],
910 ignore_patterns: &[IgnorePattern],
907 ) -> PatternResult<RootsDirsAndParents> {
911 ) -> PatternResult<RootsDirsAndParents> {
908 let (roots, dirs) = roots_and_dirs(ignore_patterns);
912 let (roots, dirs) = roots_and_dirs(ignore_patterns);
909
913
910 let mut parents = DirsMultiset::from_manifest(&dirs)?;
914 let mut parents = DirsMultiset::from_manifest(&dirs)?;
911
915
912 for path in &roots {
916 for path in &roots {
913 parents.add_path(path)?
917 parents.add_path(path)?
914 }
918 }
915
919
916 Ok(RootsDirsAndParents {
920 Ok(RootsDirsAndParents {
917 roots: HashSet::from_iter(roots),
921 roots: HashSet::from_iter(roots),
918 dirs: HashSet::from_iter(dirs),
922 dirs: HashSet::from_iter(dirs),
919 parents,
923 parents,
920 })
924 })
921 }
925 }
922
926
923 /// Returns a function that checks whether a given file (in the general sense)
927 /// Returns a function that checks whether a given file (in the general sense)
924 /// should be matched.
928 /// should be matched.
925 fn build_match<'a>(
929 fn build_match<'a>(
926 ignore_patterns: Vec<IgnorePattern>,
930 ignore_patterns: Vec<IgnorePattern>,
927 glob_suffix: &[u8],
931 glob_suffix: &[u8],
928 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
932 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
929 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
933 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
930 // For debugging and printing
934 // For debugging and printing
931 let mut patterns = vec![];
935 let mut patterns = vec![];
932
936
933 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
937 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
934
938
935 if !subincludes.is_empty() {
939 if !subincludes.is_empty() {
936 // Build prefix-based matcher functions for subincludes
940 // Build prefix-based matcher functions for subincludes
937 let mut submatchers = FastHashMap::default();
941 let mut submatchers = FastHashMap::default();
938 let mut prefixes = vec![];
942 let mut prefixes = vec![];
939
943
940 for sub_include in subincludes {
944 for sub_include in subincludes {
941 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
945 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
942 let match_fn =
946 let match_fn =
943 Box::new(move |path: &HgPath| matcher.matches(path));
947 Box::new(move |path: &HgPath| matcher.matches(path));
944 prefixes.push(sub_include.prefix.clone());
948 prefixes.push(sub_include.prefix.clone());
945 submatchers.insert(sub_include.prefix.clone(), match_fn);
949 submatchers.insert(sub_include.prefix.clone(), match_fn);
946 }
950 }
947
951
948 let match_subinclude = move |filename: &HgPath| {
952 let match_subinclude = move |filename: &HgPath| {
949 for prefix in prefixes.iter() {
953 for prefix in prefixes.iter() {
950 if let Some(rel) = filename.relative_to(prefix) {
954 if let Some(rel) = filename.relative_to(prefix) {
951 if (submatchers[prefix])(rel) {
955 if (submatchers[prefix])(rel) {
952 return true;
956 return true;
953 }
957 }
954 }
958 }
955 }
959 }
956 false
960 false
957 };
961 };
958
962
959 match_funcs.push(Box::new(match_subinclude));
963 match_funcs.push(Box::new(match_subinclude));
960 }
964 }
961
965
962 if !ignore_patterns.is_empty() {
966 if !ignore_patterns.is_empty() {
963 // Either do dumb matching if all patterns are rootfiles, or match
967 // Either do dumb matching if all patterns are rootfiles, or match
964 // with a regex.
968 // with a regex.
965 if ignore_patterns
969 if ignore_patterns
966 .iter()
970 .iter()
967 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
971 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
968 {
972 {
969 let dirs: HashSet<_> = ignore_patterns
973 let dirs: HashSet<_> = ignore_patterns
970 .iter()
974 .iter()
971 .map(|k| k.pattern.to_owned())
975 .map(|k| k.pattern.to_owned())
972 .collect();
976 .collect();
973 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
977 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
974
978
975 let match_func = move |path: &HgPath| -> bool {
979 let match_func = move |path: &HgPath| -> bool {
976 let path = path.as_bytes();
980 let path = path.as_bytes();
977 let i = path.iter().rposition(|a| *a == b'/');
981 let i = path.iter().rposition(|a| *a == b'/');
978 let dir = if let Some(i) = i { &path[..i] } else { b"." };
982 let dir = if let Some(i) = i { &path[..i] } else { b"." };
979 dirs.contains(dir)
983 dirs.contains(dir)
980 };
984 };
981 match_funcs.push(Box::new(match_func));
985 match_funcs.push(Box::new(match_func));
982
986
983 patterns.extend(b"rootfilesin: ");
987 patterns.extend(b"rootfilesin: ");
984 dirs_vec.sort();
988 dirs_vec.sort();
985 patterns.extend(dirs_vec.escaped_bytes());
989 patterns.extend(dirs_vec.escaped_bytes());
986 } else {
990 } else {
987 let (new_re, match_func) =
991 let (new_re, match_func) =
988 build_regex_match(&ignore_patterns, glob_suffix)?;
992 build_regex_match(&ignore_patterns, glob_suffix)?;
989 patterns = new_re;
993 patterns = new_re;
990 match_funcs.push(match_func)
994 match_funcs.push(match_func)
991 }
995 }
992 }
996 }
993
997
994 Ok(if match_funcs.len() == 1 {
998 Ok(if match_funcs.len() == 1 {
995 (patterns, match_funcs.remove(0))
999 (patterns, match_funcs.remove(0))
996 } else {
1000 } else {
997 (
1001 (
998 patterns,
1002 patterns,
999 Box::new(move |f: &HgPath| -> bool {
1003 Box::new(move |f: &HgPath| -> bool {
1000 match_funcs.iter().any(|match_func| match_func(f))
1004 match_funcs.iter().any(|match_func| match_func(f))
1001 }),
1005 }),
1002 )
1006 )
1003 })
1007 })
1004 }
1008 }
1005
1009
1006 /// Parses all "ignore" files with their recursive includes and returns a
1010 /// Parses all "ignore" files with their recursive includes and returns a
1007 /// function that checks whether a given file (in the general sense) should be
1011 /// function that checks whether a given file (in the general sense) should be
1008 /// ignored.
1012 /// ignored.
1009 pub fn get_ignore_matcher<'a>(
1013 pub fn get_ignore_matcher<'a>(
1010 mut all_pattern_files: Vec<PathBuf>,
1014 mut all_pattern_files: Vec<PathBuf>,
1011 root_dir: &Path,
1015 root_dir: &Path,
1012 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1016 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1013 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1017 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1014 let mut all_patterns = vec![];
1018 let mut all_patterns = vec![];
1015 let mut all_warnings = vec![];
1019 let mut all_warnings = vec![];
1016
1020
1017 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1021 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1018 // deterministic even if the ordering of `all_pattern_files` is not (such
1022 // deterministic even if the ordering of `all_pattern_files` is not (such
1019 // as when a iteration order of a Python dict or Rust HashMap is involved).
1023 // as when a iteration order of a Python dict or Rust HashMap is involved).
1020 // Sort by "string" representation instead of the default by component
1024 // Sort by "string" representation instead of the default by component
1021 // (with a Rust-specific definition of a component)
1025 // (with a Rust-specific definition of a component)
1022 all_pattern_files
1026 all_pattern_files
1023 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1027 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1024
1028
1025 for pattern_file in &all_pattern_files {
1029 for pattern_file in &all_pattern_files {
1026 let (patterns, warnings) = get_patterns_from_file(
1030 let (patterns, warnings) = get_patterns_from_file(
1027 pattern_file,
1031 pattern_file,
1028 root_dir,
1032 root_dir,
1029 inspect_pattern_bytes,
1033 inspect_pattern_bytes,
1030 )?;
1034 )?;
1031
1035
1032 all_patterns.extend(patterns.to_owned());
1036 all_patterns.extend(patterns.to_owned());
1033 all_warnings.extend(warnings);
1037 all_warnings.extend(warnings);
1034 }
1038 }
1035 let matcher = IncludeMatcher::new(all_patterns)?;
1039 let matcher = IncludeMatcher::new(all_patterns)?;
1036 Ok((matcher, all_warnings))
1040 Ok((matcher, all_warnings))
1037 }
1041 }
1038
1042
1039 /// Parses all "ignore" files with their recursive includes and returns a
1043 /// Parses all "ignore" files with their recursive includes and returns a
1040 /// function that checks whether a given file (in the general sense) should be
1044 /// function that checks whether a given file (in the general sense) should be
1041 /// ignored.
1045 /// ignored.
1042 pub fn get_ignore_function<'a>(
1046 pub fn get_ignore_function<'a>(
1043 all_pattern_files: Vec<PathBuf>,
1047 all_pattern_files: Vec<PathBuf>,
1044 root_dir: &Path,
1048 root_dir: &Path,
1045 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1049 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1046 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1050 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1047 let res =
1051 let res =
1048 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1052 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1049 res.map(|(matcher, all_warnings)| {
1053 res.map(|(matcher, all_warnings)| {
1050 let res: IgnoreFnType<'a> =
1054 let res: IgnoreFnType<'a> =
1051 Box::new(move |path: &HgPath| matcher.matches(path));
1055 Box::new(move |path: &HgPath| matcher.matches(path));
1052
1056
1053 (res, all_warnings)
1057 (res, all_warnings)
1054 })
1058 })
1055 }
1059 }
1056
1060
1057 impl<'a> IncludeMatcher<'a> {
1061 impl<'a> IncludeMatcher<'a> {
1058 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1062 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1059 let RootsDirsAndParents {
1063 let RootsDirsAndParents {
1060 roots,
1064 roots,
1061 dirs,
1065 dirs,
1062 parents,
1066 parents,
1063 } = roots_dirs_and_parents(&ignore_patterns)?;
1067 } = roots_dirs_and_parents(&ignore_patterns)?;
1064 let prefix = ignore_patterns.iter().all(|k| {
1068 let prefix = ignore_patterns.iter().all(|k| {
1065 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1069 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1066 });
1070 });
1067 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1071 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1068
1072
1069 Ok(Self {
1073 Ok(Self {
1070 patterns,
1074 patterns,
1071 match_fn,
1075 match_fn,
1072 prefix,
1076 prefix,
1073 roots,
1077 roots,
1074 dirs,
1078 dirs,
1075 parents,
1079 parents,
1076 })
1080 })
1077 }
1081 }
1078
1082
1079 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1083 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1080 // TODO cache
1084 // TODO cache
1081 let thing = self
1085 let thing = self
1082 .dirs
1086 .dirs
1083 .iter()
1087 .iter()
1084 .chain(self.roots.iter())
1088 .chain(self.roots.iter())
1085 .chain(self.parents.iter());
1089 .chain(self.parents.iter());
1086 DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
1090 DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
1087 }
1091 }
1088
1092
1089 pub fn debug_get_patterns(&self) -> &[u8] {
1093 pub fn debug_get_patterns(&self) -> &[u8] {
1090 self.patterns.as_ref()
1094 self.patterns.as_ref()
1091 }
1095 }
1092 }
1096 }
1093
1097
1094 impl<'a> Display for IncludeMatcher<'a> {
1098 impl<'a> Display for IncludeMatcher<'a> {
1095 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1099 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1096 // XXX What about exact matches?
1100 // XXX What about exact matches?
1097 // I'm not sure it's worth it to clone the HashSet and keep it
1101 // I'm not sure it's worth it to clone the HashSet and keep it
1098 // around just in case someone wants to display the matcher, plus
1102 // around just in case someone wants to display the matcher, plus
1099 // it's going to be unreadable after a few entries, but we need to
1103 // it's going to be unreadable after a few entries, but we need to
1100 // inform in this display that exact matches are being used and are
1104 // inform in this display that exact matches are being used and are
1101 // (on purpose) missing from the `includes`.
1105 // (on purpose) missing from the `includes`.
1102 write!(
1106 write!(
1103 f,
1107 f,
1104 "IncludeMatcher(includes='{}')",
1108 "IncludeMatcher(includes='{}')",
1105 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1109 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1106 )
1110 )
1107 }
1111 }
1108 }
1112 }
1109
1113
1110 #[cfg(test)]
1114 #[cfg(test)]
1111 mod tests {
1115 mod tests {
1112 use super::*;
1116 use super::*;
1113 use pretty_assertions::assert_eq;
1117 use pretty_assertions::assert_eq;
1114 use std::collections::BTreeMap;
1118 use std::collections::BTreeMap;
1115 use std::collections::BTreeSet;
1119 use std::collections::BTreeSet;
1116 use std::fmt::Debug;
1120 use std::fmt::Debug;
1117 use std::path::Path;
1121 use std::path::Path;
1118
1122
1119 #[test]
1123 #[test]
1120 fn test_roots_and_dirs() {
1124 fn test_roots_and_dirs() {
1121 let pats = vec![
1125 let pats = vec![
1122 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1126 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1123 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1127 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1124 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1128 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1125 ];
1129 ];
1126 let (roots, dirs) = roots_and_dirs(&pats);
1130 let (roots, dirs) = roots_and_dirs(&pats);
1127
1131
1128 assert_eq!(
1132 assert_eq!(
1129 roots,
1133 roots,
1130 vec!(
1134 vec!(
1131 HgPathBuf::from_bytes(b"g/h"),
1135 HgPathBuf::from_bytes(b"g/h"),
1132 HgPathBuf::from_bytes(b"g/h"),
1136 HgPathBuf::from_bytes(b"g/h"),
1133 HgPathBuf::new()
1137 HgPathBuf::new()
1134 ),
1138 ),
1135 );
1139 );
1136 assert_eq!(dirs, vec!());
1140 assert_eq!(dirs, vec!());
1137 }
1141 }
1138
1142
1139 #[test]
1143 #[test]
1140 fn test_roots_dirs_and_parents() {
1144 fn test_roots_dirs_and_parents() {
1141 let pats = vec![
1145 let pats = vec![
1142 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1146 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1143 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1147 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1144 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1148 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1145 ];
1149 ];
1146
1150
1147 let mut roots = HashSet::new();
1151 let mut roots = HashSet::new();
1148 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1152 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1149 roots.insert(HgPathBuf::new());
1153 roots.insert(HgPathBuf::new());
1150
1154
1151 let dirs = HashSet::new();
1155 let dirs = HashSet::new();
1152
1156
1153 let parents = DirsMultiset::from_manifest(&[
1157 let parents = DirsMultiset::from_manifest(&[
1154 HgPathBuf::from_bytes(b"x"),
1158 HgPathBuf::from_bytes(b"x"),
1155 HgPathBuf::from_bytes(b"g/x"),
1159 HgPathBuf::from_bytes(b"g/x"),
1156 HgPathBuf::from_bytes(b"g/y"),
1160 HgPathBuf::from_bytes(b"g/y"),
1157 ])
1161 ])
1158 .unwrap();
1162 .unwrap();
1159
1163
1160 assert_eq!(
1164 assert_eq!(
1161 roots_dirs_and_parents(&pats).unwrap(),
1165 roots_dirs_and_parents(&pats).unwrap(),
1162 RootsDirsAndParents {
1166 RootsDirsAndParents {
1163 roots,
1167 roots,
1164 dirs,
1168 dirs,
1165 parents
1169 parents
1166 }
1170 }
1167 );
1171 );
1168 }
1172 }
1169
1173
1170 #[test]
1174 #[test]
1171 fn test_filematcher_visit_children_set() {
1175 fn test_filematcher_visit_children_set() {
1172 // Visitchildrenset
1176 // Visitchildrenset
1173 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1177 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1174 let matcher = FileMatcher::new(files).unwrap();
1178 let matcher = FileMatcher::new(files).unwrap();
1175
1179
1176 let mut set = HashSet::new();
1180 let mut set = HashSet::new();
1177 set.insert(HgPathBuf::from_bytes(b"dir"));
1181 set.insert(HgPathBuf::from_bytes(b"dir"));
1178 assert_eq!(
1182 assert_eq!(
1179 matcher.visit_children_set(HgPath::new(b"")),
1183 matcher.visit_children_set(HgPath::new(b"")),
1180 VisitChildrenSet::Set(set)
1184 VisitChildrenSet::Set(set)
1181 );
1185 );
1182
1186
1183 let mut set = HashSet::new();
1187 let mut set = HashSet::new();
1184 set.insert(HgPathBuf::from_bytes(b"subdir"));
1188 set.insert(HgPathBuf::from_bytes(b"subdir"));
1185 assert_eq!(
1189 assert_eq!(
1186 matcher.visit_children_set(HgPath::new(b"dir")),
1190 matcher.visit_children_set(HgPath::new(b"dir")),
1187 VisitChildrenSet::Set(set)
1191 VisitChildrenSet::Set(set)
1188 );
1192 );
1189
1193
1190 let mut set = HashSet::new();
1194 let mut set = HashSet::new();
1191 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1195 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1192 assert_eq!(
1196 assert_eq!(
1193 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1197 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1194 VisitChildrenSet::Set(set)
1198 VisitChildrenSet::Set(set)
1195 );
1199 );
1196
1200
1197 assert_eq!(
1201 assert_eq!(
1198 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1202 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1199 VisitChildrenSet::Empty
1203 VisitChildrenSet::Empty
1200 );
1204 );
1201 assert_eq!(
1205 assert_eq!(
1202 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1206 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1203 VisitChildrenSet::Empty
1207 VisitChildrenSet::Empty
1204 );
1208 );
1205 assert_eq!(
1209 assert_eq!(
1206 matcher.visit_children_set(HgPath::new(b"folder")),
1210 matcher.visit_children_set(HgPath::new(b"folder")),
1207 VisitChildrenSet::Empty
1211 VisitChildrenSet::Empty
1208 );
1212 );
1209 }
1213 }
1210
1214
1211 #[test]
1215 #[test]
1212 fn test_filematcher_visit_children_set_files_and_dirs() {
1216 fn test_filematcher_visit_children_set_files_and_dirs() {
1213 let files = vec![
1217 let files = vec![
1214 HgPathBuf::from_bytes(b"rootfile.txt"),
1218 HgPathBuf::from_bytes(b"rootfile.txt"),
1215 HgPathBuf::from_bytes(b"a/file1.txt"),
1219 HgPathBuf::from_bytes(b"a/file1.txt"),
1216 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1220 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1217 // No file in a/b/c
1221 // No file in a/b/c
1218 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1222 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1219 ];
1223 ];
1220 let matcher = FileMatcher::new(files).unwrap();
1224 let matcher = FileMatcher::new(files).unwrap();
1221
1225
1222 let mut set = HashSet::new();
1226 let mut set = HashSet::new();
1223 set.insert(HgPathBuf::from_bytes(b"a"));
1227 set.insert(HgPathBuf::from_bytes(b"a"));
1224 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1228 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1225 assert_eq!(
1229 assert_eq!(
1226 matcher.visit_children_set(HgPath::new(b"")),
1230 matcher.visit_children_set(HgPath::new(b"")),
1227 VisitChildrenSet::Set(set)
1231 VisitChildrenSet::Set(set)
1228 );
1232 );
1229
1233
1230 let mut set = HashSet::new();
1234 let mut set = HashSet::new();
1231 set.insert(HgPathBuf::from_bytes(b"b"));
1235 set.insert(HgPathBuf::from_bytes(b"b"));
1232 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1236 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1233 assert_eq!(
1237 assert_eq!(
1234 matcher.visit_children_set(HgPath::new(b"a")),
1238 matcher.visit_children_set(HgPath::new(b"a")),
1235 VisitChildrenSet::Set(set)
1239 VisitChildrenSet::Set(set)
1236 );
1240 );
1237
1241
1238 let mut set = HashSet::new();
1242 let mut set = HashSet::new();
1239 set.insert(HgPathBuf::from_bytes(b"c"));
1243 set.insert(HgPathBuf::from_bytes(b"c"));
1240 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1244 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1241 assert_eq!(
1245 assert_eq!(
1242 matcher.visit_children_set(HgPath::new(b"a/b")),
1246 matcher.visit_children_set(HgPath::new(b"a/b")),
1243 VisitChildrenSet::Set(set)
1247 VisitChildrenSet::Set(set)
1244 );
1248 );
1245
1249
1246 let mut set = HashSet::new();
1250 let mut set = HashSet::new();
1247 set.insert(HgPathBuf::from_bytes(b"d"));
1251 set.insert(HgPathBuf::from_bytes(b"d"));
1248 assert_eq!(
1252 assert_eq!(
1249 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1253 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1250 VisitChildrenSet::Set(set)
1254 VisitChildrenSet::Set(set)
1251 );
1255 );
1252 let mut set = HashSet::new();
1256 let mut set = HashSet::new();
1253 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1257 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1254 assert_eq!(
1258 assert_eq!(
1255 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1259 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1256 VisitChildrenSet::Set(set)
1260 VisitChildrenSet::Set(set)
1257 );
1261 );
1258
1262
1259 assert_eq!(
1263 assert_eq!(
1260 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1264 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1261 VisitChildrenSet::Empty
1265 VisitChildrenSet::Empty
1262 );
1266 );
1263 assert_eq!(
1267 assert_eq!(
1264 matcher.visit_children_set(HgPath::new(b"folder")),
1268 matcher.visit_children_set(HgPath::new(b"folder")),
1265 VisitChildrenSet::Empty
1269 VisitChildrenSet::Empty
1266 );
1270 );
1267 }
1271 }
1268
1272
1269 #[test]
1273 #[test]
1270 fn test_patternmatcher() {
1274 fn test_patternmatcher() {
1271 // VisitdirPrefix
1275 // VisitdirPrefix
1272 let m = PatternMatcher::new(vec![IgnorePattern::new(
1276 let m = PatternMatcher::new(vec![IgnorePattern::new(
1273 PatternSyntax::Path,
1277 PatternSyntax::Path,
1274 b"dir/subdir",
1278 b"dir/subdir",
1275 Path::new(""),
1279 Path::new(""),
1276 )])
1280 )])
1277 .unwrap();
1281 .unwrap();
1278 assert_eq!(
1282 assert_eq!(
1279 m.visit_children_set(HgPath::new(b"")),
1283 m.visit_children_set(HgPath::new(b"")),
1280 VisitChildrenSet::This
1284 VisitChildrenSet::This
1281 );
1285 );
1282 assert_eq!(
1286 assert_eq!(
1283 m.visit_children_set(HgPath::new(b"dir")),
1287 m.visit_children_set(HgPath::new(b"dir")),
1284 VisitChildrenSet::This
1288 VisitChildrenSet::This
1285 );
1289 );
1286 assert_eq!(
1290 assert_eq!(
1287 m.visit_children_set(HgPath::new(b"dir/subdir")),
1291 m.visit_children_set(HgPath::new(b"dir/subdir")),
1288 VisitChildrenSet::Recursive
1292 VisitChildrenSet::Recursive
1289 );
1293 );
1290 // OPT: This should probably be Recursive if its parent is?
1294 // OPT: This should probably be Recursive if its parent is?
1291 assert_eq!(
1295 assert_eq!(
1292 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1296 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1293 VisitChildrenSet::This
1297 VisitChildrenSet::This
1294 );
1298 );
1295 assert_eq!(
1299 assert_eq!(
1296 m.visit_children_set(HgPath::new(b"folder")),
1300 m.visit_children_set(HgPath::new(b"folder")),
1297 VisitChildrenSet::Empty
1301 VisitChildrenSet::Empty
1298 );
1302 );
1299
1303
1300 // VisitchildrensetPrefix
1304 // VisitchildrensetPrefix
1301 let m = PatternMatcher::new(vec![IgnorePattern::new(
1305 let m = PatternMatcher::new(vec![IgnorePattern::new(
1302 PatternSyntax::Path,
1306 PatternSyntax::Path,
1303 b"dir/subdir",
1307 b"dir/subdir",
1304 Path::new(""),
1308 Path::new(""),
1305 )])
1309 )])
1306 .unwrap();
1310 .unwrap();
1307 assert_eq!(
1311 assert_eq!(
1308 m.visit_children_set(HgPath::new(b"")),
1312 m.visit_children_set(HgPath::new(b"")),
1309 VisitChildrenSet::This
1313 VisitChildrenSet::This
1310 );
1314 );
1311 assert_eq!(
1315 assert_eq!(
1312 m.visit_children_set(HgPath::new(b"dir")),
1316 m.visit_children_set(HgPath::new(b"dir")),
1313 VisitChildrenSet::This
1317 VisitChildrenSet::This
1314 );
1318 );
1315 assert_eq!(
1319 assert_eq!(
1316 m.visit_children_set(HgPath::new(b"dir/subdir")),
1320 m.visit_children_set(HgPath::new(b"dir/subdir")),
1317 VisitChildrenSet::Recursive
1321 VisitChildrenSet::Recursive
1318 );
1322 );
1319 // OPT: This should probably be Recursive if its parent is?
1323 // OPT: This should probably be Recursive if its parent is?
1320 assert_eq!(
1324 assert_eq!(
1321 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1325 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1322 VisitChildrenSet::This
1326 VisitChildrenSet::This
1323 );
1327 );
1324 assert_eq!(
1328 assert_eq!(
1325 m.visit_children_set(HgPath::new(b"folder")),
1329 m.visit_children_set(HgPath::new(b"folder")),
1326 VisitChildrenSet::Empty
1330 VisitChildrenSet::Empty
1327 );
1331 );
1328
1332
1329 // VisitdirRootfilesin
1333 // VisitdirRootfilesin
1330 let m = PatternMatcher::new(vec![IgnorePattern::new(
1334 let m = PatternMatcher::new(vec![IgnorePattern::new(
1331 PatternSyntax::RootFilesIn,
1335 PatternSyntax::RootFilesIn,
1332 b"dir/subdir",
1336 b"dir/subdir",
1333 Path::new(""),
1337 Path::new(""),
1334 )])
1338 )])
1335 .unwrap();
1339 .unwrap();
1336 assert_eq!(
1340 assert_eq!(
1337 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1341 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1338 VisitChildrenSet::This
1342 VisitChildrenSet::This
1339 );
1343 );
1340 assert_eq!(
1344 assert_eq!(
1341 m.visit_children_set(HgPath::new(b"folder")),
1345 m.visit_children_set(HgPath::new(b"folder")),
1342 VisitChildrenSet::Empty
1346 VisitChildrenSet::Empty
1343 );
1347 );
1344 assert_eq!(
1348 assert_eq!(
1345 m.visit_children_set(HgPath::new(b"")),
1349 m.visit_children_set(HgPath::new(b"")),
1346 VisitChildrenSet::This
1350 VisitChildrenSet::This
1347 );
1351 );
1348 assert_eq!(
1352 assert_eq!(
1349 m.visit_children_set(HgPath::new(b"dir")),
1353 m.visit_children_set(HgPath::new(b"dir")),
1350 VisitChildrenSet::This
1354 VisitChildrenSet::This
1351 );
1355 );
1352 assert_eq!(
1356 assert_eq!(
1353 m.visit_children_set(HgPath::new(b"dir/subdir")),
1357 m.visit_children_set(HgPath::new(b"dir/subdir")),
1354 VisitChildrenSet::This
1358 VisitChildrenSet::This
1355 );
1359 );
1356
1360
1357 // VisitchildrensetRootfilesin
1361 // VisitchildrensetRootfilesin
1358 let m = PatternMatcher::new(vec![IgnorePattern::new(
1362 let m = PatternMatcher::new(vec![IgnorePattern::new(
1359 PatternSyntax::RootFilesIn,
1363 PatternSyntax::RootFilesIn,
1360 b"dir/subdir",
1364 b"dir/subdir",
1361 Path::new(""),
1365 Path::new(""),
1362 )])
1366 )])
1363 .unwrap();
1367 .unwrap();
1364 assert_eq!(
1368 assert_eq!(
1365 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1369 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1366 VisitChildrenSet::This
1370 VisitChildrenSet::This
1367 );
1371 );
1368 assert_eq!(
1372 assert_eq!(
1369 m.visit_children_set(HgPath::new(b"folder")),
1373 m.visit_children_set(HgPath::new(b"folder")),
1370 VisitChildrenSet::Empty
1374 VisitChildrenSet::Empty
1371 );
1375 );
1372 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1376 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1373 // respectively
1377 // respectively
1374 assert_eq!(
1378 assert_eq!(
1375 m.visit_children_set(HgPath::new(b"")),
1379 m.visit_children_set(HgPath::new(b"")),
1376 VisitChildrenSet::This
1380 VisitChildrenSet::This
1377 );
1381 );
1378 assert_eq!(
1382 assert_eq!(
1379 m.visit_children_set(HgPath::new(b"dir")),
1383 m.visit_children_set(HgPath::new(b"dir")),
1380 VisitChildrenSet::This
1384 VisitChildrenSet::This
1381 );
1385 );
1382 assert_eq!(
1386 assert_eq!(
1383 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 m.visit_children_set(HgPath::new(b"dir/subdir")),
1384 VisitChildrenSet::This
1388 VisitChildrenSet::This
1385 );
1389 );
1386
1390
1387 // VisitdirGlob
1391 // VisitdirGlob
1388 let m = PatternMatcher::new(vec![IgnorePattern::new(
1392 let m = PatternMatcher::new(vec![IgnorePattern::new(
1389 PatternSyntax::Glob,
1393 PatternSyntax::Glob,
1390 b"dir/z*",
1394 b"dir/z*",
1391 Path::new(""),
1395 Path::new(""),
1392 )])
1396 )])
1393 .unwrap();
1397 .unwrap();
1394 assert_eq!(
1398 assert_eq!(
1395 m.visit_children_set(HgPath::new(b"")),
1399 m.visit_children_set(HgPath::new(b"")),
1396 VisitChildrenSet::This
1400 VisitChildrenSet::This
1397 );
1401 );
1398 assert_eq!(
1402 assert_eq!(
1399 m.visit_children_set(HgPath::new(b"dir")),
1403 m.visit_children_set(HgPath::new(b"dir")),
1400 VisitChildrenSet::This
1404 VisitChildrenSet::This
1401 );
1405 );
1402 assert_eq!(
1406 assert_eq!(
1403 m.visit_children_set(HgPath::new(b"folder")),
1407 m.visit_children_set(HgPath::new(b"folder")),
1404 VisitChildrenSet::Empty
1408 VisitChildrenSet::Empty
1405 );
1409 );
1406 // OPT: these should probably be False.
1410 // OPT: these should probably be False.
1407 assert_eq!(
1411 assert_eq!(
1408 m.visit_children_set(HgPath::new(b"dir/subdir")),
1412 m.visit_children_set(HgPath::new(b"dir/subdir")),
1409 VisitChildrenSet::This
1413 VisitChildrenSet::This
1410 );
1414 );
1411 assert_eq!(
1415 assert_eq!(
1412 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1416 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1413 VisitChildrenSet::This
1417 VisitChildrenSet::This
1414 );
1418 );
1415
1419
1416 // VisitchildrensetGlob
1420 // VisitchildrensetGlob
1417 let m = PatternMatcher::new(vec![IgnorePattern::new(
1421 let m = PatternMatcher::new(vec![IgnorePattern::new(
1418 PatternSyntax::Glob,
1422 PatternSyntax::Glob,
1419 b"dir/z*",
1423 b"dir/z*",
1420 Path::new(""),
1424 Path::new(""),
1421 )])
1425 )])
1422 .unwrap();
1426 .unwrap();
1423 assert_eq!(
1427 assert_eq!(
1424 m.visit_children_set(HgPath::new(b"")),
1428 m.visit_children_set(HgPath::new(b"")),
1425 VisitChildrenSet::This
1429 VisitChildrenSet::This
1426 );
1430 );
1427 assert_eq!(
1431 assert_eq!(
1428 m.visit_children_set(HgPath::new(b"folder")),
1432 m.visit_children_set(HgPath::new(b"folder")),
1429 VisitChildrenSet::Empty
1433 VisitChildrenSet::Empty
1430 );
1434 );
1431 assert_eq!(
1435 assert_eq!(
1432 m.visit_children_set(HgPath::new(b"dir")),
1436 m.visit_children_set(HgPath::new(b"dir")),
1433 VisitChildrenSet::This
1437 VisitChildrenSet::This
1434 );
1438 );
1435 // OPT: these should probably be Empty
1439 // OPT: these should probably be Empty
1436 assert_eq!(
1440 assert_eq!(
1437 m.visit_children_set(HgPath::new(b"dir/subdir")),
1441 m.visit_children_set(HgPath::new(b"dir/subdir")),
1438 VisitChildrenSet::This
1442 VisitChildrenSet::This
1439 );
1443 );
1440 assert_eq!(
1444 assert_eq!(
1441 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1445 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1442 VisitChildrenSet::This
1446 VisitChildrenSet::This
1443 );
1447 );
1444
1448
1445 // VisitdirFilepath
1449 // VisitdirFilepath
1446 let m = PatternMatcher::new(vec![IgnorePattern::new(
1450 let m = PatternMatcher::new(vec![IgnorePattern::new(
1447 PatternSyntax::FilePath,
1451 PatternSyntax::FilePath,
1448 b"dir/z",
1452 b"dir/z",
1449 Path::new(""),
1453 Path::new(""),
1450 )])
1454 )])
1451 .unwrap();
1455 .unwrap();
1452 assert_eq!(
1456 assert_eq!(
1453 m.visit_children_set(HgPath::new(b"")),
1457 m.visit_children_set(HgPath::new(b"")),
1454 VisitChildrenSet::This
1458 VisitChildrenSet::This
1455 );
1459 );
1456 assert_eq!(
1460 assert_eq!(
1457 m.visit_children_set(HgPath::new(b"dir")),
1461 m.visit_children_set(HgPath::new(b"dir")),
1458 VisitChildrenSet::This
1462 VisitChildrenSet::This
1459 );
1463 );
1460 assert_eq!(
1464 assert_eq!(
1461 m.visit_children_set(HgPath::new(b"folder")),
1465 m.visit_children_set(HgPath::new(b"folder")),
1462 VisitChildrenSet::Empty
1466 VisitChildrenSet::Empty
1463 );
1467 );
1464 assert_eq!(
1468 assert_eq!(
1465 m.visit_children_set(HgPath::new(b"dir/subdir")),
1469 m.visit_children_set(HgPath::new(b"dir/subdir")),
1466 VisitChildrenSet::Empty
1470 VisitChildrenSet::Empty
1467 );
1471 );
1468 assert_eq!(
1472 assert_eq!(
1469 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1473 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1470 VisitChildrenSet::Empty
1474 VisitChildrenSet::Empty
1471 );
1475 );
1472
1476
1473 // VisitchildrensetFilepath
1477 // VisitchildrensetFilepath
1474 let m = PatternMatcher::new(vec![IgnorePattern::new(
1478 let m = PatternMatcher::new(vec![IgnorePattern::new(
1475 PatternSyntax::FilePath,
1479 PatternSyntax::FilePath,
1476 b"dir/z",
1480 b"dir/z",
1477 Path::new(""),
1481 Path::new(""),
1478 )])
1482 )])
1479 .unwrap();
1483 .unwrap();
1480 assert_eq!(
1484 assert_eq!(
1481 m.visit_children_set(HgPath::new(b"")),
1485 m.visit_children_set(HgPath::new(b"")),
1482 VisitChildrenSet::This
1486 VisitChildrenSet::This
1483 );
1487 );
1484 assert_eq!(
1488 assert_eq!(
1485 m.visit_children_set(HgPath::new(b"folder")),
1489 m.visit_children_set(HgPath::new(b"folder")),
1486 VisitChildrenSet::Empty
1490 VisitChildrenSet::Empty
1487 );
1491 );
1488 assert_eq!(
1492 assert_eq!(
1489 m.visit_children_set(HgPath::new(b"dir")),
1493 m.visit_children_set(HgPath::new(b"dir")),
1490 VisitChildrenSet::This
1494 VisitChildrenSet::This
1491 );
1495 );
1492 assert_eq!(
1496 assert_eq!(
1493 m.visit_children_set(HgPath::new(b"dir/subdir")),
1497 m.visit_children_set(HgPath::new(b"dir/subdir")),
1494 VisitChildrenSet::Empty
1498 VisitChildrenSet::Empty
1495 );
1499 );
1496 assert_eq!(
1500 assert_eq!(
1497 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1501 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1498 VisitChildrenSet::Empty
1502 VisitChildrenSet::Empty
1499 );
1503 );
1500 }
1504 }
1501
1505
1502 #[test]
1506 #[test]
1503 fn test_includematcher() {
1507 fn test_includematcher() {
1504 // VisitchildrensetPrefix
1508 // VisitchildrensetPrefix
1505 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1509 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1506 PatternSyntax::RelPath,
1510 PatternSyntax::RelPath,
1507 b"dir/subdir",
1511 b"dir/subdir",
1508 Path::new(""),
1512 Path::new(""),
1509 )])
1513 )])
1510 .unwrap();
1514 .unwrap();
1511
1515
1512 let mut set = HashSet::new();
1516 let mut set = HashSet::new();
1513 set.insert(HgPathBuf::from_bytes(b"dir"));
1517 set.insert(HgPathBuf::from_bytes(b"dir"));
1514 assert_eq!(
1518 assert_eq!(
1515 matcher.visit_children_set(HgPath::new(b"")),
1519 matcher.visit_children_set(HgPath::new(b"")),
1516 VisitChildrenSet::Set(set)
1520 VisitChildrenSet::Set(set)
1517 );
1521 );
1518
1522
1519 let mut set = HashSet::new();
1523 let mut set = HashSet::new();
1520 set.insert(HgPathBuf::from_bytes(b"subdir"));
1524 set.insert(HgPathBuf::from_bytes(b"subdir"));
1521 assert_eq!(
1525 assert_eq!(
1522 matcher.visit_children_set(HgPath::new(b"dir")),
1526 matcher.visit_children_set(HgPath::new(b"dir")),
1523 VisitChildrenSet::Set(set)
1527 VisitChildrenSet::Set(set)
1524 );
1528 );
1525 assert_eq!(
1529 assert_eq!(
1526 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1530 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1527 VisitChildrenSet::Recursive
1531 VisitChildrenSet::Recursive
1528 );
1532 );
1529 // OPT: This should probably be 'all' if its parent is?
1533 // OPT: This should probably be 'all' if its parent is?
1530 assert_eq!(
1534 assert_eq!(
1531 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1535 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1532 VisitChildrenSet::This
1536 VisitChildrenSet::This
1533 );
1537 );
1534 assert_eq!(
1538 assert_eq!(
1535 matcher.visit_children_set(HgPath::new(b"folder")),
1539 matcher.visit_children_set(HgPath::new(b"folder")),
1536 VisitChildrenSet::Empty
1540 VisitChildrenSet::Empty
1537 );
1541 );
1538
1542
1539 // VisitchildrensetRootfilesin
1543 // VisitchildrensetRootfilesin
1540 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1544 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1541 PatternSyntax::RootFilesIn,
1545 PatternSyntax::RootFilesIn,
1542 b"dir/subdir",
1546 b"dir/subdir",
1543 Path::new(""),
1547 Path::new(""),
1544 )])
1548 )])
1545 .unwrap();
1549 .unwrap();
1546
1550
1547 let mut set = HashSet::new();
1551 let mut set = HashSet::new();
1548 set.insert(HgPathBuf::from_bytes(b"dir"));
1552 set.insert(HgPathBuf::from_bytes(b"dir"));
1549 assert_eq!(
1553 assert_eq!(
1550 matcher.visit_children_set(HgPath::new(b"")),
1554 matcher.visit_children_set(HgPath::new(b"")),
1551 VisitChildrenSet::Set(set)
1555 VisitChildrenSet::Set(set)
1552 );
1556 );
1553
1557
1554 let mut set = HashSet::new();
1558 let mut set = HashSet::new();
1555 set.insert(HgPathBuf::from_bytes(b"subdir"));
1559 set.insert(HgPathBuf::from_bytes(b"subdir"));
1556 assert_eq!(
1560 assert_eq!(
1557 matcher.visit_children_set(HgPath::new(b"dir")),
1561 matcher.visit_children_set(HgPath::new(b"dir")),
1558 VisitChildrenSet::Set(set)
1562 VisitChildrenSet::Set(set)
1559 );
1563 );
1560
1564
1561 assert_eq!(
1565 assert_eq!(
1562 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1566 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1563 VisitChildrenSet::This
1567 VisitChildrenSet::This
1564 );
1568 );
1565 assert_eq!(
1569 assert_eq!(
1566 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1570 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1567 VisitChildrenSet::Empty
1571 VisitChildrenSet::Empty
1568 );
1572 );
1569 assert_eq!(
1573 assert_eq!(
1570 matcher.visit_children_set(HgPath::new(b"folder")),
1574 matcher.visit_children_set(HgPath::new(b"folder")),
1571 VisitChildrenSet::Empty
1575 VisitChildrenSet::Empty
1572 );
1576 );
1573
1577
1574 // VisitchildrensetGlob
1578 // VisitchildrensetGlob
1575 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1579 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1576 PatternSyntax::Glob,
1580 PatternSyntax::Glob,
1577 b"dir/z*",
1581 b"dir/z*",
1578 Path::new(""),
1582 Path::new(""),
1579 )])
1583 )])
1580 .unwrap();
1584 .unwrap();
1581
1585
1582 let mut set = HashSet::new();
1586 let mut set = HashSet::new();
1583 set.insert(HgPathBuf::from_bytes(b"dir"));
1587 set.insert(HgPathBuf::from_bytes(b"dir"));
1584 assert_eq!(
1588 assert_eq!(
1585 matcher.visit_children_set(HgPath::new(b"")),
1589 matcher.visit_children_set(HgPath::new(b"")),
1586 VisitChildrenSet::Set(set)
1590 VisitChildrenSet::Set(set)
1587 );
1591 );
1588 assert_eq!(
1592 assert_eq!(
1589 matcher.visit_children_set(HgPath::new(b"folder")),
1593 matcher.visit_children_set(HgPath::new(b"folder")),
1590 VisitChildrenSet::Empty
1594 VisitChildrenSet::Empty
1591 );
1595 );
1592 assert_eq!(
1596 assert_eq!(
1593 matcher.visit_children_set(HgPath::new(b"dir")),
1597 matcher.visit_children_set(HgPath::new(b"dir")),
1594 VisitChildrenSet::This
1598 VisitChildrenSet::This
1595 );
1599 );
1596 // OPT: these should probably be set().
1600 // OPT: these should probably be set().
1597 assert_eq!(
1601 assert_eq!(
1598 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1602 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1599 VisitChildrenSet::This
1603 VisitChildrenSet::This
1600 );
1604 );
1601 assert_eq!(
1605 assert_eq!(
1602 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1606 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1603 VisitChildrenSet::This
1607 VisitChildrenSet::This
1604 );
1608 );
1605
1609
1606 // VisitchildrensetFilePath
1610 // VisitchildrensetFilePath
1607 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1611 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1608 PatternSyntax::FilePath,
1612 PatternSyntax::FilePath,
1609 b"dir/z",
1613 b"dir/z",
1610 Path::new(""),
1614 Path::new(""),
1611 )])
1615 )])
1612 .unwrap();
1616 .unwrap();
1613
1617
1614 let mut set = HashSet::new();
1618 let mut set = HashSet::new();
1615 set.insert(HgPathBuf::from_bytes(b"dir"));
1619 set.insert(HgPathBuf::from_bytes(b"dir"));
1616 assert_eq!(
1620 assert_eq!(
1617 matcher.visit_children_set(HgPath::new(b"")),
1621 matcher.visit_children_set(HgPath::new(b"")),
1618 VisitChildrenSet::Set(set)
1622 VisitChildrenSet::Set(set)
1619 );
1623 );
1620 assert_eq!(
1624 assert_eq!(
1621 matcher.visit_children_set(HgPath::new(b"folder")),
1625 matcher.visit_children_set(HgPath::new(b"folder")),
1622 VisitChildrenSet::Empty
1626 VisitChildrenSet::Empty
1623 );
1627 );
1624 let mut set = HashSet::new();
1628 let mut set = HashSet::new();
1625 set.insert(HgPathBuf::from_bytes(b"z"));
1629 set.insert(HgPathBuf::from_bytes(b"z"));
1626 assert_eq!(
1630 assert_eq!(
1627 matcher.visit_children_set(HgPath::new(b"dir")),
1631 matcher.visit_children_set(HgPath::new(b"dir")),
1628 VisitChildrenSet::Set(set)
1632 VisitChildrenSet::Set(set)
1629 );
1633 );
1630 // OPT: these should probably be set().
1634 // OPT: these should probably be set().
1631 assert_eq!(
1635 assert_eq!(
1632 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1636 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1633 VisitChildrenSet::Empty
1637 VisitChildrenSet::Empty
1634 );
1638 );
1635 assert_eq!(
1639 assert_eq!(
1636 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1640 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1637 VisitChildrenSet::Empty
1641 VisitChildrenSet::Empty
1638 );
1642 );
1639
1643
1640 // Test multiple patterns
1644 // Test multiple patterns
1641 let matcher = IncludeMatcher::new(vec![
1645 let matcher = IncludeMatcher::new(vec![
1642 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1646 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1643 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1647 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1644 ])
1648 ])
1645 .unwrap();
1649 .unwrap();
1646
1650
1647 assert_eq!(
1651 assert_eq!(
1648 matcher.visit_children_set(HgPath::new(b"")),
1652 matcher.visit_children_set(HgPath::new(b"")),
1649 VisitChildrenSet::This
1653 VisitChildrenSet::This
1650 );
1654 );
1651
1655
1652 // Test multiple patterns
1656 // Test multiple patterns
1653 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1657 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1654 PatternSyntax::Glob,
1658 PatternSyntax::Glob,
1655 b"**/*.exe",
1659 b"**/*.exe",
1656 Path::new(""),
1660 Path::new(""),
1657 )])
1661 )])
1658 .unwrap();
1662 .unwrap();
1659
1663
1660 assert_eq!(
1664 assert_eq!(
1661 matcher.visit_children_set(HgPath::new(b"")),
1665 matcher.visit_children_set(HgPath::new(b"")),
1662 VisitChildrenSet::This
1666 VisitChildrenSet::This
1663 );
1667 );
1664 }
1668 }
1665
1669
1666 #[test]
1670 #[test]
1667 fn test_unionmatcher() {
1671 fn test_unionmatcher() {
1668 // Path + Rootfiles
1672 // Path + Rootfiles
1669 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1673 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1670 PatternSyntax::RelPath,
1674 PatternSyntax::RelPath,
1671 b"dir/subdir",
1675 b"dir/subdir",
1672 Path::new(""),
1676 Path::new(""),
1673 )])
1677 )])
1674 .unwrap();
1678 .unwrap();
1675 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1679 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1676 PatternSyntax::RootFilesIn,
1680 PatternSyntax::RootFilesIn,
1677 b"dir",
1681 b"dir",
1678 Path::new(""),
1682 Path::new(""),
1679 )])
1683 )])
1680 .unwrap();
1684 .unwrap();
1681 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1685 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1682
1686
1683 let mut set = HashSet::new();
1687 let mut set = HashSet::new();
1684 set.insert(HgPathBuf::from_bytes(b"dir"));
1688 set.insert(HgPathBuf::from_bytes(b"dir"));
1685 assert_eq!(
1689 assert_eq!(
1686 matcher.visit_children_set(HgPath::new(b"")),
1690 matcher.visit_children_set(HgPath::new(b"")),
1687 VisitChildrenSet::Set(set)
1691 VisitChildrenSet::Set(set)
1688 );
1692 );
1689 assert_eq!(
1693 assert_eq!(
1690 matcher.visit_children_set(HgPath::new(b"dir")),
1694 matcher.visit_children_set(HgPath::new(b"dir")),
1691 VisitChildrenSet::This
1695 VisitChildrenSet::This
1692 );
1696 );
1693 assert_eq!(
1697 assert_eq!(
1694 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1698 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1695 VisitChildrenSet::Recursive
1699 VisitChildrenSet::Recursive
1696 );
1700 );
1697 assert_eq!(
1701 assert_eq!(
1698 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1702 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1699 VisitChildrenSet::Empty
1703 VisitChildrenSet::Empty
1700 );
1704 );
1701 assert_eq!(
1705 assert_eq!(
1702 matcher.visit_children_set(HgPath::new(b"folder")),
1706 matcher.visit_children_set(HgPath::new(b"folder")),
1703 VisitChildrenSet::Empty
1707 VisitChildrenSet::Empty
1704 );
1708 );
1705 assert_eq!(
1709 assert_eq!(
1706 matcher.visit_children_set(HgPath::new(b"folder")),
1710 matcher.visit_children_set(HgPath::new(b"folder")),
1707 VisitChildrenSet::Empty
1711 VisitChildrenSet::Empty
1708 );
1712 );
1709
1713
1710 // OPT: These next two could be 'all' instead of 'this'.
1714 // OPT: These next two could be 'all' instead of 'this'.
1711 assert_eq!(
1715 assert_eq!(
1712 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1716 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1713 VisitChildrenSet::This
1717 VisitChildrenSet::This
1714 );
1718 );
1715 assert_eq!(
1719 assert_eq!(
1716 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1720 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1717 VisitChildrenSet::This
1721 VisitChildrenSet::This
1718 );
1722 );
1719
1723
1720 // Path + unrelated Path
1724 // Path + unrelated Path
1721 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1725 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1722 PatternSyntax::RelPath,
1726 PatternSyntax::RelPath,
1723 b"dir/subdir",
1727 b"dir/subdir",
1724 Path::new(""),
1728 Path::new(""),
1725 )])
1729 )])
1726 .unwrap();
1730 .unwrap();
1727 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1731 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1728 PatternSyntax::RelPath,
1732 PatternSyntax::RelPath,
1729 b"folder",
1733 b"folder",
1730 Path::new(""),
1734 Path::new(""),
1731 )])
1735 )])
1732 .unwrap();
1736 .unwrap();
1733 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1737 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1734
1738
1735 let mut set = HashSet::new();
1739 let mut set = HashSet::new();
1736 set.insert(HgPathBuf::from_bytes(b"folder"));
1740 set.insert(HgPathBuf::from_bytes(b"folder"));
1737 set.insert(HgPathBuf::from_bytes(b"dir"));
1741 set.insert(HgPathBuf::from_bytes(b"dir"));
1738 assert_eq!(
1742 assert_eq!(
1739 matcher.visit_children_set(HgPath::new(b"")),
1743 matcher.visit_children_set(HgPath::new(b"")),
1740 VisitChildrenSet::Set(set)
1744 VisitChildrenSet::Set(set)
1741 );
1745 );
1742 let mut set = HashSet::new();
1746 let mut set = HashSet::new();
1743 set.insert(HgPathBuf::from_bytes(b"subdir"));
1747 set.insert(HgPathBuf::from_bytes(b"subdir"));
1744 assert_eq!(
1748 assert_eq!(
1745 matcher.visit_children_set(HgPath::new(b"dir")),
1749 matcher.visit_children_set(HgPath::new(b"dir")),
1746 VisitChildrenSet::Set(set)
1750 VisitChildrenSet::Set(set)
1747 );
1751 );
1748
1752
1749 assert_eq!(
1753 assert_eq!(
1750 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1754 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1751 VisitChildrenSet::Recursive
1755 VisitChildrenSet::Recursive
1752 );
1756 );
1753 assert_eq!(
1757 assert_eq!(
1754 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1758 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1755 VisitChildrenSet::Empty
1759 VisitChildrenSet::Empty
1756 );
1760 );
1757
1761
1758 assert_eq!(
1762 assert_eq!(
1759 matcher.visit_children_set(HgPath::new(b"folder")),
1763 matcher.visit_children_set(HgPath::new(b"folder")),
1760 VisitChildrenSet::Recursive
1764 VisitChildrenSet::Recursive
1761 );
1765 );
1762 // OPT: These next two could be 'all' instead of 'this'.
1766 // OPT: These next two could be 'all' instead of 'this'.
1763 assert_eq!(
1767 assert_eq!(
1764 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1768 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1765 VisitChildrenSet::This
1769 VisitChildrenSet::This
1766 );
1770 );
1767 assert_eq!(
1771 assert_eq!(
1768 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1772 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1769 VisitChildrenSet::This
1773 VisitChildrenSet::This
1770 );
1774 );
1771
1775
1772 // Path + subpath
1776 // Path + subpath
1773 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1777 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1774 PatternSyntax::RelPath,
1778 PatternSyntax::RelPath,
1775 b"dir/subdir/x",
1779 b"dir/subdir/x",
1776 Path::new(""),
1780 Path::new(""),
1777 )])
1781 )])
1778 .unwrap();
1782 .unwrap();
1779 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1783 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1780 PatternSyntax::RelPath,
1784 PatternSyntax::RelPath,
1781 b"dir/subdir",
1785 b"dir/subdir",
1782 Path::new(""),
1786 Path::new(""),
1783 )])
1787 )])
1784 .unwrap();
1788 .unwrap();
1785 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1789 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1786
1790
1787 let mut set = HashSet::new();
1791 let mut set = HashSet::new();
1788 set.insert(HgPathBuf::from_bytes(b"dir"));
1792 set.insert(HgPathBuf::from_bytes(b"dir"));
1789 assert_eq!(
1793 assert_eq!(
1790 matcher.visit_children_set(HgPath::new(b"")),
1794 matcher.visit_children_set(HgPath::new(b"")),
1791 VisitChildrenSet::Set(set)
1795 VisitChildrenSet::Set(set)
1792 );
1796 );
1793 let mut set = HashSet::new();
1797 let mut set = HashSet::new();
1794 set.insert(HgPathBuf::from_bytes(b"subdir"));
1798 set.insert(HgPathBuf::from_bytes(b"subdir"));
1795 assert_eq!(
1799 assert_eq!(
1796 matcher.visit_children_set(HgPath::new(b"dir")),
1800 matcher.visit_children_set(HgPath::new(b"dir")),
1797 VisitChildrenSet::Set(set)
1801 VisitChildrenSet::Set(set)
1798 );
1802 );
1799
1803
1800 assert_eq!(
1804 assert_eq!(
1801 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1805 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1802 VisitChildrenSet::Recursive
1806 VisitChildrenSet::Recursive
1803 );
1807 );
1804 assert_eq!(
1808 assert_eq!(
1805 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1809 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1806 VisitChildrenSet::Empty
1810 VisitChildrenSet::Empty
1807 );
1811 );
1808
1812
1809 assert_eq!(
1813 assert_eq!(
1810 matcher.visit_children_set(HgPath::new(b"folder")),
1814 matcher.visit_children_set(HgPath::new(b"folder")),
1811 VisitChildrenSet::Empty
1815 VisitChildrenSet::Empty
1812 );
1816 );
1813 assert_eq!(
1817 assert_eq!(
1814 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1818 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1815 VisitChildrenSet::Recursive
1819 VisitChildrenSet::Recursive
1816 );
1820 );
1817 // OPT: this should probably be 'all' not 'this'.
1821 // OPT: this should probably be 'all' not 'this'.
1818 assert_eq!(
1822 assert_eq!(
1819 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1823 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1820 VisitChildrenSet::This
1824 VisitChildrenSet::This
1821 );
1825 );
1822 }
1826 }
1823
1827
1824 #[test]
1828 #[test]
1825 fn test_intersectionmatcher() {
1829 fn test_intersectionmatcher() {
1826 // Include path + Include rootfiles
1830 // Include path + Include rootfiles
1827 let m1 = Box::new(
1831 let m1 = Box::new(
1828 IncludeMatcher::new(vec![IgnorePattern::new(
1832 IncludeMatcher::new(vec![IgnorePattern::new(
1829 PatternSyntax::RelPath,
1833 PatternSyntax::RelPath,
1830 b"dir/subdir",
1834 b"dir/subdir",
1831 Path::new(""),
1835 Path::new(""),
1832 )])
1836 )])
1833 .unwrap(),
1837 .unwrap(),
1834 );
1838 );
1835 let m2 = Box::new(
1839 let m2 = Box::new(
1836 IncludeMatcher::new(vec![IgnorePattern::new(
1840 IncludeMatcher::new(vec![IgnorePattern::new(
1837 PatternSyntax::RootFilesIn,
1841 PatternSyntax::RootFilesIn,
1838 b"dir",
1842 b"dir",
1839 Path::new(""),
1843 Path::new(""),
1840 )])
1844 )])
1841 .unwrap(),
1845 .unwrap(),
1842 );
1846 );
1843 let matcher = IntersectionMatcher::new(m1, m2);
1847 let matcher = IntersectionMatcher::new(m1, m2);
1844
1848
1845 let mut set = HashSet::new();
1849 let mut set = HashSet::new();
1846 set.insert(HgPathBuf::from_bytes(b"dir"));
1850 set.insert(HgPathBuf::from_bytes(b"dir"));
1847 assert_eq!(
1851 assert_eq!(
1848 matcher.visit_children_set(HgPath::new(b"")),
1852 matcher.visit_children_set(HgPath::new(b"")),
1849 VisitChildrenSet::Set(set)
1853 VisitChildrenSet::Set(set)
1850 );
1854 );
1851 assert_eq!(
1855 assert_eq!(
1852 matcher.visit_children_set(HgPath::new(b"dir")),
1856 matcher.visit_children_set(HgPath::new(b"dir")),
1853 VisitChildrenSet::This
1857 VisitChildrenSet::This
1854 );
1858 );
1855 assert_eq!(
1859 assert_eq!(
1856 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1860 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1857 VisitChildrenSet::Empty
1861 VisitChildrenSet::Empty
1858 );
1862 );
1859 assert_eq!(
1863 assert_eq!(
1860 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1864 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1861 VisitChildrenSet::Empty
1865 VisitChildrenSet::Empty
1862 );
1866 );
1863 assert_eq!(
1867 assert_eq!(
1864 matcher.visit_children_set(HgPath::new(b"folder")),
1868 matcher.visit_children_set(HgPath::new(b"folder")),
1865 VisitChildrenSet::Empty
1869 VisitChildrenSet::Empty
1866 );
1870 );
1867 assert_eq!(
1871 assert_eq!(
1868 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1872 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1869 VisitChildrenSet::Empty
1873 VisitChildrenSet::Empty
1870 );
1874 );
1871 assert_eq!(
1875 assert_eq!(
1872 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1876 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1873 VisitChildrenSet::Empty
1877 VisitChildrenSet::Empty
1874 );
1878 );
1875
1879
1876 // Non intersecting paths
1880 // Non intersecting paths
1877 let m1 = Box::new(
1881 let m1 = Box::new(
1878 IncludeMatcher::new(vec![IgnorePattern::new(
1882 IncludeMatcher::new(vec![IgnorePattern::new(
1879 PatternSyntax::RelPath,
1883 PatternSyntax::RelPath,
1880 b"dir/subdir",
1884 b"dir/subdir",
1881 Path::new(""),
1885 Path::new(""),
1882 )])
1886 )])
1883 .unwrap(),
1887 .unwrap(),
1884 );
1888 );
1885 let m2 = Box::new(
1889 let m2 = Box::new(
1886 IncludeMatcher::new(vec![IgnorePattern::new(
1890 IncludeMatcher::new(vec![IgnorePattern::new(
1887 PatternSyntax::RelPath,
1891 PatternSyntax::RelPath,
1888 b"folder",
1892 b"folder",
1889 Path::new(""),
1893 Path::new(""),
1890 )])
1894 )])
1891 .unwrap(),
1895 .unwrap(),
1892 );
1896 );
1893 let matcher = IntersectionMatcher::new(m1, m2);
1897 let matcher = IntersectionMatcher::new(m1, m2);
1894
1898
1895 assert_eq!(
1899 assert_eq!(
1896 matcher.visit_children_set(HgPath::new(b"")),
1900 matcher.visit_children_set(HgPath::new(b"")),
1897 VisitChildrenSet::Empty
1901 VisitChildrenSet::Empty
1898 );
1902 );
1899 assert_eq!(
1903 assert_eq!(
1900 matcher.visit_children_set(HgPath::new(b"dir")),
1904 matcher.visit_children_set(HgPath::new(b"dir")),
1901 VisitChildrenSet::Empty
1905 VisitChildrenSet::Empty
1902 );
1906 );
1903 assert_eq!(
1907 assert_eq!(
1904 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1908 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1905 VisitChildrenSet::Empty
1909 VisitChildrenSet::Empty
1906 );
1910 );
1907 assert_eq!(
1911 assert_eq!(
1908 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1912 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1909 VisitChildrenSet::Empty
1913 VisitChildrenSet::Empty
1910 );
1914 );
1911 assert_eq!(
1915 assert_eq!(
1912 matcher.visit_children_set(HgPath::new(b"folder")),
1916 matcher.visit_children_set(HgPath::new(b"folder")),
1913 VisitChildrenSet::Empty
1917 VisitChildrenSet::Empty
1914 );
1918 );
1915 assert_eq!(
1919 assert_eq!(
1916 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1920 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1917 VisitChildrenSet::Empty
1921 VisitChildrenSet::Empty
1918 );
1922 );
1919 assert_eq!(
1923 assert_eq!(
1920 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1924 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1921 VisitChildrenSet::Empty
1925 VisitChildrenSet::Empty
1922 );
1926 );
1923
1927
1924 // Nested paths
1928 // Nested paths
1925 let m1 = Box::new(
1929 let m1 = Box::new(
1926 IncludeMatcher::new(vec![IgnorePattern::new(
1930 IncludeMatcher::new(vec![IgnorePattern::new(
1927 PatternSyntax::RelPath,
1931 PatternSyntax::RelPath,
1928 b"dir/subdir/x",
1932 b"dir/subdir/x",
1929 Path::new(""),
1933 Path::new(""),
1930 )])
1934 )])
1931 .unwrap(),
1935 .unwrap(),
1932 );
1936 );
1933 let m2 = Box::new(
1937 let m2 = Box::new(
1934 IncludeMatcher::new(vec![IgnorePattern::new(
1938 IncludeMatcher::new(vec![IgnorePattern::new(
1935 PatternSyntax::RelPath,
1939 PatternSyntax::RelPath,
1936 b"dir/subdir",
1940 b"dir/subdir",
1937 Path::new(""),
1941 Path::new(""),
1938 )])
1942 )])
1939 .unwrap(),
1943 .unwrap(),
1940 );
1944 );
1941 let matcher = IntersectionMatcher::new(m1, m2);
1945 let matcher = IntersectionMatcher::new(m1, m2);
1942
1946
1943 let mut set = HashSet::new();
1947 let mut set = HashSet::new();
1944 set.insert(HgPathBuf::from_bytes(b"dir"));
1948 set.insert(HgPathBuf::from_bytes(b"dir"));
1945 assert_eq!(
1949 assert_eq!(
1946 matcher.visit_children_set(HgPath::new(b"")),
1950 matcher.visit_children_set(HgPath::new(b"")),
1947 VisitChildrenSet::Set(set)
1951 VisitChildrenSet::Set(set)
1948 );
1952 );
1949
1953
1950 let mut set = HashSet::new();
1954 let mut set = HashSet::new();
1951 set.insert(HgPathBuf::from_bytes(b"subdir"));
1955 set.insert(HgPathBuf::from_bytes(b"subdir"));
1952 assert_eq!(
1956 assert_eq!(
1953 matcher.visit_children_set(HgPath::new(b"dir")),
1957 matcher.visit_children_set(HgPath::new(b"dir")),
1954 VisitChildrenSet::Set(set)
1958 VisitChildrenSet::Set(set)
1955 );
1959 );
1956 let mut set = HashSet::new();
1960 let mut set = HashSet::new();
1957 set.insert(HgPathBuf::from_bytes(b"x"));
1961 set.insert(HgPathBuf::from_bytes(b"x"));
1958 assert_eq!(
1962 assert_eq!(
1959 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1963 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1960 VisitChildrenSet::Set(set)
1964 VisitChildrenSet::Set(set)
1961 );
1965 );
1962 assert_eq!(
1966 assert_eq!(
1963 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1967 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1964 VisitChildrenSet::Empty
1968 VisitChildrenSet::Empty
1965 );
1969 );
1966 assert_eq!(
1970 assert_eq!(
1967 matcher.visit_children_set(HgPath::new(b"folder")),
1971 matcher.visit_children_set(HgPath::new(b"folder")),
1968 VisitChildrenSet::Empty
1972 VisitChildrenSet::Empty
1969 );
1973 );
1970 assert_eq!(
1974 assert_eq!(
1971 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1975 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1972 VisitChildrenSet::Empty
1976 VisitChildrenSet::Empty
1973 );
1977 );
1974 // OPT: this should probably be 'all' not 'this'.
1978 // OPT: this should probably be 'all' not 'this'.
1975 assert_eq!(
1979 assert_eq!(
1976 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1980 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1977 VisitChildrenSet::This
1981 VisitChildrenSet::This
1978 );
1982 );
1979
1983
1980 // Diverging paths
1984 // Diverging paths
1981 let m1 = Box::new(
1985 let m1 = Box::new(
1982 IncludeMatcher::new(vec![IgnorePattern::new(
1986 IncludeMatcher::new(vec![IgnorePattern::new(
1983 PatternSyntax::RelPath,
1987 PatternSyntax::RelPath,
1984 b"dir/subdir/x",
1988 b"dir/subdir/x",
1985 Path::new(""),
1989 Path::new(""),
1986 )])
1990 )])
1987 .unwrap(),
1991 .unwrap(),
1988 );
1992 );
1989 let m2 = Box::new(
1993 let m2 = Box::new(
1990 IncludeMatcher::new(vec![IgnorePattern::new(
1994 IncludeMatcher::new(vec![IgnorePattern::new(
1991 PatternSyntax::RelPath,
1995 PatternSyntax::RelPath,
1992 b"dir/subdir/z",
1996 b"dir/subdir/z",
1993 Path::new(""),
1997 Path::new(""),
1994 )])
1998 )])
1995 .unwrap(),
1999 .unwrap(),
1996 );
2000 );
1997 let matcher = IntersectionMatcher::new(m1, m2);
2001 let matcher = IntersectionMatcher::new(m1, m2);
1998
2002
1999 // OPT: these next two could probably be Empty as well.
2003 // OPT: these next two could probably be Empty as well.
2000 let mut set = HashSet::new();
2004 let mut set = HashSet::new();
2001 set.insert(HgPathBuf::from_bytes(b"dir"));
2005 set.insert(HgPathBuf::from_bytes(b"dir"));
2002 assert_eq!(
2006 assert_eq!(
2003 matcher.visit_children_set(HgPath::new(b"")),
2007 matcher.visit_children_set(HgPath::new(b"")),
2004 VisitChildrenSet::Set(set)
2008 VisitChildrenSet::Set(set)
2005 );
2009 );
2006 // OPT: these next two could probably be Empty as well.
2010 // OPT: these next two could probably be Empty as well.
2007 let mut set = HashSet::new();
2011 let mut set = HashSet::new();
2008 set.insert(HgPathBuf::from_bytes(b"subdir"));
2012 set.insert(HgPathBuf::from_bytes(b"subdir"));
2009 assert_eq!(
2013 assert_eq!(
2010 matcher.visit_children_set(HgPath::new(b"dir")),
2014 matcher.visit_children_set(HgPath::new(b"dir")),
2011 VisitChildrenSet::Set(set)
2015 VisitChildrenSet::Set(set)
2012 );
2016 );
2013 assert_eq!(
2017 assert_eq!(
2014 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2018 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2015 VisitChildrenSet::Empty
2019 VisitChildrenSet::Empty
2016 );
2020 );
2017 assert_eq!(
2021 assert_eq!(
2018 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2022 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2019 VisitChildrenSet::Empty
2023 VisitChildrenSet::Empty
2020 );
2024 );
2021 assert_eq!(
2025 assert_eq!(
2022 matcher.visit_children_set(HgPath::new(b"folder")),
2026 matcher.visit_children_set(HgPath::new(b"folder")),
2023 VisitChildrenSet::Empty
2027 VisitChildrenSet::Empty
2024 );
2028 );
2025 assert_eq!(
2029 assert_eq!(
2026 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2030 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2027 VisitChildrenSet::Empty
2031 VisitChildrenSet::Empty
2028 );
2032 );
2029 assert_eq!(
2033 assert_eq!(
2030 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2034 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2031 VisitChildrenSet::Empty
2035 VisitChildrenSet::Empty
2032 );
2036 );
2033 }
2037 }
2034
2038
2035 #[test]
2039 #[test]
2036 fn test_differencematcher() {
2040 fn test_differencematcher() {
2037 // Two alwaysmatchers should function like a nevermatcher
2041 // Two alwaysmatchers should function like a nevermatcher
2038 let m1 = AlwaysMatcher;
2042 let m1 = AlwaysMatcher;
2039 let m2 = AlwaysMatcher;
2043 let m2 = AlwaysMatcher;
2040 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2044 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2041
2045
2042 for case in &[
2046 for case in &[
2043 &b""[..],
2047 &b""[..],
2044 b"dir",
2048 b"dir",
2045 b"dir/subdir",
2049 b"dir/subdir",
2046 b"dir/subdir/z",
2050 b"dir/subdir/z",
2047 b"dir/foo",
2051 b"dir/foo",
2048 b"dir/subdir/x",
2052 b"dir/subdir/x",
2049 b"folder",
2053 b"folder",
2050 ] {
2054 ] {
2051 assert_eq!(
2055 assert_eq!(
2052 matcher.visit_children_set(HgPath::new(case)),
2056 matcher.visit_children_set(HgPath::new(case)),
2053 VisitChildrenSet::Empty
2057 VisitChildrenSet::Empty
2054 );
2058 );
2055 }
2059 }
2056
2060
2057 // One always and one never should behave the same as an always
2061 // One always and one never should behave the same as an always
2058 let m1 = AlwaysMatcher;
2062 let m1 = AlwaysMatcher;
2059 let m2 = NeverMatcher;
2063 let m2 = NeverMatcher;
2060 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2064 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2061
2065
2062 for case in &[
2066 for case in &[
2063 &b""[..],
2067 &b""[..],
2064 b"dir",
2068 b"dir",
2065 b"dir/subdir",
2069 b"dir/subdir",
2066 b"dir/subdir/z",
2070 b"dir/subdir/z",
2067 b"dir/foo",
2071 b"dir/foo",
2068 b"dir/subdir/x",
2072 b"dir/subdir/x",
2069 b"folder",
2073 b"folder",
2070 ] {
2074 ] {
2071 assert_eq!(
2075 assert_eq!(
2072 matcher.visit_children_set(HgPath::new(case)),
2076 matcher.visit_children_set(HgPath::new(case)),
2073 VisitChildrenSet::Recursive
2077 VisitChildrenSet::Recursive
2074 );
2078 );
2075 }
2079 }
2076
2080
2077 // Two include matchers
2081 // Two include matchers
2078 let m1 = Box::new(
2082 let m1 = Box::new(
2079 IncludeMatcher::new(vec![IgnorePattern::new(
2083 IncludeMatcher::new(vec![IgnorePattern::new(
2080 PatternSyntax::RelPath,
2084 PatternSyntax::RelPath,
2081 b"dir/subdir",
2085 b"dir/subdir",
2082 Path::new("/repo"),
2086 Path::new("/repo"),
2083 )])
2087 )])
2084 .unwrap(),
2088 .unwrap(),
2085 );
2089 );
2086 let m2 = Box::new(
2090 let m2 = Box::new(
2087 IncludeMatcher::new(vec![IgnorePattern::new(
2091 IncludeMatcher::new(vec![IgnorePattern::new(
2088 PatternSyntax::RootFilesIn,
2092 PatternSyntax::RootFilesIn,
2089 b"dir",
2093 b"dir",
2090 Path::new("/repo"),
2094 Path::new("/repo"),
2091 )])
2095 )])
2092 .unwrap(),
2096 .unwrap(),
2093 );
2097 );
2094
2098
2095 let matcher = DifferenceMatcher::new(m1, m2);
2099 let matcher = DifferenceMatcher::new(m1, m2);
2096
2100
2097 let mut set = HashSet::new();
2101 let mut set = HashSet::new();
2098 set.insert(HgPathBuf::from_bytes(b"dir"));
2102 set.insert(HgPathBuf::from_bytes(b"dir"));
2099 assert_eq!(
2103 assert_eq!(
2100 matcher.visit_children_set(HgPath::new(b"")),
2104 matcher.visit_children_set(HgPath::new(b"")),
2101 VisitChildrenSet::Set(set)
2105 VisitChildrenSet::Set(set)
2102 );
2106 );
2103
2107
2104 let mut set = HashSet::new();
2108 let mut set = HashSet::new();
2105 set.insert(HgPathBuf::from_bytes(b"subdir"));
2109 set.insert(HgPathBuf::from_bytes(b"subdir"));
2106 assert_eq!(
2110 assert_eq!(
2107 matcher.visit_children_set(HgPath::new(b"dir")),
2111 matcher.visit_children_set(HgPath::new(b"dir")),
2108 VisitChildrenSet::Set(set)
2112 VisitChildrenSet::Set(set)
2109 );
2113 );
2110 assert_eq!(
2114 assert_eq!(
2111 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2115 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2112 VisitChildrenSet::Recursive
2116 VisitChildrenSet::Recursive
2113 );
2117 );
2114 assert_eq!(
2118 assert_eq!(
2115 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2119 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2116 VisitChildrenSet::Empty
2120 VisitChildrenSet::Empty
2117 );
2121 );
2118 assert_eq!(
2122 assert_eq!(
2119 matcher.visit_children_set(HgPath::new(b"folder")),
2123 matcher.visit_children_set(HgPath::new(b"folder")),
2120 VisitChildrenSet::Empty
2124 VisitChildrenSet::Empty
2121 );
2125 );
2122 assert_eq!(
2126 assert_eq!(
2123 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2127 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2124 VisitChildrenSet::This
2128 VisitChildrenSet::This
2125 );
2129 );
2126 assert_eq!(
2130 assert_eq!(
2127 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2131 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2128 VisitChildrenSet::This
2132 VisitChildrenSet::This
2129 );
2133 );
2130 }
2134 }
2131
2135
2132 mod invariants {
2136 mod invariants {
2133 pub mod visit_children_set {
2137 pub mod visit_children_set {
2134
2138
2135 use crate::{
2139 use crate::{
2136 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2140 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2137 utils::hg_path::HgPath,
2141 utils::hg_path::HgPath,
2138 };
2142 };
2139
2143
2140 #[allow(dead_code)]
2144 #[allow(dead_code)]
2141 #[derive(Debug)]
2145 #[derive(Debug)]
2142 struct Error<'a, M> {
2146 struct Error<'a, M> {
2143 matcher: &'a M,
2147 matcher: &'a M,
2144 path: &'a HgPath,
2148 path: &'a HgPath,
2145 matching: &'a Tree,
2149 matching: &'a Tree,
2146 visit_children_set: &'a VisitChildrenSet,
2150 visit_children_set: &'a VisitChildrenSet,
2147 }
2151 }
2148
2152
2149 fn holds(
2153 fn holds(
2150 matching: &Tree,
2154 matching: &Tree,
2151 not_matching: &Tree,
2155 not_matching: &Tree,
2152 vcs: &VisitChildrenSet,
2156 vcs: &VisitChildrenSet,
2153 ) -> bool {
2157 ) -> bool {
2154 match vcs {
2158 match vcs {
2155 VisitChildrenSet::Empty => matching.is_empty(),
2159 VisitChildrenSet::Empty => matching.is_empty(),
2156 VisitChildrenSet::This => {
2160 VisitChildrenSet::This => {
2157 // `This` does not come with any obligations.
2161 // `This` does not come with any obligations.
2158 true
2162 true
2159 }
2163 }
2160 VisitChildrenSet::Recursive => {
2164 VisitChildrenSet::Recursive => {
2161 // `Recursive` requires that *everything* in the
2165 // `Recursive` requires that *everything* in the
2162 // subtree matches. This
2166 // subtree matches. This
2163 // requirement is relied on for example in
2167 // requirement is relied on for example in
2164 // DifferenceMatcher implementation.
2168 // DifferenceMatcher implementation.
2165 not_matching.is_empty()
2169 not_matching.is_empty()
2166 }
2170 }
2167 VisitChildrenSet::Set(allowed_children) => {
2171 VisitChildrenSet::Set(allowed_children) => {
2168 // `allowed_children` does not distinguish between
2172 // `allowed_children` does not distinguish between
2169 // files and directories: if it's not included, it
2173 // files and directories: if it's not included, it
2170 // must not be matched.
2174 // must not be matched.
2171 for k in matching.dirs.keys() {
2175 for k in matching.dirs.keys() {
2172 if !(allowed_children.contains(k)) {
2176 if !(allowed_children.contains(k)) {
2173 return false;
2177 return false;
2174 }
2178 }
2175 }
2179 }
2176 for k in matching.files.iter() {
2180 for k in matching.files.iter() {
2177 if !(allowed_children.contains(k)) {
2181 if !(allowed_children.contains(k)) {
2178 return false;
2182 return false;
2179 }
2183 }
2180 }
2184 }
2181 true
2185 true
2182 }
2186 }
2183 }
2187 }
2184 }
2188 }
2185
2189
2186 pub fn check<M: Matcher + std::fmt::Debug>(
2190 pub fn check<M: Matcher + std::fmt::Debug>(
2187 matcher: &M,
2191 matcher: &M,
2188 path: &HgPath,
2192 path: &HgPath,
2189 matching: &Tree,
2193 matching: &Tree,
2190 not_matching: &Tree,
2194 not_matching: &Tree,
2191 visit_children_set: &VisitChildrenSet,
2195 visit_children_set: &VisitChildrenSet,
2192 ) {
2196 ) {
2193 if !holds(matching, not_matching, visit_children_set) {
2197 if !holds(matching, not_matching, visit_children_set) {
2194 panic!(
2198 panic!(
2195 "{:#?}",
2199 "{:#?}",
2196 Error {
2200 Error {
2197 matcher,
2201 matcher,
2198 path,
2202 path,
2199 visit_children_set,
2203 visit_children_set,
2200 matching
2204 matching
2201 }
2205 }
2202 )
2206 )
2203 }
2207 }
2204 }
2208 }
2205 }
2209 }
2206 }
2210 }
2207
2211
2208 #[derive(Debug, Clone)]
2212 #[derive(Debug, Clone)]
2209 pub struct Tree {
2213 pub struct Tree {
2210 files: BTreeSet<HgPathBuf>,
2214 files: BTreeSet<HgPathBuf>,
2211 dirs: BTreeMap<HgPathBuf, Tree>,
2215 dirs: BTreeMap<HgPathBuf, Tree>,
2212 }
2216 }
2213
2217
2214 impl Tree {
2218 impl Tree {
2215 fn len(&self) -> usize {
2219 fn len(&self) -> usize {
2216 let mut n = 0;
2220 let mut n = 0;
2217 n += self.files.len();
2221 n += self.files.len();
2218 for d in self.dirs.values() {
2222 for d in self.dirs.values() {
2219 n += d.len();
2223 n += d.len();
2220 }
2224 }
2221 n
2225 n
2222 }
2226 }
2223
2227
2224 fn is_empty(&self) -> bool {
2228 fn is_empty(&self) -> bool {
2225 self.files.is_empty() && self.dirs.is_empty()
2229 self.files.is_empty() && self.dirs.is_empty()
2226 }
2230 }
2227
2231
2228 fn make(
2232 fn make(
2229 files: BTreeSet<HgPathBuf>,
2233 files: BTreeSet<HgPathBuf>,
2230 dirs: BTreeMap<HgPathBuf, Tree>,
2234 dirs: BTreeMap<HgPathBuf, Tree>,
2231 ) -> Self {
2235 ) -> Self {
2232 Self {
2236 Self {
2233 files,
2237 files,
2234 dirs: dirs
2238 dirs: dirs
2235 .into_iter()
2239 .into_iter()
2236 .filter(|(_k, v)| (!(v.is_empty())))
2240 .filter(|(_k, v)| (!(v.is_empty())))
2237 .collect(),
2241 .collect(),
2238 }
2242 }
2239 }
2243 }
2240
2244
2241 fn filter_and_check<M: Matcher + Debug>(
2245 fn filter_and_check<M: Matcher + Debug>(
2242 &self,
2246 &self,
2243 m: &M,
2247 m: &M,
2244 path: &HgPath,
2248 path: &HgPath,
2245 ) -> (Self, Self) {
2249 ) -> (Self, Self) {
2246 let (files1, files2): (BTreeSet<HgPathBuf>, BTreeSet<HgPathBuf>) =
2250 let (files1, files2): (BTreeSet<HgPathBuf>, BTreeSet<HgPathBuf>) =
2247 self.files
2251 self.files
2248 .iter()
2252 .iter()
2249 .map(|v| v.to_owned())
2253 .map(|v| v.to_owned())
2250 .partition(|v| m.matches(&path.join(v)));
2254 .partition(|v| m.matches(&path.join(v)));
2251 let (dirs1, dirs2): (
2255 let (dirs1, dirs2): (
2252 BTreeMap<HgPathBuf, Tree>,
2256 BTreeMap<HgPathBuf, Tree>,
2253 BTreeMap<HgPathBuf, Tree>,
2257 BTreeMap<HgPathBuf, Tree>,
2254 ) = self
2258 ) = self
2255 .dirs
2259 .dirs
2256 .iter()
2260 .iter()
2257 .map(|(k, v)| {
2261 .map(|(k, v)| {
2258 let path = path.join(k);
2262 let path = path.join(k);
2259 let (t1, t2) = v.filter_and_check(m, &path);
2263 let (t1, t2) = v.filter_and_check(m, &path);
2260 ((k.clone(), t1), (k.clone(), t2))
2264 ((k.clone(), t1), (k.clone(), t2))
2261 })
2265 })
2262 .unzip();
2266 .unzip();
2263 let matching = Self::make(files1, dirs1);
2267 let matching = Self::make(files1, dirs1);
2264 let not_matching = Self::make(files2, dirs2);
2268 let not_matching = Self::make(files2, dirs2);
2265 let vcs = m.visit_children_set(path);
2269 let vcs = m.visit_children_set(path);
2266 invariants::visit_children_set::check(
2270 invariants::visit_children_set::check(
2267 m,
2271 m,
2268 path,
2272 path,
2269 &matching,
2273 &matching,
2270 &not_matching,
2274 &not_matching,
2271 &vcs,
2275 &vcs,
2272 );
2276 );
2273 (matching, not_matching)
2277 (matching, not_matching)
2274 }
2278 }
2275
2279
2276 fn check_matcher<M: Matcher + Debug>(
2280 fn check_matcher<M: Matcher + Debug>(
2277 &self,
2281 &self,
2278 m: &M,
2282 m: &M,
2279 expect_count: usize,
2283 expect_count: usize,
2280 ) {
2284 ) {
2281 let res = self.filter_and_check(m, &HgPathBuf::new());
2285 let res = self.filter_and_check(m, &HgPathBuf::new());
2282 if expect_count != res.0.len() {
2286 if expect_count != res.0.len() {
2283 eprintln!(
2287 eprintln!(
2284 "warning: expected {} matches, got {} for {:#?}",
2288 "warning: expected {} matches, got {} for {:#?}",
2285 expect_count,
2289 expect_count,
2286 res.0.len(),
2290 res.0.len(),
2287 m
2291 m
2288 );
2292 );
2289 }
2293 }
2290 }
2294 }
2291 }
2295 }
2292
2296
2293 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2297 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2294 let p = HgPathBuf::from_bytes;
2298 let p = HgPathBuf::from_bytes;
2295 let names = [
2299 let names = [
2296 p(b"a"),
2300 p(b"a"),
2297 p(b"b.txt"),
2301 p(b"b.txt"),
2298 p(b"file.txt"),
2302 p(b"file.txt"),
2299 p(b"c.c"),
2303 p(b"c.c"),
2300 p(b"c.h"),
2304 p(b"c.h"),
2301 p(b"dir1"),
2305 p(b"dir1"),
2302 p(b"dir2"),
2306 p(b"dir2"),
2303 p(b"subdir"),
2307 p(b"subdir"),
2304 ];
2308 ];
2305 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2309 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2306 let dirs = children
2310 let dirs = children
2307 .iter()
2311 .iter()
2308 .map(|(name, t)| (p(name), (*t).clone()))
2312 .map(|(name, t)| (p(name), (*t).clone()))
2309 .collect();
2313 .collect();
2310 Tree { files, dirs }
2314 Tree { files, dirs }
2311 }
2315 }
2312
2316
2313 fn make_example_tree() -> Tree {
2317 fn make_example_tree() -> Tree {
2314 let leaf = mkdir(&[]);
2318 let leaf = mkdir(&[]);
2315 let abc = mkdir(&[(b"d", &leaf)]);
2319 let abc = mkdir(&[(b"d", &leaf)]);
2316 let ab = mkdir(&[(b"c", &abc)]);
2320 let ab = mkdir(&[(b"c", &abc)]);
2317 let a = mkdir(&[(b"b", &ab)]);
2321 let a = mkdir(&[(b"b", &ab)]);
2318 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2322 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2319 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2323 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2320 }
2324 }
2321
2325
2322 #[test]
2326 #[test]
2323 fn test_pattern_matcher_visit_children_set() {
2327 fn test_pattern_matcher_visit_children_set() {
2324 let tree = make_example_tree();
2328 let tree = make_example_tree();
2325 let pattern_dir1_glob_c =
2329 let pattern_dir1_glob_c =
2326 PatternMatcher::new(vec![IgnorePattern::new(
2330 PatternMatcher::new(vec![IgnorePattern::new(
2327 PatternSyntax::Glob,
2331 PatternSyntax::Glob,
2328 b"dir1/*.c",
2332 b"dir1/*.c",
2329 Path::new(""),
2333 Path::new(""),
2330 )])
2334 )])
2331 .unwrap();
2335 .unwrap();
2332 let pattern_dir1 = || {
2336 let pattern_dir1 = || {
2333 PatternMatcher::new(vec![IgnorePattern::new(
2337 PatternMatcher::new(vec![IgnorePattern::new(
2334 PatternSyntax::Path,
2338 PatternSyntax::Path,
2335 b"dir1",
2339 b"dir1",
2336 Path::new(""),
2340 Path::new(""),
2337 )])
2341 )])
2338 .unwrap()
2342 .unwrap()
2339 };
2343 };
2340 let pattern_dir1_a = PatternMatcher::new(vec![IgnorePattern::new(
2344 let pattern_dir1_a = PatternMatcher::new(vec![IgnorePattern::new(
2341 PatternSyntax::Glob,
2345 PatternSyntax::Glob,
2342 b"dir1/a",
2346 b"dir1/a",
2343 Path::new(""),
2347 Path::new(""),
2344 )])
2348 )])
2345 .unwrap();
2349 .unwrap();
2346 let pattern_relglob_c = || {
2350 let pattern_relglob_c = || {
2347 PatternMatcher::new(vec![IgnorePattern::new(
2351 PatternMatcher::new(vec![IgnorePattern::new(
2348 PatternSyntax::RelGlob,
2352 PatternSyntax::RelGlob,
2349 b"*.c",
2353 b"*.c",
2350 Path::new(""),
2354 Path::new(""),
2351 )])
2355 )])
2352 .unwrap()
2356 .unwrap()
2353 };
2357 };
2354 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")];
2358 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")];
2355 let file_dir_subdir_b = FileMatcher::new(files).unwrap();
2359 let file_dir_subdir_b = FileMatcher::new(files).unwrap();
2356
2360
2357 let files = vec![
2361 let files = vec![
2358 HgPathBuf::from_bytes(b"file.txt"),
2362 HgPathBuf::from_bytes(b"file.txt"),
2359 HgPathBuf::from_bytes(b"a/file.txt"),
2363 HgPathBuf::from_bytes(b"a/file.txt"),
2360 HgPathBuf::from_bytes(b"a/b/file.txt"),
2364 HgPathBuf::from_bytes(b"a/b/file.txt"),
2361 // No file in a/b/c
2365 // No file in a/b/c
2362 HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
2366 HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
2363 ];
2367 ];
2364 let file_abcdfile = FileMatcher::new(files).unwrap();
2368 let file_abcdfile = FileMatcher::new(files).unwrap();
2365 let rootfilesin_dir = PatternMatcher::new(vec![IgnorePattern::new(
2369 let rootfilesin_dir = PatternMatcher::new(vec![IgnorePattern::new(
2366 PatternSyntax::RootFilesIn,
2370 PatternSyntax::RootFilesIn,
2367 b"dir",
2371 b"dir",
2368 Path::new(""),
2372 Path::new(""),
2369 )])
2373 )])
2370 .unwrap();
2374 .unwrap();
2371
2375
2372 let pattern_filepath_dir_subdir =
2376 let pattern_filepath_dir_subdir =
2373 PatternMatcher::new(vec![IgnorePattern::new(
2377 PatternMatcher::new(vec![IgnorePattern::new(
2374 PatternSyntax::FilePath,
2378 PatternSyntax::FilePath,
2375 b"dir/subdir",
2379 b"dir/subdir",
2376 Path::new(""),
2380 Path::new(""),
2377 )])
2381 )])
2378 .unwrap();
2382 .unwrap();
2379
2383
2380 let include_dir_subdir =
2384 let include_dir_subdir =
2381 IncludeMatcher::new(vec![IgnorePattern::new(
2385 IncludeMatcher::new(vec![IgnorePattern::new(
2382 PatternSyntax::RelPath,
2386 PatternSyntax::RelPath,
2383 b"dir/subdir",
2387 b"dir/subdir",
2384 Path::new(""),
2388 Path::new(""),
2385 )])
2389 )])
2386 .unwrap();
2390 .unwrap();
2387
2391
2388 let more_includematchers = [
2392 let more_includematchers = [
2389 IncludeMatcher::new(vec![IgnorePattern::new(
2393 IncludeMatcher::new(vec![IgnorePattern::new(
2390 PatternSyntax::Glob,
2394 PatternSyntax::Glob,
2391 b"dir/s*",
2395 b"dir/s*",
2392 Path::new(""),
2396 Path::new(""),
2393 )])
2397 )])
2394 .unwrap(),
2398 .unwrap(),
2395 // Test multiple patterns
2399 // Test multiple patterns
2396 IncludeMatcher::new(vec![
2400 IncludeMatcher::new(vec![
2397 IgnorePattern::new(
2401 IgnorePattern::new(
2398 PatternSyntax::RelPath,
2402 PatternSyntax::RelPath,
2399 b"dir",
2403 b"dir",
2400 Path::new(""),
2404 Path::new(""),
2401 ),
2405 ),
2402 IgnorePattern::new(PatternSyntax::Glob, b"s*", Path::new("")),
2406 IgnorePattern::new(PatternSyntax::Glob, b"s*", Path::new("")),
2403 ])
2407 ])
2404 .unwrap(),
2408 .unwrap(),
2405 // Test multiple patterns
2409 // Test multiple patterns
2406 IncludeMatcher::new(vec![IgnorePattern::new(
2410 IncludeMatcher::new(vec![IgnorePattern::new(
2407 PatternSyntax::Glob,
2411 PatternSyntax::Glob,
2408 b"**/*.c",
2412 b"**/*.c",
2409 Path::new(""),
2413 Path::new(""),
2410 )])
2414 )])
2411 .unwrap(),
2415 .unwrap(),
2412 ];
2416 ];
2413
2417
2414 tree.check_matcher(&pattern_dir1(), 25);
2418 tree.check_matcher(&pattern_dir1(), 25);
2415 tree.check_matcher(&pattern_dir1_a, 1);
2419 tree.check_matcher(&pattern_dir1_a, 1);
2416 tree.check_matcher(&pattern_dir1_glob_c, 2);
2420 tree.check_matcher(&pattern_dir1_glob_c, 2);
2417 tree.check_matcher(&pattern_relglob_c(), 14);
2421 tree.check_matcher(&pattern_relglob_c(), 14);
2418 tree.check_matcher(&AlwaysMatcher, 112);
2422 tree.check_matcher(&AlwaysMatcher, 112);
2419 tree.check_matcher(&NeverMatcher, 0);
2423 tree.check_matcher(&NeverMatcher, 0);
2420 tree.check_matcher(
2424 tree.check_matcher(
2421 &IntersectionMatcher::new(
2425 &IntersectionMatcher::new(
2422 Box::new(pattern_relglob_c()),
2426 Box::new(pattern_relglob_c()),
2423 Box::new(pattern_dir1()),
2427 Box::new(pattern_dir1()),
2424 ),
2428 ),
2425 3,
2429 3,
2426 );
2430 );
2427 tree.check_matcher(
2431 tree.check_matcher(
2428 &UnionMatcher::new(vec![
2432 &UnionMatcher::new(vec![
2429 Box::new(pattern_relglob_c()),
2433 Box::new(pattern_relglob_c()),
2430 Box::new(pattern_dir1()),
2434 Box::new(pattern_dir1()),
2431 ]),
2435 ]),
2432 36,
2436 36,
2433 );
2437 );
2434 tree.check_matcher(
2438 tree.check_matcher(
2435 &DifferenceMatcher::new(
2439 &DifferenceMatcher::new(
2436 Box::new(pattern_relglob_c()),
2440 Box::new(pattern_relglob_c()),
2437 Box::new(pattern_dir1()),
2441 Box::new(pattern_dir1()),
2438 ),
2442 ),
2439 11,
2443 11,
2440 );
2444 );
2441 tree.check_matcher(&file_dir_subdir_b, 1);
2445 tree.check_matcher(&file_dir_subdir_b, 1);
2442 tree.check_matcher(&file_abcdfile, 4);
2446 tree.check_matcher(&file_abcdfile, 4);
2443 tree.check_matcher(&rootfilesin_dir, 8);
2447 tree.check_matcher(&rootfilesin_dir, 8);
2444 tree.check_matcher(&pattern_filepath_dir_subdir, 1);
2448 tree.check_matcher(&pattern_filepath_dir_subdir, 1);
2445 tree.check_matcher(&include_dir_subdir, 9);
2449 tree.check_matcher(&include_dir_subdir, 9);
2446 tree.check_matcher(&more_includematchers[0], 17);
2450 tree.check_matcher(&more_includematchers[0], 17);
2447 tree.check_matcher(&more_includematchers[1], 25);
2451 tree.check_matcher(&more_includematchers[1], 25);
2448 tree.check_matcher(&more_includematchers[2], 35);
2452 tree.check_matcher(&more_includematchers[2], 35);
2449 }
2453 }
2450 }
2454 }
@@ -1,744 +1,744
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Revision;
11 use crate::revlog::Revision;
12 use crate::revlog::{Node, NodePrefix};
12 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15 use crate::vfs::Vfs;
15 use crate::vfs::Vfs;
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17
17
18 /// A specialized `Revlog` to work with changelog data format.
18 /// A specialized `Revlog` to work with changelog data format.
19 pub struct Changelog {
19 pub struct Changelog {
20 /// The generic `revlog` format.
20 /// The generic `revlog` format.
21 pub(crate) revlog: Revlog,
21 pub(crate) revlog: Revlog,
22 }
22 }
23
23
24 impl Changelog {
24 impl Changelog {
25 /// Open the `changelog` of a repository given by its root.
25 /// Open the `changelog` of a repository given by its root.
26 pub fn open(
26 pub fn open(
27 store_vfs: &Vfs,
27 store_vfs: &Vfs,
28 options: RevlogOpenOptions,
28 options: RevlogOpenOptions,
29 ) -> Result<Self, HgError> {
29 ) -> Result<Self, HgError> {
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 Ok(Self { revlog })
31 Ok(Self { revlog })
32 }
32 }
33
33
34 /// Return the `ChangelogRevisionData` for the given node ID.
34 /// Return the `ChangelogRevisionData` for the given node ID.
35 pub fn data_for_node(
35 pub fn data_for_node(
36 &self,
36 &self,
37 node: NodePrefix,
37 node: NodePrefix,
38 ) -> Result<ChangelogRevisionData, RevlogError> {
38 ) -> Result<ChangelogRevisionData, RevlogError> {
39 let rev = self.revlog.rev_from_node(node)?;
39 let rev = self.revlog.rev_from_node(node)?;
40 self.entry_for_checked_rev(rev)?.data()
40 self.entry_for_checked_rev(rev)?.data()
41 }
41 }
42
42
43 /// Return the [`ChangelogEntry`] for the given revision number.
43 /// Return the [`ChangelogEntry`] for the given revision number.
44 pub fn entry_for_rev(
44 pub fn entry_for_rev(
45 &self,
45 &self,
46 rev: UncheckedRevision,
46 rev: UncheckedRevision,
47 ) -> Result<ChangelogEntry, RevlogError> {
47 ) -> Result<ChangelogEntry, RevlogError> {
48 let revlog_entry = self.revlog.get_entry(rev)?;
48 let revlog_entry = self.revlog.get_entry(rev)?;
49 Ok(ChangelogEntry { revlog_entry })
49 Ok(ChangelogEntry { revlog_entry })
50 }
50 }
51
51
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 fn entry_for_checked_rev(
53 fn entry_for_checked_rev(
54 &self,
54 &self,
55 rev: Revision,
55 rev: Revision,
56 ) -> Result<ChangelogEntry, RevlogError> {
56 ) -> Result<ChangelogEntry, RevlogError> {
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 Ok(ChangelogEntry { revlog_entry })
58 Ok(ChangelogEntry { revlog_entry })
59 }
59 }
60
60
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 ///
62 ///
63 /// This is a useful shortcut in case the caller does not need the
63 /// This is a useful shortcut in case the caller does not need the
64 /// generic revlog information (parents, hashes etc). Otherwise
64 /// generic revlog information (parents, hashes etc). Otherwise
65 /// consider taking a [`ChangelogEntry`] with
65 /// consider taking a [`ChangelogEntry`] with
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 pub fn data_for_rev(
67 pub fn data_for_rev(
68 &self,
68 &self,
69 rev: UncheckedRevision,
69 rev: UncheckedRevision,
70 ) -> Result<ChangelogRevisionData, RevlogError> {
70 ) -> Result<ChangelogRevisionData, RevlogError> {
71 self.entry_for_rev(rev)?.data()
71 self.entry_for_rev(rev)?.data()
72 }
72 }
73
73
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 self.revlog.node_from_rev(rev)
75 self.revlog.node_from_rev(rev)
76 }
76 }
77
77
78 pub fn rev_from_node(
78 pub fn rev_from_node(
79 &self,
79 &self,
80 node: NodePrefix,
80 node: NodePrefix,
81 ) -> Result<Revision, RevlogError> {
81 ) -> Result<Revision, RevlogError> {
82 self.revlog.rev_from_node(node)
82 self.revlog.rev_from_node(node)
83 }
83 }
84 }
84 }
85
85
86 impl Graph for Changelog {
86 impl Graph for Changelog {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 self.revlog.parents(rev)
88 self.revlog.parents(rev)
89 }
89 }
90 }
90 }
91
91
92 /// A specialized `RevlogEntry` for `changelog` data format
92 /// A specialized `RevlogEntry` for `changelog` data format
93 ///
93 ///
94 /// This is a `RevlogEntry` with the added semantics that the associated
94 /// This is a `RevlogEntry` with the added semantics that the associated
95 /// data should meet the requirements for `changelog`, materialized by
95 /// data should meet the requirements for `changelog`, materialized by
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 /// In case that promise would be broken, the `data` method returns an error.
97 /// In case that promise would be broken, the `data` method returns an error.
98 #[derive(Clone)]
98 #[derive(Clone)]
99 pub struct ChangelogEntry<'changelog> {
99 pub struct ChangelogEntry<'changelog> {
100 /// Same data, as a generic `RevlogEntry`.
100 /// Same data, as a generic `RevlogEntry`.
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 }
102 }
103
103
104 impl<'changelog> ChangelogEntry<'changelog> {
104 impl<'changelog> ChangelogEntry<'changelog> {
105 pub fn data<'a>(
105 pub fn data<'a>(
106 &'a self,
106 &'a self,
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 let bytes = self.revlog_entry.data()?;
108 let bytes = self.revlog_entry.data()?;
109 if bytes.is_empty() {
109 if bytes.is_empty() {
110 Ok(ChangelogRevisionData::null())
110 Ok(ChangelogRevisionData::null())
111 } else {
111 } else {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 RevlogError::Other(HgError::CorruptedRepository(format!(
113 RevlogError::Other(HgError::CorruptedRepository(format!(
114 "Invalid changelog data for revision {}: {:?}",
114 "Invalid changelog data for revision {}: {:?}",
115 self.revlog_entry.revision(),
115 self.revlog_entry.revision(),
116 err
116 err
117 )))
117 )))
118 })?)
118 })?)
119 }
119 }
120 }
120 }
121
121
122 /// Obtain a reference to the underlying `RevlogEntry`.
122 /// Obtain a reference to the underlying `RevlogEntry`.
123 ///
123 ///
124 /// This allows the caller to access the information that is common
124 /// This allows the caller to access the information that is common
125 /// to all revlog entries: revision number, node id, parent revisions etc.
125 /// to all revlog entries: revision number, node id, parent revisions etc.
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 &self.revlog_entry
127 &self.revlog_entry
128 }
128 }
129
129
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 Ok(self
131 Ok(self
132 .revlog_entry
132 .revlog_entry
133 .p1_entry()?
133 .p1_entry()?
134 .map(|revlog_entry| Self { revlog_entry }))
134 .map(|revlog_entry| Self { revlog_entry }))
135 }
135 }
136
136
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 Ok(self
138 Ok(self
139 .revlog_entry
139 .revlog_entry
140 .p2_entry()?
140 .p2_entry()?
141 .map(|revlog_entry| Self { revlog_entry }))
141 .map(|revlog_entry| Self { revlog_entry }))
142 }
142 }
143 }
143 }
144
144
145 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
145 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
146 #[derive(PartialEq)]
146 #[derive(PartialEq)]
147 pub struct ChangelogRevisionData<'changelog> {
147 pub struct ChangelogRevisionData<'changelog> {
148 /// The data bytes of the `changelog` entry.
148 /// The data bytes of the `changelog` entry.
149 bytes: Cow<'changelog, [u8]>,
149 bytes: Cow<'changelog, [u8]>,
150 /// The end offset for the hex manifest (not including the newline)
150 /// The end offset for the hex manifest (not including the newline)
151 manifest_end: usize,
151 manifest_end: usize,
152 /// The end offset for the user+email (not including the newline)
152 /// The end offset for the user+email (not including the newline)
153 user_end: usize,
153 user_end: usize,
154 /// The end offset for the timestamp+timezone+extras (not including the
154 /// The end offset for the timestamp+timezone+extras (not including the
155 /// newline)
155 /// newline)
156 timestamp_end: usize,
156 timestamp_end: usize,
157 /// The end offset for the file list (not including the newline)
157 /// The end offset for the file list (not including the newline)
158 files_end: usize,
158 files_end: usize,
159 }
159 }
160
160
161 impl<'changelog> ChangelogRevisionData<'changelog> {
161 impl<'changelog> ChangelogRevisionData<'changelog> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 let mut line_iter = bytes.split(|b| b == &b'\n');
163 let mut line_iter = bytes.split(|b| b == &b'\n');
164 let manifest_end = line_iter
164 let manifest_end = line_iter
165 .next()
165 .next()
166 .expect("Empty iterator from split()?")
166 .expect("Empty iterator from split()?")
167 .len();
167 .len();
168 let user_slice = line_iter.next().ok_or_else(|| {
168 let user_slice = line_iter.next().ok_or_else(|| {
169 HgError::corrupted("Changeset data truncated after manifest line")
169 HgError::corrupted("Changeset data truncated after manifest line")
170 })?;
170 })?;
171 let user_end = manifest_end + 1 + user_slice.len();
171 let user_end = manifest_end + 1 + user_slice.len();
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 HgError::corrupted("Changeset data truncated after user line")
173 HgError::corrupted("Changeset data truncated after user line")
174 })?;
174 })?;
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 let mut files_end = timestamp_end + 1;
176 let mut files_end = timestamp_end + 1;
177 loop {
177 loop {
178 let line = line_iter.next().ok_or_else(|| {
178 let line = line_iter.next().ok_or_else(|| {
179 HgError::corrupted("Changeset data truncated in files list")
179 HgError::corrupted("Changeset data truncated in files list")
180 })?;
180 })?;
181 if line.is_empty() {
181 if line.is_empty() {
182 if files_end == bytes.len() {
182 if files_end == bytes.len() {
183 // The list of files ended with a single newline (there
183 // The list of files ended with a single newline (there
184 // should be two)
184 // should be two)
185 return Err(HgError::corrupted(
185 return Err(HgError::corrupted(
186 "Changeset data truncated after files list",
186 "Changeset data truncated after files list",
187 ));
187 ));
188 }
188 }
189 files_end -= 1;
189 files_end -= 1;
190 break;
190 break;
191 }
191 }
192 files_end += line.len() + 1;
192 files_end += line.len() + 1;
193 }
193 }
194
194
195 Ok(Self {
195 Ok(Self {
196 bytes,
196 bytes,
197 manifest_end,
197 manifest_end,
198 user_end,
198 user_end,
199 timestamp_end,
199 timestamp_end,
200 files_end,
200 files_end,
201 })
201 })
202 }
202 }
203
203
204 fn null() -> Self {
204 fn null() -> Self {
205 Self::new(Cow::Borrowed(
205 Self::new(Cow::Borrowed(
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 ))
207 ))
208 .unwrap()
208 .unwrap()
209 }
209 }
210
210
211 /// Return an iterator over the lines of the entry.
211 /// Return an iterator over the lines of the entry.
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 self.bytes.split(|b| b == &b'\n')
213 self.bytes.split(|b| b == &b'\n')
214 }
214 }
215
215
216 /// Return the node id of the `manifest` referenced by this `changelog`
216 /// Return the node id of the `manifest` referenced by this `changelog`
217 /// entry.
217 /// entry.
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 Node::from_hex_for_repo(manifest_node_hex)
220 Node::from_hex_for_repo(manifest_node_hex)
221 }
221 }
222
222
223 /// The full user string (usually a name followed by an email enclosed in
223 /// The full user string (usually a name followed by an email enclosed in
224 /// angle brackets)
224 /// angle brackets)
225 pub fn user(&self) -> &[u8] {
225 pub fn user(&self) -> &[u8] {
226 &self.bytes[self.manifest_end + 1..self.user_end]
226 &self.bytes[self.manifest_end + 1..self.user_end]
227 }
227 }
228
228
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 /// possibly extras)
230 /// possibly extras)
231 // TODO: We should expose this in a more useful way
231 // TODO: We should expose this in a more useful way
232 pub fn timestamp_line(&self) -> &[u8] {
232 pub fn timestamp_line(&self) -> &[u8] {
233 &self.bytes[self.user_end + 1..self.timestamp_end]
233 &self.bytes[self.user_end + 1..self.timestamp_end]
234 }
234 }
235
235
236 /// Parsed timestamp.
236 /// Parsed timestamp.
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 parse_timestamp(self.timestamp_line())
238 parse_timestamp(self.timestamp_line())
239 }
239 }
240
240
241 /// Optional commit extras.
241 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
243 parse_timestamp_line_extra(self.timestamp_line())
244 }
244 }
245
245
246 /// The files changed in this revision.
246 /// The files changed in this revision.
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
248 if self.timestamp_end == self.files_end {
248 if self.timestamp_end == self.files_end {
249 Either::Left(iter::empty())
249 Either::Left(iter::empty())
250 } else {
250 } else {
251 Either::Right(
251 Either::Right(
252 self.bytes[self.timestamp_end + 1..self.files_end]
252 self.bytes[self.timestamp_end + 1..self.files_end]
253 .split(|b| b == &b'\n')
253 .split(|b| b == &b'\n')
254 .map(HgPath::new),
254 .map(HgPath::new),
255 )
255 )
256 }
256 }
257 }
257 }
258
258
259 /// The change description.
259 /// The change description.
260 pub fn description(&self) -> &[u8] {
260 pub fn description(&self) -> &[u8] {
261 &self.bytes[self.files_end + 2..]
261 &self.bytes[self.files_end + 2..]
262 }
262 }
263 }
263 }
264
264
265 impl Debug for ChangelogRevisionData<'_> {
265 impl Debug for ChangelogRevisionData<'_> {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
267 f.debug_struct("ChangelogRevisionData")
267 f.debug_struct("ChangelogRevisionData")
268 .field("bytes", &debug_bytes(&self.bytes))
268 .field("bytes", &debug_bytes(&self.bytes))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
270 .field(
270 .field(
271 "user",
271 "user",
272 &debug_bytes(
272 &debug_bytes(
273 &self.bytes[self.manifest_end + 1..self.user_end],
273 &self.bytes[self.manifest_end + 1..self.user_end],
274 ),
274 ),
275 )
275 )
276 .field(
276 .field(
277 "timestamp",
277 "timestamp",
278 &debug_bytes(
278 &debug_bytes(
279 &self.bytes[self.user_end + 1..self.timestamp_end],
279 &self.bytes[self.user_end + 1..self.timestamp_end],
280 ),
280 ),
281 )
281 )
282 .field(
282 .field(
283 "files",
283 "files",
284 &debug_bytes(
284 &debug_bytes(
285 &self.bytes[self.timestamp_end + 1..self.files_end],
285 &self.bytes[self.timestamp_end + 1..self.files_end],
286 ),
286 ),
287 )
287 )
288 .field(
288 .field(
289 "description",
289 "description",
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
291 )
291 )
292 .finish()
292 .finish()
293 }
293 }
294 }
294 }
295
295
296 fn debug_bytes(bytes: &[u8]) -> String {
296 fn debug_bytes(bytes: &[u8]) -> String {
297 String::from_utf8_lossy(
297 String::from_utf8_lossy(
298 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
298 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
299 )
299 )
300 .to_string()
300 .to_string()
301 }
301 }
302
302
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
304 ///
304 ///
305 /// According to the documentation in `hg help dates` and the
305 /// According to the documentation in `hg help dates` and the
306 /// implementation in `changelog.py`, the format of the timestamp line
306 /// implementation in `changelog.py`, the format of the timestamp line
307 /// is `time tz extra\n` where:
307 /// is `time tz extra\n` where:
308 ///
308 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
310 /// as seconds since the UNIX epoch.
311 ///
311 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
314 /// opposite of the sign in ISO 8601 timestamps).
315 ///
315 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
319 /// arbitrary bytes.
320 fn parse_timestamp(
320 fn parse_timestamp(
321 timestamp_line: &[u8],
321 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
322 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
324
324
325 let timestamp_bytes = parts
325 let timestamp_bytes = parts
326 .next()
326 .next()
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 })?;
330 })?;
331 let timestamp_utc = timestamp_str
331 let timestamp_utc = timestamp_str
332 .parse()
332 .parse()
333 .map_err(|e| {
333 .map_err(|e| {
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 })
335 })
336 .and_then(|secs| {
336 .and_then(|secs| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 HgError::corrupted(format!(
338 HgError::corrupted(format!(
339 "integer timestamp out of valid range: {secs}"
339 "integer timestamp out of valid range: {secs}"
340 ))
340 ))
341 })
341 })
342 })
342 })
343 // Attempt to parse the timestamp as a float if we can't parse
343 // Attempt to parse the timestamp as a float if we can't parse
344 // it as an int. It doesn't seem like float timestamps are actually
344 // it as an int. It doesn't seem like float timestamps are actually
345 // used in practice, but the Python code supports them.
345 // used in practice, but the Python code supports them.
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
347
347
348 let timezone_bytes = parts
348 let timezone_bytes = parts
349 .next()
349 .next()
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 .map_err(|e| {
352 .map_err(|e| {
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 })?
354 })?
355 .parse()
355 .parse()
356 .map_err(|e| {
356 .map_err(|e| {
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 })?;
358 })?;
359 let timezone = FixedOffset::west_opt(timezone_secs)
359 let timezone = FixedOffset::west_opt(timezone_secs)
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
361
361
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
363 }
363 }
364
364
365 /// Attempt to parse the given string as floating-point timestamp, and
365 /// Attempt to parse the given string as floating-point timestamp, and
366 /// convert the result into a `chrono::NaiveDateTime`.
366 /// convert the result into a `chrono::NaiveDateTime`.
367 fn parse_float_timestamp(
367 fn parse_float_timestamp(
368 timestamp_str: &str,
368 timestamp_str: &str,
369 ) -> Result<NaiveDateTime, HgError> {
369 ) -> Result<NaiveDateTime, HgError> {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
372 })?;
372 })?;
373
373
374 // To construct a `NaiveDateTime` we'll need to convert the float
374 // To construct a `NaiveDateTime` we'll need to convert the float
375 // into signed integer seconds and unsigned integer nanoseconds.
375 // into signed integer seconds and unsigned integer nanoseconds.
376 let mut secs = timestamp.trunc() as i64;
376 let mut secs = timestamp.trunc() as i64;
377 let mut subsecs = timestamp.fract();
377 let mut subsecs = timestamp.fract();
378
378
379 // If the timestamp is negative, we need to express the fractional
379 // If the timestamp is negative, we need to express the fractional
380 // component as positive nanoseconds since the previous second.
380 // component as positive nanoseconds since the previous second.
381 if timestamp < 0.0 {
381 if timestamp < 0.0 {
382 secs -= 1;
382 secs -= 1;
383 subsecs += 1.0;
383 subsecs += 1.0;
384 }
384 }
385
385
386 // This cast should be safe because the fractional component is
386 // This cast should be safe because the fractional component is
387 // by definition less than 1.0, so this value should not exceed
387 // by definition less than 1.0, so this value should not exceed
388 // 1 billion, which is representable as an f64 without loss of
388 // 1 billion, which is representable as an f64 without loss of
389 // precision and should fit into a u32 without overflowing.
389 // precision and should fit into a u32 without overflowing.
390 //
390 //
391 // (Any loss of precision in the fractional component will have
391 // (Any loss of precision in the fractional component will have
392 // already happened at the time of initial parsing; in general,
392 // already happened at the time of initial parsing; in general,
393 // f64s are insufficiently precise to provide nanosecond-level
393 // f64s are insufficiently precise to provide nanosecond-level
394 // precision with present-day timestamps.)
394 // precision with present-day timestamps.)
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
396
396
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
398 HgError::corrupted(format!(
398 HgError::corrupted(format!(
399 "float timestamp out of valid range: {timestamp}"
399 "float timestamp out of valid range: {timestamp}"
400 ))
400 ))
401 })
401 })
402 }
402 }
403
403
404 /// Decode changeset extra fields.
404 /// Decode changeset extra fields.
405 ///
405 ///
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
407 /// alphanumeric characters plus hyphens and underscores, and the value can
407 /// alphanumeric characters plus hyphens and underscores, and the value can
408 /// contain arbitrary bytes.
408 /// contain arbitrary bytes.
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
410 extra
410 extra
411 .split(|c| *c == b'\0')
411 .split(|c| *c == b'\0')
412 .map(|pair| {
412 .map(|pair| {
413 let pair = unescape_extra(pair);
413 let pair = unescape_extra(pair);
414 let mut iter = pair.splitn(2, |c| *c == b':');
414 let mut iter = pair.splitn(2, |c| *c == b':');
415
415
416 let key_bytes =
416 let key_bytes =
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
418 HgError::corrupted("empty key in changeset extras")
418 HgError::corrupted("empty key in changeset extras")
419 })?;
419 })?;
420
420
421 let key = str::from_utf8(key_bytes)
421 let key = str::from_utf8(key_bytes)
422 .ok()
422 .ok()
423 .filter(|k| {
423 .filter(|k| {
424 k.chars().all(|c| {
424 k.chars().all(|c| {
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
426 })
426 })
427 })
427 })
428 .ok_or_else(|| {
428 .ok_or_else(|| {
429 let key = String::from_utf8_lossy(key_bytes);
429 let key = String::from_utf8_lossy(key_bytes);
430 HgError::corrupted(format!(
430 HgError::corrupted(format!(
431 "invalid key in changeset extras: {key}",
431 "invalid key in changeset extras: {key}",
432 ))
432 ))
433 })?
433 })?
434 .to_string();
434 .to_string();
435
435
436 let value = iter.next().map(Into::into).ok_or_else(|| {
436 let value = iter.next().map(Into::into).ok_or_else(|| {
437 HgError::corrupted(format!(
437 HgError::corrupted(format!(
438 "missing value for changeset extra: {key}"
438 "missing value for changeset extra: {key}"
439 ))
439 ))
440 })?;
440 })?;
441
441
442 Ok((key, value))
442 Ok((key, value))
443 })
443 })
444 .collect()
444 .collect()
445 }
445 }
446
446
447 /// Parse the extra fields from a changeset's timestamp line.
447 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
448 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
449 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
451 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
452 .splitn(3, |c| *c == b' ')
453 .nth(2)
453 .nth(2)
454 .map(decode_extra)
454 .map(decode_extra)
455 .transpose()?
455 .transpose()?
456 .unwrap_or_default())
456 .unwrap_or_default())
457 }
457 }
458
458
459 /// Decode Mercurial's escaping for changelog extras.
459 /// Decode Mercurial's escaping for changelog extras.
460 ///
460 ///
461 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
461 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
462 /// (null, backslash, newline, and carriage return) so we only decode those.
462 /// (null, backslash, newline, and carriage return) so we only decode those.
463 ///
463 ///
464 /// The Python code also includes a workaround for decoding escaped nuls
464 /// The Python code also includes a workaround for decoding escaped nuls
465 /// that are followed by an ASCII octal digit, since Python's built-in
465 /// that are followed by an ASCII octal digit, since Python's built-in
466 /// `string_escape` codec will interpret that as an escaped octal byte value.
466 /// `string_escape` codec will interpret that as an escaped octal byte value.
467 /// That workaround is omitted here since we don't support decoding octal.
467 /// That workaround is omitted here since we don't support decoding octal.
468 fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
468 fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
469 let mut output = Vec::with_capacity(bytes.len());
469 let mut output = Vec::with_capacity(bytes.len());
470 let mut input = bytes.iter().copied();
470 let mut input = bytes.iter().copied();
471
471
472 while let Some(c) = input.next() {
472 while let Some(c) = input.next() {
473 if c != b'\\' {
473 if c != b'\\' {
474 output.push(c);
474 output.push(c);
475 continue;
475 continue;
476 }
476 }
477
477
478 match input.next() {
478 match input.next() {
479 Some(b'0') => output.push(b'\0'),
479 Some(b'0') => output.push(b'\0'),
480 Some(b'\\') => output.push(b'\\'),
480 Some(b'\\') => output.push(b'\\'),
481 Some(b'n') => output.push(b'\n'),
481 Some(b'n') => output.push(b'\n'),
482 Some(b'r') => output.push(b'\r'),
482 Some(b'r') => output.push(b'\r'),
483 // The following cases should never occur in theory because any
483 // The following cases should never occur in theory because any
484 // backslashes in the original input should have been escaped
484 // backslashes in the original input should have been escaped
485 // with another backslash, so it should not be possible to
485 // with another backslash, so it should not be possible to
486 // observe an escape sequence other than the 4 above.
486 // observe an escape sequence other than the 4 above.
487 Some(c) => output.extend_from_slice(&[b'\\', c]),
487 Some(c) => output.extend_from_slice(&[b'\\', c]),
488 None => output.push(b'\\'),
488 None => output.push(b'\\'),
489 }
489 }
490 }
490 }
491
491
492 output
492 output
493 }
493 }
494
494
495 #[cfg(test)]
495 #[cfg(test)]
496 mod tests {
496 mod tests {
497 use super::*;
497 use super::*;
498 use crate::vfs::Vfs;
498 use crate::vfs::Vfs;
499 use crate::NULL_REVISION;
499 use crate::NULL_REVISION;
500 use pretty_assertions::assert_eq;
500 use pretty_assertions::assert_eq;
501
501
502 #[test]
502 #[test]
503 fn test_create_changelogrevisiondata_invalid() {
503 fn test_create_changelogrevisiondata_invalid() {
504 // Completely empty
504 // Completely empty
505 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
505 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
506 // No newline after manifest
506 // No newline after manifest
507 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
507 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
508 // No newline after user
508 // No newline after user
509 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
509 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
510 // No newline after timestamp
510 // No newline after timestamp
511 assert!(
511 assert!(
512 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
512 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
513 );
513 );
514 // Missing newline after files
514 // Missing newline after files
515 assert!(ChangelogRevisionData::new(Cow::Borrowed(
515 assert!(ChangelogRevisionData::new(Cow::Borrowed(
516 b"abcd\n\n0 0\nfile1\nfile2"
516 b"abcd\n\n0 0\nfile1\nfile2"
517 ))
517 ))
518 .is_err(),);
518 .is_err(),);
519 // Only one newline after files
519 // Only one newline after files
520 assert!(ChangelogRevisionData::new(Cow::Borrowed(
520 assert!(ChangelogRevisionData::new(Cow::Borrowed(
521 b"abcd\n\n0 0\nfile1\nfile2\n"
521 b"abcd\n\n0 0\nfile1\nfile2\n"
522 ))
522 ))
523 .is_err(),);
523 .is_err(),);
524 }
524 }
525
525
526 #[test]
526 #[test]
527 fn test_create_changelogrevisiondata() {
527 fn test_create_changelogrevisiondata() {
528 let data = ChangelogRevisionData::new(Cow::Borrowed(
528 let data = ChangelogRevisionData::new(Cow::Borrowed(
529 b"0123456789abcdef0123456789abcdef01234567
529 b"0123456789abcdef0123456789abcdef01234567
530 Some One <someone@example.com>
530 Some One <someone@example.com>
531 0 0
531 0 0
532 file1
532 file1
533 file2
533 file2
534
534
535 some
535 some
536 commit
536 commit
537 message",
537 message",
538 ))
538 ))
539 .unwrap();
539 .unwrap();
540 assert_eq!(
540 assert_eq!(
541 data.manifest_node().unwrap(),
541 data.manifest_node().unwrap(),
542 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
542 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
543 .unwrap()
543 .unwrap()
544 );
544 );
545 assert_eq!(data.user(), b"Some One <someone@example.com>");
545 assert_eq!(data.user(), b"Some One <someone@example.com>");
546 assert_eq!(data.timestamp_line(), b"0 0");
546 assert_eq!(data.timestamp_line(), b"0 0");
547 assert_eq!(
547 assert_eq!(
548 data.files().collect_vec(),
548 data.files().collect_vec(),
549 vec![HgPath::new("file1"), HgPath::new("file2")]
549 vec![HgPath::new("file1"), HgPath::new("file2")]
550 );
550 );
551 assert_eq!(data.description(), b"some\ncommit\nmessage");
551 assert_eq!(data.description(), b"some\ncommit\nmessage");
552 }
552 }
553
553
554 #[test]
554 #[test]
555 fn test_data_from_rev_null() -> Result<(), RevlogError> {
555 fn test_data_from_rev_null() -> Result<(), RevlogError> {
556 // an empty revlog will be enough for this case
556 // an empty revlog will be enough for this case
557 let temp = tempfile::tempdir().unwrap();
557 let temp = tempfile::tempdir().unwrap();
558 let vfs = Vfs { base: temp.path() };
558 let vfs = Vfs { base: temp.path() };
559 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
559 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
560 let revlog =
560 let revlog =
561 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
561 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
562 .unwrap();
562 .unwrap();
563
563
564 let changelog = Changelog { revlog };
564 let changelog = Changelog { revlog };
565 assert_eq!(
565 assert_eq!(
566 changelog.data_for_rev(NULL_REVISION.into())?,
566 changelog.data_for_rev(NULL_REVISION.into())?,
567 ChangelogRevisionData::null()
567 ChangelogRevisionData::null()
568 );
568 );
569 // same with the intermediate entry object
569 // same with the intermediate entry object
570 assert_eq!(
570 assert_eq!(
571 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
571 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
572 ChangelogRevisionData::null()
572 ChangelogRevisionData::null()
573 );
573 );
574 Ok(())
574 Ok(())
575 }
575 }
576
576
577 #[test]
577 #[test]
578 fn test_empty_files_list() {
578 fn test_empty_files_list() {
579 assert!(ChangelogRevisionData::null()
579 assert!(ChangelogRevisionData::null()
580 .files()
580 .files()
581 .collect_vec()
581 .collect_vec()
582 .is_empty());
582 .is_empty());
583 }
583 }
584
584
585 #[test]
585 #[test]
586 fn test_unescape_basic() {
586 fn test_unescape_basic() {
587 // '\0', '\\', '\n', and '\r' are correctly unescaped.
587 // '\0', '\\', '\n', and '\r' are correctly unescaped.
588 let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
588 let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
589 let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
589 let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
590 let unescaped = unescape_extra(escaped);
590 let unescaped = unescape_extra(escaped);
591 assert_eq!(&expected[..], &unescaped[..]);
591 assert_eq!(&expected[..], &unescaped[..]);
592 }
592 }
593
593
594 #[test]
594 #[test]
595 fn test_unescape_unsupported_sequence() {
595 fn test_unescape_unsupported_sequence() {
596 // Other escape sequences are left unaltered.
596 // Other escape sequences are left unaltered.
597 for c in 0u8..255 {
597 for c in 0u8..255 {
598 match c {
598 match c {
599 b'0' | b'\\' | b'n' | b'r' => continue,
599 b'0' | b'\\' | b'n' | b'r' => continue,
600 c => {
600 c => {
601 let expected = &[b'\\', c][..];
601 let expected = &[b'\\', c][..];
602 let unescaped = unescape_extra(expected);
602 let unescaped = unescape_extra(expected);
603 assert_eq!(expected, &unescaped[..]);
603 assert_eq!(expected, &unescaped[..]);
604 }
604 }
605 }
605 }
606 }
606 }
607 }
607 }
608
608
609 #[test]
609 #[test]
610 fn test_unescape_trailing_backslash() {
610 fn test_unescape_trailing_backslash() {
611 // Trailing backslashes are OK.
611 // Trailing backslashes are OK.
612 let expected = br"hi\";
612 let expected = br"hi\";
613 let unescaped = unescape_extra(expected);
613 let unescaped = unescape_extra(expected);
614 assert_eq!(&expected[..], &unescaped[..]);
614 assert_eq!(&expected[..], &unescaped[..]);
615 }
615 }
616
616
617 #[test]
617 #[test]
618 fn test_unescape_nul_followed_by_octal() {
618 fn test_unescape_nul_followed_by_octal() {
619 // Escaped NUL chars followed by octal digits are decoded correctly.
619 // Escaped NUL chars followed by octal digits are decoded correctly.
620 let expected = b"\012";
620 let expected = b"\x0012";
621 let escaped = br"\012";
621 let escaped = br"\012";
622 let unescaped = unescape_extra(escaped);
622 let unescaped = unescape_extra(escaped);
623 assert_eq!(&expected[..], &unescaped[..]);
623 assert_eq!(&expected[..], &unescaped[..]);
624 }
624 }
625
625
626 #[test]
626 #[test]
627 fn test_parse_float_timestamp() {
627 fn test_parse_float_timestamp() {
628 let test_cases = [
628 let test_cases = [
629 // Zero should map to the UNIX epoch.
629 // Zero should map to the UNIX epoch.
630 ("0.0", "1970-01-01 00:00:00"),
630 ("0.0", "1970-01-01 00:00:00"),
631 // Negative zero should be the same as positive zero.
631 // Negative zero should be the same as positive zero.
632 ("-0.0", "1970-01-01 00:00:00"),
632 ("-0.0", "1970-01-01 00:00:00"),
633 // Values without fractional components should work like integers.
633 // Values without fractional components should work like integers.
634 // (Assuming the timestamp is within the limits of f64 precision.)
634 // (Assuming the timestamp is within the limits of f64 precision.)
635 ("1115154970.0", "2005-05-03 21:16:10"),
635 ("1115154970.0", "2005-05-03 21:16:10"),
636 // We expect some loss of precision in the fractional component
636 // We expect some loss of precision in the fractional component
637 // when parsing arbitrary floating-point values.
637 // when parsing arbitrary floating-point values.
638 ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
638 ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
639 // But representable f64 values should parse losslessly.
639 // But representable f64 values should parse losslessly.
640 ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
640 ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
641 // Negative fractional components are subtracted from the epoch.
641 // Negative fractional components are subtracted from the epoch.
642 ("-1.333", "1969-12-31 23:59:58.667"),
642 ("-1.333", "1969-12-31 23:59:58.667"),
643 ];
643 ];
644
644
645 for (input, expected) in test_cases {
645 for (input, expected) in test_cases {
646 let res = parse_float_timestamp(input).unwrap().to_string();
646 let res = parse_float_timestamp(input).unwrap().to_string();
647 assert_eq!(res, expected);
647 assert_eq!(res, expected);
648 }
648 }
649 }
649 }
650
650
651 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
651 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
652 let mut output = Vec::with_capacity(bytes.len());
652 let mut output = Vec::with_capacity(bytes.len());
653
653
654 for c in bytes.iter().copied() {
654 for c in bytes.iter().copied() {
655 output.extend_from_slice(match c {
655 output.extend_from_slice(match c {
656 b'\0' => &b"\\0"[..],
656 b'\0' => &b"\\0"[..],
657 b'\\' => &b"\\\\"[..],
657 b'\\' => &b"\\\\"[..],
658 b'\n' => &b"\\n"[..],
658 b'\n' => &b"\\n"[..],
659 b'\r' => &b"\\r"[..],
659 b'\r' => &b"\\r"[..],
660 _ => {
660 _ => {
661 output.push(c);
661 output.push(c);
662 continue;
662 continue;
663 }
663 }
664 });
664 });
665 }
665 }
666
666
667 output
667 output
668 }
668 }
669
669
670 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
670 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
671 where
671 where
672 K: AsRef<[u8]>,
672 K: AsRef<[u8]>,
673 V: AsRef<[u8]>,
673 V: AsRef<[u8]>,
674 {
674 {
675 let extras = pairs.into_iter().map(|(k, v)| {
675 let extras = pairs.into_iter().map(|(k, v)| {
676 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
676 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
677 });
677 });
678 // Use fully-qualified syntax to avoid a future naming conflict with
678 // Use fully-qualified syntax to avoid a future naming conflict with
679 // the standard library: https://github.com/rust-lang/rust/issues/79524
679 // the standard library: https://github.com/rust-lang/rust/issues/79524
680 Itertools::intersperse(extras, b"\0".to_vec()).concat()
680 Itertools::intersperse(extras, b"\0".to_vec()).concat()
681 }
681 }
682
682
683 #[test]
683 #[test]
684 fn test_decode_extra() {
684 fn test_decode_extra() {
685 let extra = [
685 let extra = [
686 ("branch".into(), b"default".to_vec()),
686 ("branch".into(), b"default".to_vec()),
687 ("key-with-hyphens".into(), b"value1".to_vec()),
687 ("key-with-hyphens".into(), b"value1".to_vec()),
688 ("key_with_underscores".into(), b"value2".to_vec()),
688 ("key_with_underscores".into(), b"value2".to_vec()),
689 ("empty-value".into(), b"".to_vec()),
689 ("empty-value".into(), b"".to_vec()),
690 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
690 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
691 ]
691 ]
692 .into_iter()
692 .into_iter()
693 .collect::<BTreeMap<String, Vec<u8>>>();
693 .collect::<BTreeMap<String, Vec<u8>>>();
694
694
695 let encoded = encode_extra(&extra);
695 let encoded = encode_extra(&extra);
696 let decoded = decode_extra(&encoded).unwrap();
696 let decoded = decode_extra(&encoded).unwrap();
697
697
698 assert_eq!(extra, decoded);
698 assert_eq!(extra, decoded);
699 }
699 }
700
700
701 #[test]
701 #[test]
702 fn test_corrupt_extra() {
702 fn test_corrupt_extra() {
703 let test_cases = [
703 let test_cases = [
704 (&b""[..], "empty input"),
704 (&b""[..], "empty input"),
705 (&b"\0"[..], "unexpected null byte"),
705 (&b"\0"[..], "unexpected null byte"),
706 (&b":empty-key"[..], "empty key"),
706 (&b":empty-key"[..], "empty key"),
707 (&b"\0leading-null:"[..], "leading null"),
707 (&b"\0leading-null:"[..], "leading null"),
708 (&b"trailing-null:\0"[..], "trailing null"),
708 (&b"trailing-null:\0"[..], "trailing null"),
709 (&b"missing-value"[..], "missing value"),
709 (&b"missing-value"[..], "missing value"),
710 (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
710 (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
711 (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
711 (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
712 ];
712 ];
713
713
714 for (extra, msg) in test_cases {
714 for (extra, msg) in test_cases {
715 assert!(
715 assert!(
716 decode_extra(&extra).is_err(),
716 decode_extra(extra).is_err(),
717 "corrupt extra should have failed to parse: {}",
717 "corrupt extra should have failed to parse: {}",
718 msg
718 msg
719 );
719 );
720 }
720 }
721 }
721 }
722
722
723 #[test]
723 #[test]
724 fn test_parse_timestamp_line() {
724 fn test_parse_timestamp_line() {
725 let extra = [
725 let extra = [
726 ("branch".into(), b"default".to_vec()),
726 ("branch".into(), b"default".to_vec()),
727 ("key-with-hyphens".into(), b"value1".to_vec()),
727 ("key-with-hyphens".into(), b"value1".to_vec()),
728 ("key_with_underscores".into(), b"value2".to_vec()),
728 ("key_with_underscores".into(), b"value2".to_vec()),
729 ("empty-value".into(), b"".to_vec()),
729 ("empty-value".into(), b"".to_vec()),
730 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
730 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
731 ]
731 ]
732 .into_iter()
732 .into_iter()
733 .collect::<BTreeMap<String, Vec<u8>>>();
733 .collect::<BTreeMap<String, Vec<u8>>>();
734
734
735 let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
735 let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
736 line.extend_from_slice(&encode_extra(&extra));
736 line.extend_from_slice(&encode_extra(&extra));
737
737
738 let timestamp = parse_timestamp(&line).unwrap();
738 let timestamp = parse_timestamp(&line).unwrap();
739 assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
739 assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
740
740
741 let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
741 let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
742 assert_eq!(extra, parsed_extra);
742 assert_eq!(extra, parsed_extra);
743 }
743 }
744 }
744 }
@@ -1,2032 +1,2035
1 use std::collections::{HashMap, HashSet};
1 use std::collections::{HashMap, HashSet};
2 use std::fmt::Debug;
2 use std::fmt::Debug;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
4 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
5
5
6 use bitvec::prelude::*;
6 use bitvec::prelude::*;
7 use byteorder::{BigEndian, ByteOrder};
7 use byteorder::{BigEndian, ByteOrder};
8 use bytes_cast::{unaligned, BytesCast};
8 use bytes_cast::{unaligned, BytesCast};
9
9
10 use super::REVIDX_KNOWN_FLAGS;
10 use super::REVIDX_KNOWN_FLAGS;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
13 use crate::revlog::node::Node;
13 use crate::revlog::node::Node;
14 use crate::revlog::{Revision, NULL_REVISION};
14 use crate::revlog::{Revision, NULL_REVISION};
15 use crate::{
15 use crate::{
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
17 RevlogIndex, UncheckedRevision,
17 RevlogIndex, UncheckedRevision,
18 };
18 };
19
19
20 pub const INDEX_ENTRY_SIZE: usize = 64;
20 pub const INDEX_ENTRY_SIZE: usize = 64;
21 pub const INDEX_HEADER_SIZE: usize = 4;
21 pub const INDEX_HEADER_SIZE: usize = 4;
22 pub const COMPRESSION_MODE_INLINE: u8 = 2;
22 pub const COMPRESSION_MODE_INLINE: u8 = 2;
23
23
24 #[derive(Debug)]
24 #[derive(Debug)]
25 pub struct IndexHeader {
25 pub struct IndexHeader {
26 pub(super) header_bytes: [u8; INDEX_HEADER_SIZE],
26 pub(super) header_bytes: [u8; INDEX_HEADER_SIZE],
27 }
27 }
28
28
29 #[derive(Copy, Clone)]
29 #[derive(Copy, Clone)]
30 pub struct IndexHeaderFlags {
30 pub struct IndexHeaderFlags {
31 flags: u16,
31 flags: u16,
32 }
32 }
33
33
34 /// Corresponds to the high bits of `_format_flags` in python
34 /// Corresponds to the high bits of `_format_flags` in python
35 impl IndexHeaderFlags {
35 impl IndexHeaderFlags {
36 /// Corresponds to FLAG_INLINE_DATA in python
36 /// Corresponds to FLAG_INLINE_DATA in python
37 pub fn is_inline(self) -> bool {
37 pub fn is_inline(self) -> bool {
38 self.flags & 1 != 0
38 self.flags & 1 != 0
39 }
39 }
40 /// Corresponds to FLAG_GENERALDELTA in python
40 /// Corresponds to FLAG_GENERALDELTA in python
41 pub fn uses_generaldelta(self) -> bool {
41 pub fn uses_generaldelta(self) -> bool {
42 self.flags & 2 != 0
42 self.flags & 2 != 0
43 }
43 }
44 }
44 }
45
45
46 /// Corresponds to the INDEX_HEADER structure,
46 /// Corresponds to the INDEX_HEADER structure,
47 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
47 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
48 impl IndexHeader {
48 impl IndexHeader {
49 fn format_flags(&self) -> IndexHeaderFlags {
49 fn format_flags(&self) -> IndexHeaderFlags {
50 // No "unknown flags" check here, unlike in python. Maybe there should
50 // No "unknown flags" check here, unlike in python. Maybe there should
51 // be.
51 // be.
52 IndexHeaderFlags {
52 IndexHeaderFlags {
53 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
53 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
54 }
54 }
55 }
55 }
56
56
57 /// The only revlog version currently supported by rhg.
57 /// The only revlog version currently supported by rhg.
58 const REVLOGV1: u16 = 1;
58 const REVLOGV1: u16 = 1;
59
59
60 /// Corresponds to `_format_version` in Python.
60 /// Corresponds to `_format_version` in Python.
61 fn format_version(&self) -> u16 {
61 fn format_version(&self) -> u16 {
62 BigEndian::read_u16(&self.header_bytes[2..4])
62 BigEndian::read_u16(&self.header_bytes[2..4])
63 }
63 }
64
64
65 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
65 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
66 if index_bytes.is_empty() {
66 if index_bytes.is_empty() {
67 return Ok(None);
67 return Ok(None);
68 }
68 }
69 if index_bytes.len() < 4 {
69 if index_bytes.len() < 4 {
70 return Err(HgError::corrupted(
70 return Err(HgError::corrupted(
71 "corrupted revlog: can't read the index format header",
71 "corrupted revlog: can't read the index format header",
72 ));
72 ));
73 }
73 }
74 Ok(Some(IndexHeader {
74 Ok(Some(IndexHeader {
75 header_bytes: {
75 header_bytes: {
76 let bytes: [u8; 4] =
76 let bytes: [u8; 4] =
77 index_bytes[0..4].try_into().expect("impossible");
77 index_bytes[0..4].try_into().expect("impossible");
78 bytes
78 bytes
79 },
79 },
80 }))
80 }))
81 }
81 }
82 }
82 }
83
83
84 /// Abstracts the access to the index bytes since they can be spread between
84 /// Abstracts the access to the index bytes since they can be spread between
85 /// the immutable (bytes) part and the mutable (added) part if any appends
85 /// the immutable (bytes) part and the mutable (added) part if any appends
86 /// happened. This makes it transparent for the callers.
86 /// happened. This makes it transparent for the callers.
87 struct IndexData {
87 struct IndexData {
88 /// Immutable bytes, most likely taken from disk
88 /// Immutable bytes, most likely taken from disk
89 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
89 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
90 /// Used when stripping index contents, keeps track of the start of the
90 /// Used when stripping index contents, keeps track of the start of the
91 /// first stripped revision, which is used to give a slice of the
91 /// first stripped revision, which is used to give a slice of the
92 /// `bytes` field.
92 /// `bytes` field.
93 truncation: Option<usize>,
93 truncation: Option<usize>,
94 /// Bytes that were added after reading the index
94 /// Bytes that were added after reading the index
95 added: Vec<u8>,
95 added: Vec<u8>,
96 first_entry: [u8; INDEX_ENTRY_SIZE],
96 first_entry: [u8; INDEX_ENTRY_SIZE],
97 }
97 }
98
98
99 impl IndexData {
99 impl IndexData {
100 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
100 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
101 let mut first_entry = [0; INDEX_ENTRY_SIZE];
101 let mut first_entry = [0; INDEX_ENTRY_SIZE];
102 if bytes.len() >= INDEX_ENTRY_SIZE {
102 if bytes.len() >= INDEX_ENTRY_SIZE {
103 first_entry[INDEX_HEADER_SIZE..]
103 first_entry[INDEX_HEADER_SIZE..]
104 .copy_from_slice(&bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE])
104 .copy_from_slice(&bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE])
105 }
105 }
106 Self {
106 Self {
107 bytes,
107 bytes,
108 truncation: None,
108 truncation: None,
109 added: vec![],
109 added: vec![],
110 first_entry,
110 first_entry,
111 }
111 }
112 }
112 }
113
113
114 pub fn len(&self) -> usize {
114 pub fn len(&self) -> usize {
115 match self.truncation {
115 match self.truncation {
116 Some(truncation) => truncation + self.added.len(),
116 Some(truncation) => truncation + self.added.len(),
117 None => self.bytes.len() + self.added.len(),
117 None => self.bytes.len() + self.added.len(),
118 }
118 }
119 }
119 }
120
120
121 fn remove(
121 fn remove(
122 &mut self,
122 &mut self,
123 rev: Revision,
123 rev: Revision,
124 offsets: Option<&[usize]>,
124 offsets: Option<&[usize]>,
125 ) -> Result<(), RevlogError> {
125 ) -> Result<(), RevlogError> {
126 let rev = rev.0 as usize;
126 let rev = rev.0 as usize;
127 let truncation = if let Some(offsets) = offsets {
127 let truncation = if let Some(offsets) = offsets {
128 offsets[rev]
128 offsets[rev]
129 } else {
129 } else {
130 rev * INDEX_ENTRY_SIZE
130 rev * INDEX_ENTRY_SIZE
131 };
131 };
132 if truncation < self.bytes.len() {
132 if truncation < self.bytes.len() {
133 self.truncation = Some(truncation);
133 self.truncation = Some(truncation);
134 self.added.clear();
134 self.added.clear();
135 } else {
135 } else {
136 self.added.truncate(truncation - self.bytes.len());
136 self.added.truncate(truncation - self.bytes.len());
137 }
137 }
138 Ok(())
138 Ok(())
139 }
139 }
140
140
141 fn is_new(&self) -> bool {
141 fn is_new(&self) -> bool {
142 self.bytes.is_empty()
142 self.bytes.is_empty()
143 }
143 }
144 }
144 }
145
145
146 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
146 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
147 type Output = [u8];
147 type Output = [u8];
148
148
149 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
149 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
150 let start = index.start;
150 let start = index.start;
151 let end = index.end;
151 let end = index.end;
152 let immutable_len = match self.truncation {
152 let immutable_len = match self.truncation {
153 Some(truncation) => truncation,
153 Some(truncation) => truncation,
154 None => self.bytes.len(),
154 None => self.bytes.len(),
155 };
155 };
156 if start < immutable_len {
156 if start < immutable_len {
157 if end > immutable_len {
157 if end > immutable_len {
158 panic!("index data cannot span existing and added ranges");
158 panic!("index data cannot span existing and added ranges");
159 }
159 }
160 &self.bytes[index]
160 &self.bytes[index]
161 } else {
161 } else {
162 &self.added[start - immutable_len..end - immutable_len]
162 &self.added[start - immutable_len..end - immutable_len]
163 }
163 }
164 }
164 }
165 }
165 }
166
166
167 #[derive(Debug, PartialEq, Eq)]
167 #[derive(Debug, PartialEq, Eq)]
168 pub struct RevisionDataParams {
168 pub struct RevisionDataParams {
169 pub flags: u16,
169 pub flags: u16,
170 pub data_offset: u64,
170 pub data_offset: u64,
171 pub data_compressed_length: i32,
171 pub data_compressed_length: i32,
172 pub data_uncompressed_length: i32,
172 pub data_uncompressed_length: i32,
173 pub data_delta_base: i32,
173 pub data_delta_base: i32,
174 pub link_rev: i32,
174 pub link_rev: i32,
175 pub parent_rev_1: i32,
175 pub parent_rev_1: i32,
176 pub parent_rev_2: i32,
176 pub parent_rev_2: i32,
177 pub node_id: [u8; NODE_BYTES_LENGTH],
177 pub node_id: [u8; NODE_BYTES_LENGTH],
178 pub _sidedata_offset: u64,
178 pub _sidedata_offset: u64,
179 pub _sidedata_compressed_length: i32,
179 pub _sidedata_compressed_length: i32,
180 pub data_compression_mode: u8,
180 pub data_compression_mode: u8,
181 pub _sidedata_compression_mode: u8,
181 pub _sidedata_compression_mode: u8,
182 pub _rank: i32,
182 pub _rank: i32,
183 }
183 }
184
184
185 impl Default for RevisionDataParams {
185 impl Default for RevisionDataParams {
186 fn default() -> Self {
186 fn default() -> Self {
187 Self {
187 Self {
188 flags: 0,
188 flags: 0,
189 data_offset: 0,
189 data_offset: 0,
190 data_compressed_length: 0,
190 data_compressed_length: 0,
191 data_uncompressed_length: 0,
191 data_uncompressed_length: 0,
192 data_delta_base: -1,
192 data_delta_base: -1,
193 link_rev: -1,
193 link_rev: -1,
194 parent_rev_1: -1,
194 parent_rev_1: -1,
195 parent_rev_2: -1,
195 parent_rev_2: -1,
196 node_id: [0; NODE_BYTES_LENGTH],
196 node_id: [0; NODE_BYTES_LENGTH],
197 _sidedata_offset: 0,
197 _sidedata_offset: 0,
198 _sidedata_compressed_length: 0,
198 _sidedata_compressed_length: 0,
199 data_compression_mode: COMPRESSION_MODE_INLINE,
199 data_compression_mode: COMPRESSION_MODE_INLINE,
200 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
200 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
201 _rank: -1,
201 _rank: -1,
202 }
202 }
203 }
203 }
204 }
204 }
205
205
206 #[derive(BytesCast)]
206 #[derive(BytesCast)]
207 #[repr(C)]
207 #[repr(C)]
208 pub struct RevisionDataV1 {
208 pub struct RevisionDataV1 {
209 data_offset_or_flags: unaligned::U64Be,
209 data_offset_or_flags: unaligned::U64Be,
210 data_compressed_length: unaligned::I32Be,
210 data_compressed_length: unaligned::I32Be,
211 data_uncompressed_length: unaligned::I32Be,
211 data_uncompressed_length: unaligned::I32Be,
212 data_delta_base: unaligned::I32Be,
212 data_delta_base: unaligned::I32Be,
213 link_rev: unaligned::I32Be,
213 link_rev: unaligned::I32Be,
214 parent_rev_1: unaligned::I32Be,
214 parent_rev_1: unaligned::I32Be,
215 parent_rev_2: unaligned::I32Be,
215 parent_rev_2: unaligned::I32Be,
216 node_id: [u8; STORED_NODE_ID_BYTES],
216 node_id: [u8; STORED_NODE_ID_BYTES],
217 }
217 }
218
218
219 fn _static_assert_size_of_revision_data_v1() {
219 fn _static_assert_size_of_revision_data_v1() {
220 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
220 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
221 }
221 }
222
222
223 impl RevisionDataParams {
223 impl RevisionDataParams {
224 pub fn validate(&self) -> Result<(), RevlogError> {
224 pub fn validate(&self) -> Result<(), RevlogError> {
225 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
225 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
226 return Err(RevlogError::corrupted(format!(
226 return Err(RevlogError::corrupted(format!(
227 "unknown revlog index flags: {}",
227 "unknown revlog index flags: {}",
228 self.flags
228 self.flags
229 )));
229 )));
230 }
230 }
231 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
231 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
232 return Err(RevlogError::corrupted(format!(
232 return Err(RevlogError::corrupted(format!(
233 "invalid data compression mode: {}",
233 "invalid data compression mode: {}",
234 self.data_compression_mode
234 self.data_compression_mode
235 )));
235 )));
236 }
236 }
237 // FIXME isn't this only for v2 or changelog v2?
237 // FIXME isn't this only for v2 or changelog v2?
238 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
238 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
239 return Err(RevlogError::corrupted(format!(
239 return Err(RevlogError::corrupted(format!(
240 "invalid sidedata compression mode: {}",
240 "invalid sidedata compression mode: {}",
241 self._sidedata_compression_mode
241 self._sidedata_compression_mode
242 )));
242 )));
243 }
243 }
244 Ok(())
244 Ok(())
245 }
245 }
246
246
247 pub fn into_v1(self) -> RevisionDataV1 {
247 pub fn into_v1(self) -> RevisionDataV1 {
248 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
248 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
249 let mut node_id = [0; STORED_NODE_ID_BYTES];
249 let mut node_id = [0; STORED_NODE_ID_BYTES];
250 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
250 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
251 RevisionDataV1 {
251 RevisionDataV1 {
252 data_offset_or_flags: data_offset_or_flags.into(),
252 data_offset_or_flags: data_offset_or_flags.into(),
253 data_compressed_length: self.data_compressed_length.into(),
253 data_compressed_length: self.data_compressed_length.into(),
254 data_uncompressed_length: self.data_uncompressed_length.into(),
254 data_uncompressed_length: self.data_uncompressed_length.into(),
255 data_delta_base: self.data_delta_base.into(),
255 data_delta_base: self.data_delta_base.into(),
256 link_rev: self.link_rev.into(),
256 link_rev: self.link_rev.into(),
257 parent_rev_1: self.parent_rev_1.into(),
257 parent_rev_1: self.parent_rev_1.into(),
258 parent_rev_2: self.parent_rev_2.into(),
258 parent_rev_2: self.parent_rev_2.into(),
259 node_id,
259 node_id,
260 }
260 }
261 }
261 }
262 }
262 }
263
263
264 /// A Revlog index
264 /// A Revlog index
265 pub struct Index {
265 pub struct Index {
266 bytes: IndexData,
266 bytes: IndexData,
267 /// Offsets of starts of index blocks.
267 /// Offsets of starts of index blocks.
268 /// Only needed when the index is interleaved with data.
268 /// Only needed when the index is interleaved with data.
269 offsets: RwLock<Option<Vec<usize>>>,
269 offsets: RwLock<Option<Vec<usize>>>,
270 uses_generaldelta: bool,
270 uses_generaldelta: bool,
271 is_inline: bool,
271 is_inline: bool,
272 /// Cache of (head_revisions, filtered_revisions)
272 /// Cache of (head_revisions, filtered_revisions)
273 ///
273 ///
274 /// The head revisions in this index, kept in sync. Should
274 /// The head revisions in this index, kept in sync. Should
275 /// be accessed via the [`Self::head_revs`] method.
275 /// be accessed via the [`Self::head_revs`] method.
276 /// The last filtered revisions in this index, used to make sure
276 /// The last filtered revisions in this index, used to make sure
277 /// we haven't changed filters when returning the cached `head_revs`.
277 /// we haven't changed filters when returning the cached `head_revs`.
278 head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
278 head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
279 }
279 }
280
280
impl Debug for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only a subset of fields is shown: the raw `bytes` and the
        // `head_revs` cache are deliberately omitted from debug output.
        f.debug_struct("Index")
            .field("offsets", &self.offsets)
            .field("uses_generaldelta", &self.uses_generaldelta)
            .finish()
    }
}
289
289
290 impl Graph for Index {
290 impl Graph for Index {
291 #[inline(always)]
291 #[inline(always)]
292 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
292 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
293 let err = || GraphError::ParentOutOfRange(rev);
293 let err = || GraphError::ParentOutOfRange(rev);
294 match self.get_entry(rev) {
294 match self.get_entry(rev) {
295 Some(entry) => {
295 Some(entry) => {
296 // The C implementation checks that the parents are valid
296 // The C implementation checks that the parents are valid
297 // before returning
297 // before returning
298 Ok([
298 Ok([
299 self.check_revision(entry.p1()).ok_or_else(err)?,
299 self.check_revision(entry.p1()).ok_or_else(err)?,
300 self.check_revision(entry.p2()).ok_or_else(err)?,
300 self.check_revision(entry.p2()).ok_or_else(err)?,
301 ])
301 ])
302 }
302 }
303 None => Ok([NULL_REVISION, NULL_REVISION]),
303 None => Ok([NULL_REVISION, NULL_REVISION]),
304 }
304 }
305 }
305 }
306 }
306 }
307
307
/// A cache suitable for find_snapshots
///
/// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
/// values sets of [`BaseRevision`]
///
/// TODO the dubious part is insisting that errors must be RevlogError
/// we would probably need to sprinkle some magic here, such as an associated
/// type that would be Into<RevlogError> but even that would not be
/// satisfactory, as errors potentially have nothing to do with the revlog.
pub trait SnapshotsCache {
    /// Record `value` as a member of the set associated with `rev`.
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError>;
}
324
324
325 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
325 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
326 fn insert_for(
326 fn insert_for(
327 &mut self,
327 &mut self,
328 rev: BaseRevision,
328 rev: BaseRevision,
329 value: BaseRevision,
329 value: BaseRevision,
330 ) -> Result<(), RevlogError> {
330 ) -> Result<(), RevlogError> {
331 let all_values = self.entry(rev).or_default();
331 let all_values = self.entry(rev).or_default();
332 all_values.insert(value);
332 all_values.insert(value);
333 Ok(())
333 Ok(())
334 }
334 }
335 }
335 }
336
336
337 impl Index {
337 impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    ///
    /// Returns a corruption error if the version is not `REVLOGV1`, or if
    /// an inline index does not end exactly on an entry boundary.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
        default_header: IndexHeader,
    ) -> Result<Self, HgError> {
        // When no header could be parsed (presumably an empty index —
        // TODO confirm), fall back to the caller-provided default.
        let header =
            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

            // Walk the interleaved index+data: each entry is followed by
            // its compressed revision data, so the next entry starts
            // `compressed_len` bytes after this entry's end.
            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                };

                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            if offset == bytes.len() {
                Ok(Self {
                    bytes: IndexData::new(bytes),
                    offsets: RwLock::new(Some(offsets)),
                    uses_generaldelta,
                    is_inline: true,
                    head_revs: RwLock::new((vec![], HashSet::new())),
                })
            } else {
                // The scan must land exactly on the end of the buffer,
                // otherwise the file is truncated or garbled.
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes: IndexData::new(bytes),
                offsets: RwLock::new(None),
                uses_generaldelta,
                is_inline: false,
                head_revs: RwLock::new((vec![], HashSet::new())),
            })
        }
    }
393
393
    /// Whether this index uses the `generaldelta` storage scheme.
    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }
397
397
    /// Value of the inline flag.
    ///
    /// When set, revision data is interleaved with the index entries in
    /// the same file (see [`Self::data`]).
    pub fn is_inline(&self) -> bool {
        self.is_inline
    }
402
402
403 /// Return a slice of bytes if `revlog` is inline. Panic if not.
403 /// Return a slice of bytes if `revlog` is inline. Panic if not.
404 pub fn data(&self, start: usize, end: usize) -> &[u8] {
404 pub fn data(&self, start: usize, end: usize) -> &[u8] {
405 if !self.is_inline() {
405 if !self.is_inline() {
406 panic!("tried to access data in the index of a revlog that is not inline");
406 panic!("tried to access data in the index of a revlog that is not inline");
407 }
407 }
408 &self.bytes[start..end]
408 &self.bytes[start..end]
409 }
409 }
410
410
411 /// Return number of entries of the revlog index.
411 /// Return number of entries of the revlog index.
412 pub fn len(&self) -> usize {
412 pub fn len(&self) -> usize {
413 if self.is_inline() {
413 if self.is_inline() {
414 (*self.get_offsets())
414 (*self.get_offsets())
415 .as_ref()
415 .as_ref()
416 .expect("inline should have offsets")
416 .expect("inline should have offsets")
417 .len()
417 .len()
418 } else {
418 } else {
419 self.bytes.len() / INDEX_ENTRY_SIZE
419 self.bytes.len() / INDEX_ENTRY_SIZE
420 }
420 }
421 }
421 }
422
422
    /// Return a read guard over the (lazily populated) table of entry
    /// offsets for an inline index.
    ///
    /// Panics if the index is not inline.
    pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
        assert!(self.is_inline());
        {
            // Wrap in a block to drop the write guard before taking the
            // read lock below (RwLock would deadlock otherwise).
            // TODO perf?
            let mut offsets = self.offsets.write().unwrap();
            if offsets.is_none() {
                offsets.replace(inline_scan(&self.bytes.bytes).1);
            }
        }
        self.offsets.read().unwrap()
    }
435
435
436 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
436 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
437 assert!(self.is_inline());
437 assert!(self.is_inline());
438 let mut offsets = self.offsets.write().unwrap();
438 let mut offsets = self.offsets.write().unwrap();
439 if offsets.is_none() {
439 if offsets.is_none() {
440 offsets.replace(inline_scan(&self.bytes.bytes).1);
440 offsets.replace(inline_scan(&self.bytes.bytes).1);
441 }
441 }
442 offsets
442 offsets
443 }
443 }
444
444
    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
449
449
450 /// Return the index entry corresponding to the given revision or `None`
450 /// Return the index entry corresponding to the given revision or `None`
451 /// for [`NULL_REVISION`]
451 /// for [`NULL_REVISION`]
452 ///
452 ///
453 /// The specified revision being of the checked type, it always exists
453 /// The specified revision being of the checked type, it always exists
454 /// if it was validated by this index.
454 /// if it was validated by this index.
455 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
455 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
456 if rev == NULL_REVISION {
456 if rev == NULL_REVISION {
457 return None;
457 return None;
458 }
458 }
459 if rev.0 == 0 {
459 if rev.0 == 0 {
460 Some(IndexEntry {
460 Some(IndexEntry {
461 bytes: &self.bytes.first_entry[..],
461 bytes: &self.bytes.first_entry[..],
462 })
462 })
463 } else {
463 } else {
464 Some(if self.is_inline() {
464 Some(if self.is_inline() {
465 self.get_entry_inline(rev)
465 self.get_entry_inline(rev)
466 } else {
466 } else {
467 self.get_entry_separated(rev)
467 self.get_entry_separated(rev)
468 })
468 })
469 }
469 }
470 }
470 }
471
471
472 /// Return the binary content of the index entry for the given revision
472 /// Return the binary content of the index entry for the given revision
473 ///
473 ///
474 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
474 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
475 /// returned.
475 /// returned.
476 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
476 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
477 self.get_entry(rev).map(|e| {
477 self.get_entry(rev).map(|e| {
478 let bytes = e.as_bytes();
478 let bytes = e.as_bytes();
479 if rev.0 == 0 {
479 if rev.0 == 0 {
480 &bytes[4..]
480 &bytes[4..]
481 } else {
481 } else {
482 bytes
482 bytes
483 }
483 }
484 })
484 })
485 }
485 }
486
486
    /// Return the stored fields of the entry for `rev` decoded into
    /// [`RevisionDataParams`], or `None` if `rev` is not a valid revision
    /// of this index (including [`NULL_REVISION`]).
    pub fn entry_as_params(
        &self,
        rev: UncheckedRevision,
    ) -> Option<RevisionDataParams> {
        let rev = self.check_revision(rev)?;
        self.get_entry(rev).map(|e| RevisionDataParams {
            flags: e.flags(),
            // NOTE(review): for rev 0 of a pre-existing index the raw
            // offset field appears to overlap other header data, hence
            // this special case — confirm against the on-disk format.
            data_offset: if rev.0 == 0 && !self.bytes.is_new() {
                e.flags() as u64
            } else {
                e.raw_offset()
            },
            data_compressed_length: e
                .compressed_len()
                .try_into()
                .unwrap_or_else(|_| {
                    // Python's `unionrepo` sets the compressed length to be
                    // `-1` (or `u32::MAX` if transmuted to `u32`) because it
                    // cannot know the correct compressed length of a given
                    // revision. I'm not sure if this is true, but having this
                    // edge case won't hurt other use cases, let's handle it.
                    assert_eq!(e.compressed_len(), u32::MAX);
                    NULL_REVISION.0
                }),
            data_uncompressed_length: e.uncompressed_len(),
            data_delta_base: e.base_revision_or_base_of_delta_chain().0,
            link_rev: e.link_revision().0,
            parent_rev_1: e.p1().0,
            parent_rev_2: e.p2().0,
            node_id: e.hash().as_bytes().try_into().unwrap(),
            ..Default::default()
        })
    }
520
520
    /// Look up the entry for `rev` in an inline index, using the offsets
    /// table to locate where it starts.
    fn get_entry_inline(&self, rev: Revision) -> IndexEntry {
        let offsets = &self.get_offsets();
        let offsets = offsets.as_ref().expect("inline should have offsets");
        let start = offsets[rev.0 as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        IndexEntry { bytes }
    }
530
530
531 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
531 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
532 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
532 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
533 let end = start + INDEX_ENTRY_SIZE;
533 let end = start + INDEX_ENTRY_SIZE;
534 let bytes = &self.bytes[start..end];
534 let bytes = &self.bytes[start..end];
535
535
536 IndexEntry { bytes }
536 IndexEntry { bytes }
537 }
537 }
538
538
    /// Return an entry whose bytes are all zeros.
    fn null_entry(&self) -> IndexEntry {
        IndexEntry {
            bytes: &[0; INDEX_ENTRY_SIZE],
        }
    }
544
544
    /// Return the head revisions of this index
    pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
        // With `py_shortcut` disabled, `head_revs_filtered` always
        // returns `Some`, so the unwrap cannot fail.
        self.head_revs_filtered(&HashSet::new(), false)
            .map(|h| h.unwrap())
    }
550
550
    /// Python-specific shortcut to save on PyList creation
    ///
    /// Returns `Ok(None)` when the cached heads are still valid, meaning
    /// the Python caller can keep using its own cached copy.
    pub fn head_revs_shortcut(
        &self,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        self.head_revs_filtered(&HashSet::new(), true)
    }
557
557
    /// Return the heads removed and added by advancing from `begin` to `end`.
    /// In revset language, we compute:
    /// - `heads(:begin)-heads(:end)`
    /// - `heads(:end)-heads(:begin)`
    ///
    /// Returned as `(heads_removed, heads_added)`.
    pub fn head_revs_diff(
        &self,
        begin: Revision,
        end: Revision,
    ) -> Result<(Vec<Revision>, Vec<Revision>), GraphError> {
        let mut heads_added = vec![];
        let mut heads_removed = vec![];

        // Working set of revisions known to have children in the part of
        // the range already scanned (see the invariants below).
        let mut acc = HashSet::new();
        let Revision(begin) = begin;
        let Revision(end) = end;
        let mut i = end;

        while i > begin {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `i+1..end` (inclusive)
            if !acc.remove(&i) {
                // `i` has no children in `i+1..end`: it is a new head.
                heads_added.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.insert(parent);
            }
            i -= 1;
        }

        // At this point `acc` contains old revisions that gained new children.
        // We need to check if they had any children before. If not, those
        // revisions are the removed heads.
        while !acc.is_empty() {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `begin+1..end`, but not among `i+1..begin` (inclusive)

            assert!(i >= -1); // yes, `-1` can also be a head if the repo is empty
            if acc.remove(&i) {
                heads_removed.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.remove(&parent);
            }
            i -= 1;
        }

        Ok((heads_removed, heads_added))
    }
608
608
    /// Return the head revisions of this index
    ///
    /// Revisions in `filtered_revs` are excluded. With `py_shortcut` set,
    /// `Ok(None)` is returned when the cached answer is still valid (same
    /// filter set as last time), so the Python side can reuse its cache.
    pub fn head_revs_filtered(
        &self,
        filtered_revs: &HashSet<Revision>,
        py_shortcut: bool,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        {
            // Fast path: the cache is valid only when heads were computed
            // before (non-empty vec) *and* the filter set is unchanged.
            let guard = self
                .head_revs
                .read()
                .expect("RwLock on Index.head_revs should not be poisoned");
            let self_head_revs = &guard.0;
            let self_filtered_revs = &guard.1;
            if !self_head_revs.is_empty()
                && filtered_revs == self_filtered_revs
            {
                if py_shortcut {
                    // Don't copy the revs since we've already cached them
                    // on the Python side.
                    return Ok(None);
                } else {
                    return Ok(Some(self_head_revs.to_owned()));
                }
            }
        }

        let as_vec = if self.is_empty() {
            vec![NULL_REVISION]
        } else {
            // `retain_heads_fast` sets a bit for every revision that is
            // not a head; the unmarked indices below are the heads.
            let mut not_heads = bitvec![0; self.len()];
            dagops::retain_heads_fast(
                self,
                not_heads.as_mut_bitslice(),
                filtered_revs,
            )?;
            not_heads
                .into_iter()
                .enumerate()
                .filter_map(|(idx, is_not_head)| {
                    if is_not_head {
                        None
                    } else {
                        Some(Revision(idx as BaseRevision))
                    }
                })
                .collect()
        };
        // Refresh the cache together with the filter set it was computed
        // for, so the fast path above stays correct.
        *self
            .head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoned") =
            (as_vec.to_owned(), filtered_revs.to_owned());
        Ok(Some(as_vec))
    }
663
663
    /// Obtain the delta chain for a revision.
    ///
    /// `stop_rev` specifies a revision to stop at. If not specified, we
    /// stop at the base of the chain.
    ///
    /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
    /// revs in ascending order and `stopped` is a bool indicating whether
    /// `stoprev` was hit.
    pub fn delta_chain(
        &self,
        rev: Revision,
        stop_rev: Option<Revision>,
        using_general_delta: Option<bool>,
    ) -> Result<(Vec<Revision>, bool), HgError> {
        let mut current_rev = rev;
        let mut entry = self.get_entry(rev).unwrap();
        let mut chain = vec![];
        // The caller may override the index-wide generaldelta setting.
        let using_general_delta =
            using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
        // Walk towards the chain base: with generaldelta the next link is
        // the stored delta base, otherwise it is the previous revision.
        while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
            && stop_rev.map(|r| r != current_rev).unwrap_or(true)
        {
            chain.push(current_rev);
            let new_rev = if using_general_delta {
                entry.base_revision_or_base_of_delta_chain()
            } else {
                UncheckedRevision(current_rev.0 - 1)
            };
            current_rev = self.check_revision(new_rev).ok_or_else(|| {
                HgError::corrupted(format!("Revision {new_rev} out of range"))
            })?;
            if current_rev.0 == NULL_REVISION.0 {
                break;
            }
            entry = self.get_entry(current_rev).unwrap()
        }

        let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
            true
        } else {
            // The base itself is part of the chain when the walk was not
            // interrupted by `stop_rev`.
            chain.push(current_rev);
            false
        };
        // The walk collected revisions base-last; callers expect
        // ascending order.
        chain.reverse();
        Ok((chain, stopped))
    }
710
710
    /// Record into `cache` every snapshot revision found in
    /// `start_rev..=end_rev` (clamped to the index bounds), keyed by its
    /// delta base.
    pub fn find_snapshots(
        &self,
        start_rev: UncheckedRevision,
        end_rev: UncheckedRevision,
        cache: &mut impl SnapshotsCache,
    ) -> Result<(), RevlogError> {
        let mut start_rev = start_rev.0;
        let mut end_rev = end_rev.0;
        // Make the range half-open, then clamp both ends to valid
        // revision numbers.
        end_rev += 1;
        let len = self.len().try_into().unwrap();
        if end_rev > len {
            end_rev = len;
        }
        if start_rev < 0 {
            start_rev = 0;
        }
        for rev in start_rev..end_rev {
            if !self.is_snapshot_unchecked(Revision(rev))? {
                continue;
            }
            let mut base = self
                .get_entry(Revision(rev))
                .unwrap()
                .base_revision_or_base_of_delta_chain();
            if base.0 == rev {
                // A self-referencing base means "no base": key the
                // snapshot under the null revision instead.
                base = NULL_REVISION.into();
            }
            cache.insert_for(base.0, rev)?;
        }
        Ok(())
    }
742
742
743 fn clear_head_revs(&self) {
743 fn clear_head_revs(&self) {
744 self.head_revs
744 self.head_revs
745 .write()
745 .write()
746 .expect("RwLock on Index.head_revs should not be poisoined")
746 .expect("RwLock on Index.head_revs should not be poisoined")
747 .0
747 .0
748 .clear()
748 .clear()
749 }
749 }
750
750
    /// TODO move this to the trait probably, along with other things
    ///
    /// Append a new revision entry to the in-memory part of the index.
    pub fn append(
        &mut self,
        revision_data: RevisionDataParams,
    ) -> Result<(), RevlogError> {
        revision_data.validate()?;
        let entry_v1 = revision_data.into_v1();
        let entry_bytes = entry_v1.as_bytes();
        if self.bytes.len() == 0 {
            // First entry ever: also fill the dedicated rev-0 buffer,
            // skipping the first INDEX_HEADER_SIZE bytes (presumably
            // reserved for the header — TODO confirm).
            self.bytes.first_entry[INDEX_HEADER_SIZE..].copy_from_slice(
                &entry_bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE],
            )
        }
        if self.is_inline() {
            // Keep the offsets table in sync with the appended entry.
            let new_offset = self.bytes.len();
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.push(new_offset)
            }
        }
        self.bytes.added.extend(entry_bytes);
        // The graph changed, so the cached heads are stale.
        self.clear_head_revs();
        Ok(())
    }
774
774
    /// Pack a header value into its 4-byte big-endian on-disk form.
    pub fn pack_header(&self, header: i32) -> [u8; 4] {
        header.to_be_bytes()
    }
778
778
    /// Truncate the index so that `rev` and all later revisions are
    /// removed.
    pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
        // For inline indexes the truncation point in bytes depends on the
        // offsets table, so clone it out before mutating `self.bytes`.
        let offsets = if self.is_inline() {
            self.get_offsets().clone()
        } else {
            None
        };
        self.bytes.remove(rev, offsets.as_deref())?;
        if self.is_inline() {
            // Drop the offsets of the removed entries as well.
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.truncate(rev.0 as usize)
            }
        }
        // The graph changed, so the cached heads are stale.
        self.clear_head_revs();
        Ok(())
    }
794
794
795 pub fn clear_caches(&self) {
795 pub fn clear_caches(&self) {
796 // We need to get the 'inline' value from Python at init and use this
796 // We need to get the 'inline' value from Python at init and use this
797 // instead of offsets to determine whether we're inline since we might
797 // instead of offsets to determine whether we're inline since we might
798 // clear caches. This implies re-populating the offsets on-demand.
798 // clear caches. This implies re-populating the offsets on-demand.
799 *self
799 *self
800 .offsets
800 .offsets
801 .write()
801 .write()
802 .expect("RwLock on Index.offsets should not be poisoed") = None;
802 .expect("RwLock on Index.offsets should not be poisoed") = None;
803 self.clear_head_revs();
803 self.clear_head_revs();
804 }
804 }
805
805
    /// Unchecked version of `is_snapshot`.
    /// Assumes the caller checked that `rev` is within a valid revision range.
    pub fn is_snapshot_unchecked(
        &self,
        mut rev: Revision,
    ) -> Result<bool, RevlogError> {
        // Walk down the delta-base chain: `rev` is a snapshot iff no link
        // of the chain is a delta against one of its (effective) parents
        // before the chain bottoms out at the null revision.
        while rev.0 >= 0 {
            let entry = self.get_entry(rev).unwrap();
            let mut base = entry.base_revision_or_base_of_delta_chain().0;
            if base == rev.0 {
                // A self-referencing base means "no base".
                base = NULL_REVISION.0;
            }
            if base == NULL_REVISION.0 {
                // Full snapshot: a delta against nothing.
                return Ok(true);
            }
            let [mut p1, mut p2] = self
                .parents(rev)
                .map_err(|_| RevlogError::InvalidRevision)?;
            // Skip over empty (zero compressed length) p1 ancestors to
            // find the effective revision a delta could be based on.
            while let Some(p1_entry) = self.get_entry(p1) {
                if p1_entry.compressed_len() != 0 || p1.0 == 0 {
                    break;
                }
                let parent_base =
                    p1_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p1.0 {
                    break;
                }
                p1 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            // Same skipping for the p2 side.
            while let Some(p2_entry) = self.get_entry(p2) {
                if p2_entry.compressed_len() != 0 || p2.0 == 0 {
                    break;
                }
                let parent_base =
                    p2_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p2.0 {
                    break;
                }
                p2 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            if base == p1.0 || base == p2.0 {
                // Delta against a parent: not a snapshot.
                return Ok(false);
            }
            // Recurse (iteratively) on the base of the chain.
            rev = self
                .check_revision(base.into())
                .ok_or(RevlogError::InvalidRevision)?;
        }
        Ok(rev == NULL_REVISION)
    }
859
859
860 /// Return whether the given revision is a snapshot. Returns an error if
860 /// Return whether the given revision is a snapshot. Returns an error if
861 /// `rev` is not within a valid revision range.
861 /// `rev` is not within a valid revision range.
862 pub fn is_snapshot(
862 pub fn is_snapshot(
863 &self,
863 &self,
864 rev: UncheckedRevision,
864 rev: UncheckedRevision,
865 ) -> Result<bool, RevlogError> {
865 ) -> Result<bool, RevlogError> {
866 let rev = self
866 let rev = self
867 .check_revision(rev)
867 .check_revision(rev)
868 .ok_or_else(|| RevlogError::corrupted("test"))?;
868 .ok_or_else(|| RevlogError::corrupted("test"))?;
869 self.is_snapshot_unchecked(rev)
869 self.is_snapshot_unchecked(rev)
870 }
870 }
871
871
    /// Slice revs to reduce the amount of unrelated data to be read from disk.
    ///
    /// The index is sliced into groups that should be read in one time.
    ///
    /// The initial chunk is sliced until the overall density
    /// (payload/chunks-span ratio) is above `target_density`.
    /// No gap smaller than `min_gap_size` is skipped.
    pub fn slice_chunk_to_density(
        &self,
        revs: &[Revision],
        target_density: f64,
        min_gap_size: usize,
    ) -> Vec<Vec<Revision>> {
        if revs.is_empty() {
            return vec![];
        }
        if revs.len() == 1 {
            return vec![revs.to_owned()];
        }
        // Total on-disk span covered by the requested revisions.
        let delta_chain_span = self.segment_span(revs);
        if delta_chain_span < min_gap_size {
            // Even reading the whole span wastes less than one
            // skippable gap: return everything as one chunk.
            return vec![revs.to_owned()];
        }
        // Pair each revision with its index entry; missing revisions
        // (e.g. the null revision) get a synthetic empty entry.
        let entries: Vec<_> = revs
            .iter()
            .map(|r| {
                (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
            })
            .collect();

        let mut read_data = delta_chain_span;
        let chain_payload: u32 =
            entries.iter().map(|(_r, e)| e.compressed_len()).sum();
        // Density = useful payload / bytes actually read from disk.
        let mut density = if delta_chain_span > 0 {
            chain_payload as f64 / delta_chain_span as f64
        } else {
            1.0
        };

        if density >= target_density {
            return vec![revs.to_owned()];
        }

        // Collect the gaps between consecutive non-empty revisions,
        // as (size, index-after-the-gap) pairs.
        let mut gaps = Vec::new();
        let mut previous_end = None;

        for (i, (_rev, entry)) in entries.iter().enumerate() {
            let start = entry.c_start() as usize;
            let length = entry.compressed_len();

            // Skip empty revisions to form larger holes
            if length == 0 {
                continue;
            }

            if let Some(end) = previous_end {
                let gap_size = start - end;
                // Only consider holes that are large enough
                if gap_size > min_gap_size {
                    gaps.push((gap_size, i));
                }
            }
            previous_end = Some(start + length as usize);
        }
        if gaps.is_empty() {
            return vec![revs.to_owned()];
        }
        // Sort ascending so that `pop()` below yields gaps from
        // largest to smallest.
        gaps.sort_unstable();

        // Collect the indices of the largest holes until
        // the density is acceptable
        let mut selected = vec![];
        while let Some((gap_size, gap_id)) = gaps.pop() {
            if density >= target_density {
                break;
            }
            selected.push(gap_id);

            // Skipping this gap reduces the number of bytes read,
            // which mechanically raises the density.
            read_data -= gap_size;
            density = if read_data > 0 {
                chain_payload as f64 / read_data as f64
            } else {
                1.0
            };
            if density >= target_density {
                break;
            }
        }
        // Restore index order, and add a sentinel cut at the end so the
        // loop below emits the final chunk boundary.
        selected.sort_unstable();
        selected.push(revs.len());

        // Cut the revs at collected indices
        let mut previous_idx = 0;
        let mut chunks = vec![];
        for idx in selected {
            let chunk = self.trim_chunk(&entries, previous_idx, idx);
            if !chunk.is_empty() {
                chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
            }
            previous_idx = idx;
        }
        // Emit whatever remains after the last selected cut.
        let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
        if !chunk.is_empty() {
            chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
        }

        chunks
    }
984
984
985 /// Get the byte span of a segment of sorted revisions.
985 /// Get the byte span of a segment of sorted revisions.
986 ///
986 ///
987 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
987 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
988 /// the `revs` segment.
988 /// the `revs` segment.
989 ///
989 ///
990 /// panics:
990 /// panics:
991 /// - if `revs` is empty or only made of `NULL_REVISION`
991 /// - if `revs` is empty or only made of `NULL_REVISION`
992 /// - if cannot retrieve entry for the last or first not null element of
992 /// - if cannot retrieve entry for the last or first not null element of
993 /// `revs`.
993 /// `revs`.
994 fn segment_span(&self, revs: &[Revision]) -> usize {
994 fn segment_span(&self, revs: &[Revision]) -> usize {
995 if revs.is_empty() {
995 if revs.is_empty() {
996 return 0;
996 return 0;
997 }
997 }
998 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
998 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
999 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
999 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
1000 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
1000 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
1001 let start = if first_rev.0 == 0 {
1001 let start = if first_rev.0 == 0 {
1002 0
1002 0
1003 } else {
1003 } else {
1004 self.get_entry(*first_rev).unwrap().c_start()
1004 self.get_entry(*first_rev).unwrap().c_start()
1005 };
1005 };
1006 (end - start) as usize
1006 (end - start) as usize
1007 }
1007 }
1008
1008
1009 /// Returns `&revs[startidx..endidx]` without empty trailing revs
1009 /// Returns `&revs[startidx..endidx]` without empty trailing revs
1010 fn trim_chunk<'a>(
1010 fn trim_chunk<'a>(
1011 &'a self,
1011 &'a self,
1012 revs: &'a [(Revision, IndexEntry)],
1012 revs: &'a [(Revision, IndexEntry)],
1013 start: usize,
1013 start: usize,
1014 mut end: usize,
1014 mut end: usize,
1015 ) -> &'a [(Revision, IndexEntry)] {
1015 ) -> &'a [(Revision, IndexEntry)] {
1016 // Trim empty revs at the end, except the very first rev of a chain
1016 // Trim empty revs at the end, except the very first rev of a chain
1017 let last_rev = revs[end - 1].0;
1017 let last_rev = revs[end - 1].0;
1018 if last_rev.0 < self.len() as BaseRevision {
1018 if last_rev.0 < self.len() as BaseRevision {
1019 while end > 1
1019 while end > 1
1020 && end > start
1020 && end > start
1021 && revs[end - 1].1.compressed_len() == 0
1021 && revs[end - 1].1.compressed_len() == 0
1022 {
1022 {
1023 end -= 1
1023 end -= 1
1024 }
1024 }
1025 }
1025 }
1026 &revs[start..end]
1026 &revs[start..end]
1027 }
1027 }
1028
1028
    /// Computes the set of revisions for each non-public phase from `roots`,
    /// which are the last known roots for each non-public phase.
    pub fn compute_phases_map_sets(
        &self,
        roots: HashMap<Phase, Vec<Revision>>,
    ) -> Result<(usize, RootsPerPhase), GraphError> {
        // Start with every revision public; roots and their descendants
        // are upgraded below.
        let mut phases = vec![Phase::Public; self.len()];
        // Lowest revision that can possibly be non-public
        // (NULL_REVISION means "none found yet").
        let mut min_phase_rev = NULL_REVISION;

        for phase in Phase::non_public_phases() {
            if let Some(phase_roots) = roots.get(phase) {
                // Mark the roots with their phase and track the overall
                // minimum non-public revision.
                let min_rev =
                    self.add_roots_get_min(phase_roots, &mut phases, *phase);
                if min_rev != NULL_REVISION
                    && (min_phase_rev == NULL_REVISION
                        || min_rev < min_phase_rev)
                {
                    min_phase_rev = min_rev;
                }
            } else {
                continue;
            };
        }
        let mut phase_sets: RootsPerPhase = Default::default();

        if min_phase_rev == NULL_REVISION {
            // No non-public root at all: the sweep below is empty.
            min_phase_rev = Revision(self.len() as BaseRevision);
        }

        // Sweep forward from the first possibly-non-public revision,
        // propagating the "highest" parent phase to each child
        // (phases are ordered, so a child can never be more public
        // than its parents).
        for rev in min_phase_rev.0..self.len() as BaseRevision {
            let rev = Revision(rev);
            let [p1, p2] = self.parents(rev)?;

            if p1.0 >= 0 && phases[p1.0 as usize] > phases[rev.0 as usize] {
                phases[rev.0 as usize] = phases[p1.0 as usize];
            }
            if p2.0 >= 0 && phases[p2.0 as usize] > phases[rev.0 as usize] {
                phases[rev.0 as usize] = phases[p2.0 as usize];
            }
            // Record the revision in the set of its (non-public) phase.
            // Index 0 of `phase_sets` is the first non-public phase.
            let set = match phases[rev.0 as usize] {
                Phase::Public => continue,
                phase => &mut phase_sets[phase as usize - 1],
            };
            set.push(rev);
        }

        Ok((self.len(), phase_sets))
    }
1077
1077
1078 fn add_roots_get_min(
1078 fn add_roots_get_min(
1079 &self,
1079 &self,
1080 phase_roots: &[Revision],
1080 phase_roots: &[Revision],
1081 phases: &mut [Phase],
1081 phases: &mut [Phase],
1082 phase: Phase,
1082 phase: Phase,
1083 ) -> Revision {
1083 ) -> Revision {
1084 let mut min_rev = NULL_REVISION;
1084 let mut min_rev = NULL_REVISION;
1085
1085
1086 for root in phase_roots {
1086 for root in phase_roots {
1087 phases[root.0 as usize] = phase;
1087 phases[root.0 as usize] = phase;
1088 if min_rev == NULL_REVISION || min_rev > *root {
1088 if min_rev == NULL_REVISION || min_rev > *root {
1089 min_rev = *root;
1089 min_rev = *root;
1090 }
1090 }
1091 }
1091 }
1092 min_rev
1092 min_rev
1093 }
1093 }
1094
1094
1095 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1095 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1096 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
1096 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
1097 ///
1097 ///
1098 /// `min_root` and `roots` are unchecked since they are just used as
1098 /// `min_root` and `roots` are unchecked since they are just used as
1099 /// a bound or for comparison and don't need to represent a valid revision.
1099 /// a bound or for comparison and don't need to represent a valid revision.
1100 /// In practice, the only invalid revision passed is the working directory
1100 /// In practice, the only invalid revision passed is the working directory
1101 /// revision ([`i32::MAX`]).
1101 /// revision ([`i32::MAX`]).
1102 pub fn reachable_roots(
1102 pub fn reachable_roots(
1103 &self,
1103 &self,
1104 min_root: UncheckedRevision,
1104 min_root: UncheckedRevision,
1105 mut heads: Vec<Revision>,
1105 mut heads: Vec<Revision>,
1106 roots: HashSet<UncheckedRevision>,
1106 roots: HashSet<UncheckedRevision>,
1107 include_path: bool,
1107 include_path: bool,
1108 ) -> Result<HashSet<Revision>, GraphError> {
1108 ) -> Result<HashSet<Revision>, GraphError> {
1109 if roots.is_empty() {
1109 if roots.is_empty() {
1110 return Ok(HashSet::new());
1110 return Ok(HashSet::new());
1111 }
1111 }
1112 let mut reachable = HashSet::new();
1112 let mut reachable = HashSet::new();
1113 let mut seen = HashMap::new();
1113 let mut seen = HashMap::new();
1114
1114
1115 while let Some(rev) = heads.pop() {
1115 while let Some(rev) = heads.pop() {
1116 if roots.contains(&rev.into()) {
1116 if roots.contains(&rev.into()) {
1117 reachable.insert(rev);
1117 reachable.insert(rev);
1118 if !include_path {
1118 if !include_path {
1119 continue;
1119 continue;
1120 }
1120 }
1121 }
1121 }
1122 let parents = self.parents(rev)?;
1122 let parents = self.parents(rev)?;
1123 seen.insert(rev, parents);
1123 seen.insert(rev, parents);
1124 for parent in parents {
1124 for parent in parents {
1125 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1125 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1126 heads.push(parent);
1126 heads.push(parent);
1127 }
1127 }
1128 }
1128 }
1129 }
1129 }
1130 if !include_path {
1130 if !include_path {
1131 return Ok(reachable);
1131 return Ok(reachable);
1132 }
1132 }
1133 let mut revs: Vec<_> = seen.keys().collect();
1133 let mut revs: Vec<_> = seen.keys().collect();
1134 revs.sort_unstable();
1134 revs.sort_unstable();
1135 for rev in revs {
1135 for rev in revs {
1136 for parent in seen[rev] {
1136 for parent in seen[rev] {
1137 if reachable.contains(&parent) {
1137 if reachable.contains(&parent) {
1138 reachable.insert(*rev);
1138 reachable.insert(*rev);
1139 }
1139 }
1140 }
1140 }
1141 }
1141 }
1142 Ok(reachable)
1142 Ok(reachable)
1143 }
1143 }
1144
1144
1145 /// Given a (possibly overlapping) set of revs, return all the
1145 /// Given a (possibly overlapping) set of revs, return all the
1146 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1146 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1147 pub fn common_ancestor_heads(
1147 pub fn common_ancestor_heads(
1148 &self,
1148 &self,
1149 revisions: &[Revision],
1149 revisions: &[Revision],
1150 ) -> Result<Vec<Revision>, GraphError> {
1150 ) -> Result<Vec<Revision>, GraphError> {
1151 // given that revisions is expected to be small, we find this shortcut
1151 // given that revisions is expected to be small, we find this shortcut
1152 // potentially acceptable, especially given that `hg-cpython` could
1152 // potentially acceptable, especially given that `hg-cpython` could
1153 // very much bypass this, constructing a vector of unique values from
1153 // very much bypass this, constructing a vector of unique values from
1154 // the onset.
1154 // the onset.
1155 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1155 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1156 // Besides deduplicating, the C version also implements the shortcut
1156 // Besides deduplicating, the C version also implements the shortcut
1157 // for `NULL_REVISION`:
1157 // for `NULL_REVISION`:
1158 if as_set.contains(&NULL_REVISION) {
1158 if as_set.contains(&NULL_REVISION) {
1159 return Ok(vec![]);
1159 return Ok(vec![]);
1160 }
1160 }
1161
1161
1162 let revisions: Vec<Revision> = as_set.into_iter().collect();
1162 let revisions: Vec<Revision> = as_set.into_iter().collect();
1163
1163
1164 if revisions.len() < 8 {
1164 if revisions.len() < 8 {
1165 self.find_gca_candidates::<u8>(&revisions)
1165 self.find_gca_candidates::<u8>(&revisions)
1166 } else if revisions.len() < 64 {
1166 } else if revisions.len() < 64 {
1167 self.find_gca_candidates::<u64>(&revisions)
1167 self.find_gca_candidates::<u64>(&revisions)
1168 } else {
1168 } else {
1169 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1169 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1170 }
1170 }
1171 }
1171 }
1172
1172
1173 pub fn ancestors(
1173 pub fn ancestors(
1174 &self,
1174 &self,
1175 revisions: &[Revision],
1175 revisions: &[Revision],
1176 ) -> Result<Vec<Revision>, GraphError> {
1176 ) -> Result<Vec<Revision>, GraphError> {
1177 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1177 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1178 }
1178 }
1179
1179
    /// Given a disjoint set of revs, return all candidates for the
    /// greatest common ancestor. In revset notation, this is the set
    /// `heads(::a and ::b and ...)`
    fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        if revs.is_empty() {
            return Ok(vec![]);
        }
        let revcount = revs.len();
        let mut candidates = vec![];
        let max_rev = revs.iter().max().unwrap();

        // One bit set per revision up to `max_rev`; bit `i` of `seen[r]`
        // means "rev `r` is an ancestor of `revs[i]`".
        let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);

        for (idx, rev) in revs.iter().enumerate() {
            seen[rev.0 as usize].add(idx);
        }
        let mut current_rev = *max_rev;
        // Number of revisions whose inspection in the main loop
        // will give a result or trigger inspection of other revisions
        let mut interesting = revcount;

        // The algorithm works on a vector of bit sets, indexed by revision
        // numbers and iterated in reverse order.
        // An entry in this vector is poisoned if and only if the corresponding
        // revision is a common, yet not maximal ancestor.

        // The principle of the algorithm is as follows:
        // For a revision `r`, when entering the loop, `seen[r]` is either
        // poisoned or the sub set of `revs` of which `r` is an ancestor.
        // If this sub set is full, then `r` is a solution and its parents
        // have to be poisoned.
        //
        // At each iteration, the bit sets of the parents are updated by
        // union with `seen[r]`.
        // As we walk the index from the end, we are sure we have encountered
        // all children of `r` before `r`, hence we know that `seen[r]` is
        // fully computed.
        //
        // On top of that there are several optimizations that make reading
        // less obvious than the comment above:
        // - The `interesting` counter allows to break early
        // - The loop starts from `max(revs)`
        // - Early return in case it is detected that one of the incoming revs
        //   is a common ancestor of all of them.
        while current_rev.0 >= 0 && interesting > 0 {
            let current_seen = seen[current_rev.0 as usize].clone();

            if current_seen.is_empty() {
                // Not an ancestor of any input rev: nothing to propagate.
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }
            let mut poison = current_seen.is_poisoned();
            if !poison {
                interesting -= 1;
                if current_seen.is_full_range(revcount) {
                    // Ancestor of all input revs: a GCA candidate.
                    candidates.push(current_rev);
                    poison = true;

                    // Being a common ancestor, if `current_rev` is among
                    // the input revisions, it is *the* answer.
                    for rev in revs {
                        if *rev == current_rev {
                            return Ok(candidates);
                        }
                    }
                }
            }
            // Propagate either the poison or the ancestry bits to both
            // parents.
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = &mut seen[parent.0 as usize];
                if poison {
                    // this block is logically equivalent to poisoning parent
                    // and counting it as non interesting if it
                    // has been seen before (hence counted then as interesting)
                    if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
                        interesting -= 1;
                    }
                    parent_seen.poison();
                } else {
                    if parent_seen.is_empty() {
                        interesting += 1;
                    }
                    parent_seen.union(&current_seen);
                }
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        Ok(candidates)
    }
1276
1276
    /// Given a disjoint set of revs, return the subset with the longest path
    /// to the root.
    fn find_deepest_revs(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        // TODO replace this all with just comparing rank?
        // Also, the original implementations in C/Python are cryptic, not
        // even sure we actually need this?
        if revs.len() <= 1 {
            return Ok(revs.to_owned());
        }
        let max_rev = revs.iter().max().unwrap().0;
        // `interesting` counts, for each bit mask of input revs, how many
        // revisions currently carry that mask in `seen`.
        let mut interesting = HashMap::new();
        // `seen[r]`: bit mask of the input revs that `r` descends from
        // (0 = none). `depth[r]`: distance from the nearest input rev
        // (0 = not on any path).
        let mut seen = vec![0; max_rev as usize + 1];
        let mut depth = vec![0; max_rev as usize + 1];
        // Remember which bit was assigned to which input rev, to decode
        // the winning mask at the end.
        let mut mapping = vec![];
        let mut revs = revs.to_owned();
        revs.sort_unstable();

        for (idx, rev) in revs.iter().enumerate() {
            depth[rev.0 as usize] = 1;
            let shift = 1 << idx;
            seen[rev.0 as usize] = shift;
            interesting.insert(shift, 1);
            mapping.push((shift, *rev));
        }

        // Walk the index backwards; the loop ends once all surviving
        // revisions share a single mask (or the root is reached).
        let mut current_rev = Revision(max_rev);
        while current_rev.0 >= 0 && interesting.len() > 1 {
            let current_depth = depth[current_rev.0 as usize];
            if current_depth == 0 {
                // Not on a path from any input rev: skip.
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }

            let current_seen = seen[current_rev.0 as usize];
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = seen[parent.0 as usize];
                let parent_depth = depth[parent.0 as usize];
                if parent_depth <= current_depth {
                    // The path through `current_rev` is at least as deep:
                    // the parent inherits this mask and depth + 1.
                    depth[parent.0 as usize] = current_depth + 1;
                    if parent_seen != current_seen {
                        *interesting.get_mut(&current_seen).unwrap() += 1;
                        seen[parent.0 as usize] = current_seen;
                        if parent_seen != 0 {
                            // The parent's previous mask loses a carrier.
                            let parent_interesting =
                                interesting.get_mut(&parent_seen).unwrap();
                            *parent_interesting -= 1;
                            if *parent_interesting == 0 {
                                interesting.remove(&parent_seen);
                            }
                        }
                    }
                } else if current_depth == parent_depth - 1 {
                    // Same-depth merge point: combine both masks.
                    let either_seen = parent_seen | current_seen;
                    if either_seen == parent_seen {
                        continue;
                    }
                    seen[parent.0 as usize] = either_seen;
                    interesting
                        .entry(either_seen)
                        .and_modify(|v| *v += 1)
                        .or_insert(1);
                    *interesting.get_mut(&parent_seen).unwrap() -= 1;
                    if interesting[&parent_seen] == 0 {
                        interesting.remove(&parent_seen);
                    }
                }
            }
            // `current_rev` itself is done carrying its mask.
            *interesting.get_mut(&current_seen).unwrap() -= 1;
            if interesting[&current_seen] == 0 {
                interesting.remove(&current_seen);
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        if interesting.len() != 1 {
            return Ok(vec![]);
        }
        // The surviving mask identifies the deepest input revs.
        let mask = interesting.keys().next().unwrap();

        Ok(mapping
            .into_iter()
            .filter_map(|(shift, rev)| {
                if (mask & shift) != 0 {
                    return Some(rev);
                }
                None
            })
            .collect())
    }
1373 }
1373 }
1374
1374
1375 /// The kind of functionality needed by find_gca_candidates
1375 /// The kind of functionality needed by find_gca_candidates
1376 ///
1376 ///
1377 /// This is a bit mask which can be declared to be "poisoned", which callers
1377 /// This is a bit mask which can be declared to be "poisoned", which callers
1378 /// interpret to break out of some loops.
1378 /// interpret to break out of some loops.
1379 ///
1379 ///
1380 /// The maximum capacity of the bit mask is up to the actual implementation
1380 /// The maximum capacity of the bit mask is up to the actual implementation
1381 trait PoisonableBitSet: Sized + PartialEq {
1381 trait PoisonableBitSet: Sized + PartialEq {
1382 /// Return a vector of exactly n elements, initialized to be empty.
1382 /// Return a vector of exactly n elements, initialized to be empty.
1383 ///
1383 ///
1384 /// Optimization can vastly depend on implementation. Those being `Copy`
1384 /// Optimization can vastly depend on implementation. Those being `Copy`
1385 /// and having constant capacity typically can have a very simple
1385 /// and having constant capacity typically can have a very simple
1386 /// implementation.
1386 /// implementation.
1387 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1387 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1388
1388
1389 /// The size of the bit mask in memory
1389 /// The size of the bit mask in memory
1390 #[allow(unused)]
1390 fn size(&self) -> usize;
1391 fn size(&self) -> usize;
1391
1392
1392 /// The number of elements that can be represented in the set.
1393 /// The number of elements that can be represented in the set.
1393 ///
1394 ///
1394 /// Another way to put it is that it is the highest integer `C` such that
1395 /// Another way to put it is that it is the highest integer `C` such that
1395 /// the set is guaranteed to always be a subset of the integer range
1396 /// the set is guaranteed to always be a subset of the integer range
1396 /// `[0, C)`
1397 /// `[0, C)`
1398 #[allow(unused)]
1397 fn capacity(&self) -> usize;
1399 fn capacity(&self) -> usize;
1398
1400
1399 /// Declare `n` to belong to the set
1401 /// Declare `n` to belong to the set
1400 fn add(&mut self, n: usize);
1402 fn add(&mut self, n: usize);
1401
1403
1402 /// Declare `n` not to belong to the set
1404 /// Declare `n` not to belong to the set
1405 #[allow(unused)]
1403 fn discard(&mut self, n: usize);
1406 fn discard(&mut self, n: usize);
1404
1407
1405 /// Replace this bit set by its union with other
1408 /// Replace this bit set by its union with other
1406 fn union(&mut self, other: &Self);
1409 fn union(&mut self, other: &Self);
1407
1410
1408 /// Poison the bit set
1411 /// Poison the bit set
1409 ///
1412 ///
1410 /// Interpretation up to the caller
1413 /// Interpretation up to the caller
1411 fn poison(&mut self);
1414 fn poison(&mut self);
1412
1415
1413 /// Is the bit set poisoned?
1416 /// Is the bit set poisoned?
1414 ///
1417 ///
1415 /// Interpretation is up to the caller
1418 /// Interpretation is up to the caller
1416 fn is_poisoned(&self) -> bool;
1419 fn is_poisoned(&self) -> bool;
1417
1420
1418 /// Is the bit set empty?
1421 /// Is the bit set empty?
1419 fn is_empty(&self) -> bool;
1422 fn is_empty(&self) -> bool;
1420
1423
1421 /// return `true` if and only if the bit is the full range `[0, n)`
1424 /// return `true` if and only if the bit is the full range `[0, n)`
1422 /// of integers
1425 /// of integers
1423 fn is_full_range(&self, n: usize) -> bool;
1426 fn is_full_range(&self, n: usize) -> bool;
1424 }
1427 }
1425
1428
1426 const U64_POISON: u64 = 1 << 63;
1429 const U64_POISON: u64 = 1 << 63;
1427 const U8_POISON: u8 = 1 << 7;
1430 const U8_POISON: u8 = 1 << 7;
1428
1431
1429 impl PoisonableBitSet for u64 {
1432 impl PoisonableBitSet for u64 {
1430 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1433 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1431 vec![0u64; vec_len]
1434 vec![0u64; vec_len]
1432 }
1435 }
1433
1436
1434 fn size(&self) -> usize {
1437 fn size(&self) -> usize {
1435 8
1438 8
1436 }
1439 }
1437
1440
1438 fn capacity(&self) -> usize {
1441 fn capacity(&self) -> usize {
1439 63
1442 63
1440 }
1443 }
1441
1444
1442 fn add(&mut self, n: usize) {
1445 fn add(&mut self, n: usize) {
1443 (*self) |= 1u64 << n;
1446 (*self) |= 1u64 << n;
1444 }
1447 }
1445
1448
1446 fn discard(&mut self, n: usize) {
1449 fn discard(&mut self, n: usize) {
1447 (*self) &= u64::MAX - (1u64 << n);
1450 (*self) &= u64::MAX - (1u64 << n);
1448 }
1451 }
1449
1452
1450 fn union(&mut self, other: &Self) {
1453 fn union(&mut self, other: &Self) {
1451 if *self != *other {
1454 if *self != *other {
1452 (*self) |= *other;
1455 (*self) |= *other;
1453 }
1456 }
1454 }
1457 }
1455
1458
1456 fn is_full_range(&self, n: usize) -> bool {
1459 fn is_full_range(&self, n: usize) -> bool {
1457 *self + 1 == (1u64 << n)
1460 *self + 1 == (1u64 << n)
1458 }
1461 }
1459
1462
1460 fn is_empty(&self) -> bool {
1463 fn is_empty(&self) -> bool {
1461 *self == 0
1464 *self == 0
1462 }
1465 }
1463
1466
1464 fn poison(&mut self) {
1467 fn poison(&mut self) {
1465 *self = U64_POISON;
1468 *self = U64_POISON;
1466 }
1469 }
1467
1470
1468 fn is_poisoned(&self) -> bool {
1471 fn is_poisoned(&self) -> bool {
1469 // equality comparison would be tempting but would not resist
1472 // equality comparison would be tempting but would not resist
1470 // operations after poisoning (even if these should be bogus).
1473 // operations after poisoning (even if these should be bogus).
1471 *self >= U64_POISON
1474 *self >= U64_POISON
1472 }
1475 }
1473 }
1476 }
1474
1477
1475 impl PoisonableBitSet for u8 {
1478 impl PoisonableBitSet for u8 {
1476 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1479 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1477 vec![0; vec_len]
1480 vec![0; vec_len]
1478 }
1481 }
1479
1482
1480 fn size(&self) -> usize {
1483 fn size(&self) -> usize {
1481 1
1484 1
1482 }
1485 }
1483
1486
1484 fn capacity(&self) -> usize {
1487 fn capacity(&self) -> usize {
1485 7
1488 7
1486 }
1489 }
1487
1490
1488 fn add(&mut self, n: usize) {
1491 fn add(&mut self, n: usize) {
1489 (*self) |= 1 << n;
1492 (*self) |= 1 << n;
1490 }
1493 }
1491
1494
1492 fn discard(&mut self, n: usize) {
1495 fn discard(&mut self, n: usize) {
1493 (*self) &= u8::MAX - (1 << n);
1496 (*self) &= u8::MAX - (1 << n);
1494 }
1497 }
1495
1498
1496 fn union(&mut self, other: &Self) {
1499 fn union(&mut self, other: &Self) {
1497 if *self != *other {
1500 if *self != *other {
1498 (*self) |= *other;
1501 (*self) |= *other;
1499 }
1502 }
1500 }
1503 }
1501
1504
1502 fn is_full_range(&self, n: usize) -> bool {
1505 fn is_full_range(&self, n: usize) -> bool {
1503 *self + 1 == (1 << n)
1506 *self + 1 == (1 << n)
1504 }
1507 }
1505
1508
1506 fn is_empty(&self) -> bool {
1509 fn is_empty(&self) -> bool {
1507 *self == 0
1510 *self == 0
1508 }
1511 }
1509
1512
1510 fn poison(&mut self) {
1513 fn poison(&mut self) {
1511 *self = U8_POISON;
1514 *self = U8_POISON;
1512 }
1515 }
1513
1516
1514 fn is_poisoned(&self) -> bool {
1517 fn is_poisoned(&self) -> bool {
1515 // equality comparison would be tempting but would not resist
1518 // equality comparison would be tempting but would not resist
1516 // operations after poisoning (even if these should be bogus).
1519 // operations after poisoning (even if these should be bogus).
1517 *self >= U8_POISON
1520 *self >= U8_POISON
1518 }
1521 }
1519 }
1522 }
1520
1523
1521 /// A poisonable bit set whose capacity is not known at compile time but
1524 /// A poisonable bit set whose capacity is not known at compile time but
1522 /// is constant after initial construction
1525 /// is constant after initial construction
1523 ///
1526 ///
1524 /// This can be way further optimized if performance assessments (speed
1527 /// This can be way further optimized if performance assessments (speed
1525 /// and/or RAM) require it.
1528 /// and/or RAM) require it.
1526 /// As far as RAM is concerned, for large vectors of these, the main problem
1529 /// As far as RAM is concerned, for large vectors of these, the main problem
1527 /// would be the repetition of set_size in each item. We would need a trait
1530 /// would be the repetition of set_size in each item. We would need a trait
1528 /// to abstract over the idea of a vector of such bit sets to do better.
1531 /// to abstract over the idea of a vector of such bit sets to do better.
1529 #[derive(Clone, PartialEq)]
1532 #[derive(Clone, PartialEq)]
1530 struct NonStaticPoisonableBitSet {
1533 struct NonStaticPoisonableBitSet {
1531 set_size: usize,
1534 set_size: usize,
1532 bit_set: Vec<u64>,
1535 bit_set: Vec<u64>,
1533 }
1536 }
1534
1537
1535 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1538 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1536 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1539 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1537 1 + (set_size + 1) / 64
1540 1 + (set_size + 1) / 64
1538 }
1541 }
1539
1542
1540 impl NonStaticPoisonableBitSet {
1543 impl NonStaticPoisonableBitSet {
1541 /// The index of the sub-bit set for the given n, and the index inside
1544 /// The index of the sub-bit set for the given n, and the index inside
1542 /// the latter
1545 /// the latter
1543 fn index(&self, n: usize) -> (usize, usize) {
1546 fn index(&self, n: usize) -> (usize, usize) {
1544 (n / 64, n % 64)
1547 (n / 64, n % 64)
1545 }
1548 }
1546 }
1549 }
1547
1550
1548 /// Mock implementation to ensure that the trait makes sense
1551 /// Mock implementation to ensure that the trait makes sense
1549 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1552 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1550 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1553 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1551 let tmpl = Self {
1554 let tmpl = Self {
1552 set_size,
1555 set_size,
1553 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1556 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1554 };
1557 };
1555 vec![tmpl; vec_len]
1558 vec![tmpl; vec_len]
1556 }
1559 }
1557
1560
1558 fn size(&self) -> usize {
1561 fn size(&self) -> usize {
1559 8 + self.bit_set.len() * 8
1562 8 + self.bit_set.len() * 8
1560 }
1563 }
1561
1564
1562 fn capacity(&self) -> usize {
1565 fn capacity(&self) -> usize {
1563 self.set_size
1566 self.set_size
1564 }
1567 }
1565
1568
1566 fn add(&mut self, n: usize) {
1569 fn add(&mut self, n: usize) {
1567 let (sub_bs, bit_pos) = self.index(n);
1570 let (sub_bs, bit_pos) = self.index(n);
1568 self.bit_set[sub_bs] |= 1 << bit_pos
1571 self.bit_set[sub_bs] |= 1 << bit_pos
1569 }
1572 }
1570
1573
1571 fn discard(&mut self, n: usize) {
1574 fn discard(&mut self, n: usize) {
1572 let (sub_bs, bit_pos) = self.index(n);
1575 let (sub_bs, bit_pos) = self.index(n);
1573 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1576 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1574 }
1577 }
1575
1578
1576 fn union(&mut self, other: &Self) {
1579 fn union(&mut self, other: &Self) {
1577 assert!(
1580 assert!(
1578 self.set_size == other.set_size,
1581 self.set_size == other.set_size,
1579 "Binary operations on bit sets can only be done on same size"
1582 "Binary operations on bit sets can only be done on same size"
1580 );
1583 );
1581 for i in 0..self.bit_set.len() - 1 {
1584 for i in 0..self.bit_set.len() - 1 {
1582 self.bit_set[i] |= other.bit_set[i]
1585 self.bit_set[i] |= other.bit_set[i]
1583 }
1586 }
1584 }
1587 }
1585
1588
1586 fn is_full_range(&self, n: usize) -> bool {
1589 fn is_full_range(&self, n: usize) -> bool {
1587 let (sub_bs, bit_pos) = self.index(n);
1590 let (sub_bs, bit_pos) = self.index(n);
1588 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1591 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1589 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1592 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1590 }
1593 }
1591
1594
1592 fn is_empty(&self) -> bool {
1595 fn is_empty(&self) -> bool {
1593 self.bit_set.iter().all(|bs| *bs == 0u64)
1596 self.bit_set.iter().all(|bs| *bs == 0u64)
1594 }
1597 }
1595
1598
1596 fn poison(&mut self) {
1599 fn poison(&mut self) {
1597 let (sub_bs, bit_pos) = self.index(self.set_size);
1600 let (sub_bs, bit_pos) = self.index(self.set_size);
1598 self.bit_set[sub_bs] = 1 << bit_pos;
1601 self.bit_set[sub_bs] = 1 << bit_pos;
1599 }
1602 }
1600
1603
1601 fn is_poisoned(&self) -> bool {
1604 fn is_poisoned(&self) -> bool {
1602 let (sub_bs, bit_pos) = self.index(self.set_size);
1605 let (sub_bs, bit_pos) = self.index(self.set_size);
1603 self.bit_set[sub_bs] >= 1 << bit_pos
1606 self.bit_set[sub_bs] >= 1 << bit_pos
1604 }
1607 }
1605 }
1608 }
1606
1609
1607 /// Set of roots of all non-public phases
1610 /// Set of roots of all non-public phases
1608 pub type RootsPerPhase = [Vec<Revision>; Phase::non_public_phases().len()];
1611 pub type RootsPerPhase = [Vec<Revision>; Phase::non_public_phases().len()];
1609
1612
1610 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1613 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1611 pub enum Phase {
1614 pub enum Phase {
1612 Public = 0,
1615 Public = 0,
1613 Draft = 1,
1616 Draft = 1,
1614 Secret = 2,
1617 Secret = 2,
1615 Archived = 3,
1618 Archived = 3,
1616 Internal = 4,
1619 Internal = 4,
1617 }
1620 }
1618
1621
1619 impl TryFrom<usize> for Phase {
1622 impl TryFrom<usize> for Phase {
1620 type Error = RevlogError;
1623 type Error = RevlogError;
1621
1624
1622 fn try_from(value: usize) -> Result<Self, Self::Error> {
1625 fn try_from(value: usize) -> Result<Self, Self::Error> {
1623 Ok(match value {
1626 Ok(match value {
1624 0 => Self::Public,
1627 0 => Self::Public,
1625 1 => Self::Draft,
1628 1 => Self::Draft,
1626 2 => Self::Secret,
1629 2 => Self::Secret,
1627 32 => Self::Archived,
1630 32 => Self::Archived,
1628 96 => Self::Internal,
1631 96 => Self::Internal,
1629 v => {
1632 v => {
1630 return Err(RevlogError::corrupted(format!(
1633 return Err(RevlogError::corrupted(format!(
1631 "invalid phase value {}",
1634 "invalid phase value {}",
1632 v
1635 v
1633 )))
1636 )))
1634 }
1637 }
1635 })
1638 })
1636 }
1639 }
1637 }
1640 }
1638
1641
1639 impl Phase {
1642 impl Phase {
1640 pub const fn all_phases() -> &'static [Self] {
1643 pub const fn all_phases() -> &'static [Self] {
1641 &[
1644 &[
1642 Self::Public,
1645 Self::Public,
1643 Self::Draft,
1646 Self::Draft,
1644 Self::Secret,
1647 Self::Secret,
1645 Self::Archived,
1648 Self::Archived,
1646 Self::Internal,
1649 Self::Internal,
1647 ]
1650 ]
1648 }
1651 }
1649 pub const fn non_public_phases() -> &'static [Self] {
1652 pub const fn non_public_phases() -> &'static [Self] {
1650 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1653 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1651 }
1654 }
1652 }
1655 }
1653
1656
1654 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1657 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1655 let mut offset: usize = 0;
1658 let mut offset: usize = 0;
1656 let mut offsets = Vec::new();
1659 let mut offsets = Vec::new();
1657
1660
1658 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1661 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1659 offsets.push(offset);
1662 offsets.push(offset);
1660 let end = offset + INDEX_ENTRY_SIZE;
1663 let end = offset + INDEX_ENTRY_SIZE;
1661 let entry = IndexEntry {
1664 let entry = IndexEntry {
1662 bytes: &bytes[offset..end],
1665 bytes: &bytes[offset..end],
1663 };
1666 };
1664
1667
1665 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1668 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1666 }
1669 }
1667 (offset, offsets)
1670 (offset, offsets)
1668 }
1671 }
1669
1672
1670 impl super::RevlogIndex for Index {
1673 impl super::RevlogIndex for Index {
1671 fn len(&self) -> usize {
1674 fn len(&self) -> usize {
1672 self.len()
1675 self.len()
1673 }
1676 }
1674
1677
1675 fn node(&self, rev: Revision) -> Option<&Node> {
1678 fn node(&self, rev: Revision) -> Option<&Node> {
1676 if rev == NULL_REVISION {
1679 if rev == NULL_REVISION {
1677 return Some(&NULL_NODE);
1680 return Some(&NULL_NODE);
1678 }
1681 }
1679 self.get_entry(rev).map(|entry| entry.hash())
1682 self.get_entry(rev).map(|entry| entry.hash())
1680 }
1683 }
1681 }
1684 }
1682
1685
1683 #[derive(Debug)]
1686 #[derive(Debug)]
1684 pub struct IndexEntry<'a> {
1687 pub struct IndexEntry<'a> {
1685 bytes: &'a [u8],
1688 bytes: &'a [u8],
1686 }
1689 }
1687
1690
1688 impl<'a> IndexEntry<'a> {
1691 impl<'a> IndexEntry<'a> {
1689 /// Return the offset of the data.
1692 /// Return the offset of the data.
1690 pub fn offset(&self) -> usize {
1693 pub fn offset(&self) -> usize {
1691 let mut bytes = [0; 8];
1694 let mut bytes = [0; 8];
1692 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1695 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1693 BigEndian::read_u64(&bytes[..]) as usize
1696 BigEndian::read_u64(&bytes[..]) as usize
1694 }
1697 }
1695 pub fn raw_offset(&self) -> u64 {
1698 pub fn raw_offset(&self) -> u64 {
1696 BigEndian::read_u64(&self.bytes[0..8])
1699 BigEndian::read_u64(&self.bytes[0..8])
1697 }
1700 }
1698
1701
1699 /// Same result (except potentially for rev 0) as C `index_get_start()`
1702 /// Same result (except potentially for rev 0) as C `index_get_start()`
1700 fn c_start(&self) -> u64 {
1703 fn c_start(&self) -> u64 {
1701 self.raw_offset() >> 16
1704 self.raw_offset() >> 16
1702 }
1705 }
1703
1706
1704 pub fn flags(&self) -> u16 {
1707 pub fn flags(&self) -> u16 {
1705 BigEndian::read_u16(&self.bytes[6..=7])
1708 BigEndian::read_u16(&self.bytes[6..=7])
1706 }
1709 }
1707
1710
1708 /// Return the compressed length of the data.
1711 /// Return the compressed length of the data.
1709 pub fn compressed_len(&self) -> u32 {
1712 pub fn compressed_len(&self) -> u32 {
1710 BigEndian::read_u32(&self.bytes[8..=11])
1713 BigEndian::read_u32(&self.bytes[8..=11])
1711 }
1714 }
1712
1715
1713 /// Return the uncompressed length of the data.
1716 /// Return the uncompressed length of the data.
1714 pub fn uncompressed_len(&self) -> i32 {
1717 pub fn uncompressed_len(&self) -> i32 {
1715 BigEndian::read_i32(&self.bytes[12..=15])
1718 BigEndian::read_i32(&self.bytes[12..=15])
1716 }
1719 }
1717
1720
1718 /// Return the revision upon which the data has been derived.
1721 /// Return the revision upon which the data has been derived.
1719 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1722 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1720 // TODO Maybe return an Option when base_revision == rev?
1723 // TODO Maybe return an Option when base_revision == rev?
1721 // Requires to add rev to IndexEntry
1724 // Requires to add rev to IndexEntry
1722
1725
1723 BigEndian::read_i32(&self.bytes[16..]).into()
1726 BigEndian::read_i32(&self.bytes[16..]).into()
1724 }
1727 }
1725
1728
1726 pub fn link_revision(&self) -> UncheckedRevision {
1729 pub fn link_revision(&self) -> UncheckedRevision {
1727 BigEndian::read_i32(&self.bytes[20..]).into()
1730 BigEndian::read_i32(&self.bytes[20..]).into()
1728 }
1731 }
1729
1732
1730 pub fn p1(&self) -> UncheckedRevision {
1733 pub fn p1(&self) -> UncheckedRevision {
1731 BigEndian::read_i32(&self.bytes[24..]).into()
1734 BigEndian::read_i32(&self.bytes[24..]).into()
1732 }
1735 }
1733
1736
1734 pub fn p2(&self) -> UncheckedRevision {
1737 pub fn p2(&self) -> UncheckedRevision {
1735 BigEndian::read_i32(&self.bytes[28..]).into()
1738 BigEndian::read_i32(&self.bytes[28..]).into()
1736 }
1739 }
1737
1740
1738 /// Return the hash of revision's full text.
1741 /// Return the hash of revision's full text.
1739 ///
1742 ///
1740 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1743 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1741 /// are used.
1744 /// are used.
1742 pub fn hash(&self) -> &'a Node {
1745 pub fn hash(&self) -> &'a Node {
1743 (&self.bytes[32..52]).try_into().unwrap()
1746 (&self.bytes[32..52]).try_into().unwrap()
1744 }
1747 }
1745
1748
1746 pub fn as_bytes(&self) -> &'a [u8] {
1749 pub fn as_bytes(&self) -> &'a [u8] {
1747 self.bytes
1750 self.bytes
1748 }
1751 }
1749 }
1752 }
1750
1753
1751 #[cfg(test)]
1754 #[cfg(test)]
1755 pub use tests::IndexEntryBuilder;
1756
1757 #[cfg(test)]
1752 mod tests {
1758 mod tests {
1753 use super::*;
1759 use super::*;
1754 use crate::node::NULL_NODE;
1760 use crate::node::NULL_NODE;
1755
1761
1756 #[cfg(test)]
1762 #[cfg(test)]
1757 #[derive(Debug, Copy, Clone)]
1763 #[derive(Debug, Copy, Clone)]
1758 pub struct IndexEntryBuilder {
1764 pub struct IndexEntryBuilder {
1759 is_first: bool,
1765 is_first: bool,
1760 is_inline: bool,
1766 is_inline: bool,
1761 is_general_delta: bool,
1767 is_general_delta: bool,
1762 version: u16,
1768 version: u16,
1763 offset: usize,
1769 offset: usize,
1764 compressed_len: usize,
1770 compressed_len: usize,
1765 uncompressed_len: usize,
1771 uncompressed_len: usize,
1766 base_revision_or_base_of_delta_chain: Revision,
1772 base_revision_or_base_of_delta_chain: Revision,
1767 link_revision: Revision,
1773 link_revision: Revision,
1768 p1: Revision,
1774 p1: Revision,
1769 p2: Revision,
1775 p2: Revision,
1770 node: Node,
1776 node: Node,
1771 }
1777 }
1772
1778
1773 #[cfg(test)]
1779 #[cfg(test)]
1774 impl IndexEntryBuilder {
1780 impl IndexEntryBuilder {
1775 #[allow(clippy::new_without_default)]
1781 #[allow(clippy::new_without_default)]
1776 pub fn new() -> Self {
1782 pub fn new() -> Self {
1777 Self {
1783 Self {
1778 is_first: false,
1784 is_first: false,
1779 is_inline: false,
1785 is_inline: false,
1780 is_general_delta: true,
1786 is_general_delta: true,
1781 version: 1,
1787 version: 1,
1782 offset: 0,
1788 offset: 0,
1783 compressed_len: 0,
1789 compressed_len: 0,
1784 uncompressed_len: 0,
1790 uncompressed_len: 0,
1785 base_revision_or_base_of_delta_chain: Revision(0),
1791 base_revision_or_base_of_delta_chain: Revision(0),
1786 link_revision: Revision(0),
1792 link_revision: Revision(0),
1787 p1: NULL_REVISION,
1793 p1: NULL_REVISION,
1788 p2: NULL_REVISION,
1794 p2: NULL_REVISION,
1789 node: NULL_NODE,
1795 node: NULL_NODE,
1790 }
1796 }
1791 }
1797 }
1792
1798
1793 pub fn is_first(&mut self, value: bool) -> &mut Self {
1799 pub fn is_first(&mut self, value: bool) -> &mut Self {
1794 self.is_first = value;
1800 self.is_first = value;
1795 self
1801 self
1796 }
1802 }
1797
1803
1798 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1804 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1799 self.is_inline = value;
1805 self.is_inline = value;
1800 self
1806 self
1801 }
1807 }
1802
1808
1803 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1809 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1804 self.is_general_delta = value;
1810 self.is_general_delta = value;
1805 self
1811 self
1806 }
1812 }
1807
1813
1808 pub fn with_version(&mut self, value: u16) -> &mut Self {
1814 pub fn with_version(&mut self, value: u16) -> &mut Self {
1809 self.version = value;
1815 self.version = value;
1810 self
1816 self
1811 }
1817 }
1812
1818
1813 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1819 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1814 self.offset = value;
1820 self.offset = value;
1815 self
1821 self
1816 }
1822 }
1817
1823
1818 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1824 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1819 self.compressed_len = value;
1825 self.compressed_len = value;
1820 self
1826 self
1821 }
1827 }
1822
1828
1823 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1829 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1824 self.uncompressed_len = value;
1830 self.uncompressed_len = value;
1825 self
1831 self
1826 }
1832 }
1827
1833
1828 pub fn with_base_revision_or_base_of_delta_chain(
1834 pub fn with_base_revision_or_base_of_delta_chain(
1829 &mut self,
1835 &mut self,
1830 value: Revision,
1836 value: Revision,
1831 ) -> &mut Self {
1837 ) -> &mut Self {
1832 self.base_revision_or_base_of_delta_chain = value;
1838 self.base_revision_or_base_of_delta_chain = value;
1833 self
1839 self
1834 }
1840 }
1835
1841
1836 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1842 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1837 self.link_revision = value;
1843 self.link_revision = value;
1838 self
1844 self
1839 }
1845 }
1840
1846
1841 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1847 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1842 self.p1 = value;
1848 self.p1 = value;
1843 self
1849 self
1844 }
1850 }
1845
1851
1846 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1852 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1847 self.p2 = value;
1853 self.p2 = value;
1848 self
1854 self
1849 }
1855 }
1850
1856
1851 pub fn with_node(&mut self, value: Node) -> &mut Self {
1857 pub fn with_node(&mut self, value: Node) -> &mut Self {
1852 self.node = value;
1858 self.node = value;
1853 self
1859 self
1854 }
1860 }
1855
1861
1856 pub fn build(&self) -> Vec<u8> {
1862 pub fn build(&self) -> Vec<u8> {
1857 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1863 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1858 if self.is_first {
1864 if self.is_first {
1859 bytes.extend(match (self.is_general_delta, self.is_inline) {
1865 bytes.extend(match (self.is_general_delta, self.is_inline) {
1860 (false, false) => [0u8, 0],
1866 (false, false) => [0u8, 0],
1861 (false, true) => [0u8, 1],
1867 (false, true) => [0u8, 1],
1862 (true, false) => [0u8, 2],
1868 (true, false) => [0u8, 2],
1863 (true, true) => [0u8, 3],
1869 (true, true) => [0u8, 3],
1864 });
1870 });
1865 bytes.extend(self.version.to_be_bytes());
1871 bytes.extend(self.version.to_be_bytes());
1866 // Remaining offset bytes.
1872 // Remaining offset bytes.
1867 bytes.extend([0u8; 2]);
1873 bytes.extend([0u8; 2]);
1868 } else {
1874 } else {
1869 // Offset stored on 48 bits (6 bytes)
1875 // Offset stored on 48 bits (6 bytes)
1870 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1876 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1871 }
1877 }
1872 bytes.extend([0u8; 2]); // Revision flags.
1878 bytes.extend([0u8; 2]); // Revision flags.
1873 bytes.extend((self.compressed_len as u32).to_be_bytes());
1879 bytes.extend((self.compressed_len as u32).to_be_bytes());
1874 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1880 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1875 bytes.extend(
1881 bytes.extend(
1876 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1882 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1877 );
1883 );
1878 bytes.extend(self.link_revision.0.to_be_bytes());
1884 bytes.extend(self.link_revision.0.to_be_bytes());
1879 bytes.extend(self.p1.0.to_be_bytes());
1885 bytes.extend(self.p1.0.to_be_bytes());
1880 bytes.extend(self.p2.0.to_be_bytes());
1886 bytes.extend(self.p2.0.to_be_bytes());
1881 bytes.extend(self.node.as_bytes());
1887 bytes.extend(self.node.as_bytes());
1882 bytes.extend(vec![0u8; 12]);
1888 bytes.extend(vec![0u8; 12]);
1883 bytes
1889 bytes
1884 }
1890 }
1885 }
1891 }
1886
1892
1887 pub fn is_inline(index_bytes: &[u8]) -> bool {
1893 pub fn is_inline(index_bytes: &[u8]) -> bool {
1888 IndexHeader::parse(index_bytes)
1894 IndexHeader::parse(index_bytes)
1889 .expect("too short")
1895 .expect("too short")
1890 .unwrap()
1896 .unwrap()
1891 .format_flags()
1897 .format_flags()
1892 .is_inline()
1898 .is_inline()
1893 }
1899 }
1894
1900
1895 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1901 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1896 IndexHeader::parse(index_bytes)
1902 IndexHeader::parse(index_bytes)
1897 .expect("too short")
1903 .expect("too short")
1898 .unwrap()
1904 .unwrap()
1899 .format_flags()
1905 .format_flags()
1900 .uses_generaldelta()
1906 .uses_generaldelta()
1901 }
1907 }
1902
1908
1903 pub fn get_version(index_bytes: &[u8]) -> u16 {
1909 pub fn get_version(index_bytes: &[u8]) -> u16 {
1904 IndexHeader::parse(index_bytes)
1910 IndexHeader::parse(index_bytes)
1905 .expect("too short")
1911 .expect("too short")
1906 .unwrap()
1912 .unwrap()
1907 .format_version()
1913 .format_version()
1908 }
1914 }
1909
1915
1910 #[test]
1916 #[test]
1911 fn flags_when_no_inline_flag_test() {
1917 fn flags_when_no_inline_flag_test() {
1912 let bytes = IndexEntryBuilder::new()
1918 let bytes = IndexEntryBuilder::new()
1913 .is_first(true)
1919 .is_first(true)
1914 .with_general_delta(false)
1920 .with_general_delta(false)
1915 .with_inline(false)
1921 .with_inline(false)
1916 .build();
1922 .build();
1917
1923
1918 assert!(!is_inline(&bytes));
1924 assert!(!is_inline(&bytes));
1919 assert!(!uses_generaldelta(&bytes));
1925 assert!(!uses_generaldelta(&bytes));
1920 }
1926 }
1921
1927
1922 #[test]
1928 #[test]
1923 fn flags_when_inline_flag_test() {
1929 fn flags_when_inline_flag_test() {
1924 let bytes = IndexEntryBuilder::new()
1930 let bytes = IndexEntryBuilder::new()
1925 .is_first(true)
1931 .is_first(true)
1926 .with_general_delta(false)
1932 .with_general_delta(false)
1927 .with_inline(true)
1933 .with_inline(true)
1928 .build();
1934 .build();
1929
1935
1930 assert!(is_inline(&bytes));
1936 assert!(is_inline(&bytes));
1931 assert!(!uses_generaldelta(&bytes));
1937 assert!(!uses_generaldelta(&bytes));
1932 }
1938 }
1933
1939
1934 #[test]
1940 #[test]
1935 fn flags_when_inline_and_generaldelta_flags_test() {
1941 fn flags_when_inline_and_generaldelta_flags_test() {
1936 let bytes = IndexEntryBuilder::new()
1942 let bytes = IndexEntryBuilder::new()
1937 .is_first(true)
1943 .is_first(true)
1938 .with_general_delta(true)
1944 .with_general_delta(true)
1939 .with_inline(true)
1945 .with_inline(true)
1940 .build();
1946 .build();
1941
1947
1942 assert!(is_inline(&bytes));
1948 assert!(is_inline(&bytes));
1943 assert!(uses_generaldelta(&bytes));
1949 assert!(uses_generaldelta(&bytes));
1944 }
1950 }
1945
1951
1946 #[test]
1952 #[test]
1947 fn test_offset() {
1953 fn test_offset() {
1948 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1954 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1949 let entry = IndexEntry { bytes: &bytes };
1955 let entry = IndexEntry { bytes: &bytes };
1950
1956
1951 assert_eq!(entry.offset(), 1)
1957 assert_eq!(entry.offset(), 1)
1952 }
1958 }
1953
1959
1954 #[test]
1960 #[test]
1955 fn test_compressed_len() {
1961 fn test_compressed_len() {
1956 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1962 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1957 let entry = IndexEntry { bytes: &bytes };
1963 let entry = IndexEntry { bytes: &bytes };
1958
1964
1959 assert_eq!(entry.compressed_len(), 1)
1965 assert_eq!(entry.compressed_len(), 1)
1960 }
1966 }
1961
1967
1962 #[test]
1968 #[test]
1963 fn test_uncompressed_len() {
1969 fn test_uncompressed_len() {
1964 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1970 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1965 let entry = IndexEntry { bytes: &bytes };
1971 let entry = IndexEntry { bytes: &bytes };
1966
1972
1967 assert_eq!(entry.uncompressed_len(), 1)
1973 assert_eq!(entry.uncompressed_len(), 1)
1968 }
1974 }
1969
1975
1970 #[test]
1976 #[test]
1971 fn test_base_revision_or_base_of_delta_chain() {
1977 fn test_base_revision_or_base_of_delta_chain() {
1972 let bytes = IndexEntryBuilder::new()
1978 let bytes = IndexEntryBuilder::new()
1973 .with_base_revision_or_base_of_delta_chain(Revision(1))
1979 .with_base_revision_or_base_of_delta_chain(Revision(1))
1974 .build();
1980 .build();
1975 let entry = IndexEntry { bytes: &bytes };
1981 let entry = IndexEntry { bytes: &bytes };
1976
1982
1977 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1983 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1978 }
1984 }
1979
1985
1980 #[test]
1986 #[test]
1981 fn link_revision_test() {
1987 fn link_revision_test() {
1982 let bytes = IndexEntryBuilder::new()
1988 let bytes = IndexEntryBuilder::new()
1983 .with_link_revision(Revision(123))
1989 .with_link_revision(Revision(123))
1984 .build();
1990 .build();
1985
1991
1986 let entry = IndexEntry { bytes: &bytes };
1992 let entry = IndexEntry { bytes: &bytes };
1987
1993
1988 assert_eq!(entry.link_revision(), 123.into());
1994 assert_eq!(entry.link_revision(), 123.into());
1989 }
1995 }
1990
1996
1991 #[test]
1997 #[test]
1992 fn p1_test() {
1998 fn p1_test() {
1993 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1999 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1994
2000
1995 let entry = IndexEntry { bytes: &bytes };
2001 let entry = IndexEntry { bytes: &bytes };
1996
2002
1997 assert_eq!(entry.p1(), 123.into());
2003 assert_eq!(entry.p1(), 123.into());
1998 }
2004 }
1999
2005
2000 #[test]
2006 #[test]
2001 fn p2_test() {
2007 fn p2_test() {
2002 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
2008 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
2003
2009
2004 let entry = IndexEntry { bytes: &bytes };
2010 let entry = IndexEntry { bytes: &bytes };
2005
2011
2006 assert_eq!(entry.p2(), 123.into());
2012 assert_eq!(entry.p2(), 123.into());
2007 }
2013 }
2008
2014
2009 #[test]
2015 #[test]
2010 fn node_test() {
2016 fn node_test() {
2011 let node = Node::from_hex("0123456789012345678901234567890123456789")
2017 let node = Node::from_hex("0123456789012345678901234567890123456789")
2012 .unwrap();
2018 .unwrap();
2013 let bytes = IndexEntryBuilder::new().with_node(node).build();
2019 let bytes = IndexEntryBuilder::new().with_node(node).build();
2014
2020
2015 let entry = IndexEntry { bytes: &bytes };
2021 let entry = IndexEntry { bytes: &bytes };
2016
2022
2017 assert_eq!(*entry.hash(), node);
2023 assert_eq!(*entry.hash(), node);
2018 }
2024 }
2019
2025
2020 #[test]
2026 #[test]
2021 fn version_test() {
2027 fn version_test() {
2022 let bytes = IndexEntryBuilder::new()
2028 let bytes = IndexEntryBuilder::new()
2023 .is_first(true)
2029 .is_first(true)
2024 .with_version(2)
2030 .with_version(2)
2025 .build();
2031 .build();
2026
2032
2027 assert_eq!(get_version(&bytes), 2)
2033 assert_eq!(get_version(&bytes), 2)
2028 }
2034 }
2029 }
2035 }
2030
2031 #[cfg(test)]
2032 pub use tests::IndexEntryBuilder;
@@ -1,433 +1,433
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use bytes_cast::BytesCast;
12 use bytes_cast::BytesCast;
13 use std::fmt;
13 use std::fmt;
14
14
15 /// The length in bytes of a `Node`
15 /// The length in bytes of a `Node`
16 ///
16 ///
17 /// This constant is meant to ease refactors of this module, and
17 /// This constant is meant to ease refactors of this module, and
18 /// are private so that calling code does not expect all nodes have
18 /// are private so that calling code does not expect all nodes have
19 /// the same size, should we support several formats concurrently in
19 /// the same size, should we support several formats concurrently in
20 /// the future.
20 /// the future.
21 pub const NODE_BYTES_LENGTH: usize = 20;
21 pub const NODE_BYTES_LENGTH: usize = 20;
22
22
23 /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
23 /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
24 /// use 20 out of those 32.
24 /// use 20 out of those 32.
25 pub const STORED_NODE_ID_BYTES: usize = 32;
25 pub const STORED_NODE_ID_BYTES: usize = 32;
26
26
27 /// Id of the null node.
27 /// Id of the null node.
28 ///
28 ///
29 /// Used to indicate the absence of node.
29 /// Used to indicate the absence of node.
30 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
30 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
31
31
32 /// The length in bytes of a `Node`
32 /// The length in bytes of a `Node`
33 ///
33 ///
34 /// see also `NODES_BYTES_LENGTH` about it being private.
34 /// see also `NODES_BYTES_LENGTH` about it being private.
35 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
35 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
36
36
37 /// Default for UI presentation
37 /// Default for UI presentation
38 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
38 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
39
39
40 /// Private alias for readability and to ease future change
40 /// Private alias for readability and to ease future change
41 type NodeData = [u8; NODE_BYTES_LENGTH];
41 type NodeData = [u8; NODE_BYTES_LENGTH];
42
42
43 /// Binary revision SHA
43 /// Binary revision SHA
44 ///
44 ///
45 /// ## Future changes of hash size
45 /// ## Future changes of hash size
46 ///
46 ///
47 /// To accomodate future changes of hash size, Rust callers
47 /// To accomodate future changes of hash size, Rust callers
48 /// should use the conversion methods at the boundaries (FFI, actual
48 /// should use the conversion methods at the boundaries (FFI, actual
49 /// computation of hashes and I/O) only, and only if required.
49 /// computation of hashes and I/O) only, and only if required.
50 ///
50 ///
51 /// All other callers outside of unit tests should just handle `Node` values
51 /// All other callers outside of unit tests should just handle `Node` values
52 /// and never make any assumption on the actual length, using [`nybbles_len`]
52 /// and never make any assumption on the actual length, using [`nybbles_len`]
53 /// if they need a loop boundary.
53 /// if they need a loop boundary.
54 ///
54 ///
55 /// All methods that create a `Node` either take a type that enforces
55 /// All methods that create a `Node` either take a type that enforces
56 /// the size or return an error at runtime.
56 /// the size or return an error at runtime.
57 ///
57 ///
58 /// [`nybbles_len`]: #method.nybbles_len
58 /// [`nybbles_len`]: #method.nybbles_len
59 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
59 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
60 #[repr(transparent)]
60 #[repr(transparent)]
61 pub struct Node {
61 pub struct Node {
62 data: NodeData,
62 data: NodeData,
63 }
63 }
64
64
65 impl fmt::Debug for Node {
65 impl fmt::Debug for Node {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67 let n = format!("{:x?}", self.data);
67 let n = format!("{:x?}", self.data);
68 // We're using debug_tuple because it makes the output a little
68 // We're using debug_tuple because it makes the output a little
69 // more compact without losing data.
69 // more compact without losing data.
70 f.debug_tuple("Node").field(&n).finish()
70 f.debug_tuple("Node").field(&n).finish()
71 }
71 }
72 }
72 }
73
73
74 /// The node value for NULL_REVISION
74 /// The node value for NULL_REVISION
75 pub const NULL_NODE: Node = Node {
75 pub const NULL_NODE: Node = Node {
76 data: [0; NODE_BYTES_LENGTH],
76 data: [0; NODE_BYTES_LENGTH],
77 };
77 };
78
78
79 /// Return an error if the slice has an unexpected length
79 /// Return an error if the slice has an unexpected length
80 impl<'a> TryFrom<&'a [u8]> for &'a Node {
80 impl<'a> TryFrom<&'a [u8]> for &'a Node {
81 type Error = ();
81 type Error = ();
82
82
83 #[inline]
83 #[inline]
84 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
84 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
85 match Node::from_bytes(bytes) {
85 match Node::from_bytes(bytes) {
86 Ok((node, rest)) if rest.is_empty() => Ok(node),
86 Ok((node, [])) => Ok(node),
87 _ => Err(()),
87 _ => Err(()),
88 }
88 }
89 }
89 }
90 }
90 }
91
91
92 /// Return an error if the slice has an unexpected length
92 /// Return an error if the slice has an unexpected length
93 impl TryFrom<&'_ [u8]> for Node {
93 impl TryFrom<&'_ [u8]> for Node {
94 type Error = std::array::TryFromSliceError;
94 type Error = std::array::TryFromSliceError;
95
95
96 #[inline]
96 #[inline]
97 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
97 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
98 let data = bytes.try_into()?;
98 let data = bytes.try_into()?;
99 Ok(Self { data })
99 Ok(Self { data })
100 }
100 }
101 }
101 }
102
102
103 impl From<&'_ NodeData> for Node {
103 impl From<&'_ NodeData> for Node {
104 #[inline]
104 #[inline]
105 fn from(data: &'_ NodeData) -> Self {
105 fn from(data: &'_ NodeData) -> Self {
106 Self { data: *data }
106 Self { data: *data }
107 }
107 }
108 }
108 }
109
109
110 impl fmt::LowerHex for Node {
110 impl fmt::LowerHex for Node {
111 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
112 for &byte in &self.data {
112 for &byte in &self.data {
113 write!(f, "{:02x}", byte)?
113 write!(f, "{:02x}", byte)?
114 }
114 }
115 Ok(())
115 Ok(())
116 }
116 }
117 }
117 }
118
118
119 #[derive(Debug)]
119 #[derive(Debug)]
120 pub struct FromHexError;
120 pub struct FromHexError;
121
121
122 /// Low level utility function, also for prefixes
122 /// Low level utility function, also for prefixes
123 fn get_nybble(s: &[u8], i: usize) -> u8 {
123 fn get_nybble(s: &[u8], i: usize) -> u8 {
124 if i % 2 == 0 {
124 if i % 2 == 0 {
125 s[i / 2] >> 4
125 s[i / 2] >> 4
126 } else {
126 } else {
127 s[i / 2] & 0x0f
127 s[i / 2] & 0x0f
128 }
128 }
129 }
129 }
130
130
131 impl Node {
131 impl Node {
132 /// Retrieve the `i`th half-byte of the binary data.
132 /// Retrieve the `i`th half-byte of the binary data.
133 ///
133 ///
134 /// This is also the `i`th hexadecimal digit in numeric form,
134 /// This is also the `i`th hexadecimal digit in numeric form,
135 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
135 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
136 pub fn get_nybble(&self, i: usize) -> u8 {
136 pub fn get_nybble(&self, i: usize) -> u8 {
137 get_nybble(&self.data, i)
137 get_nybble(&self.data, i)
138 }
138 }
139
139
140 /// Length of the data, in nybbles
140 /// Length of the data, in nybbles
141 pub fn nybbles_len(&self) -> usize {
141 pub fn nybbles_len(&self) -> usize {
142 // public exposure as an instance method only, so that we can
142 // public exposure as an instance method only, so that we can
143 // easily support several sizes of hashes if needed in the future.
143 // easily support several sizes of hashes if needed in the future.
144 NODE_NYBBLES_LENGTH
144 NODE_NYBBLES_LENGTH
145 }
145 }
146
146
147 /// Convert from hexadecimal string representation
147 /// Convert from hexadecimal string representation
148 ///
148 ///
149 /// Exact length is required.
149 /// Exact length is required.
150 ///
150 ///
151 /// To be used in FFI and I/O only, in order to facilitate future
151 /// To be used in FFI and I/O only, in order to facilitate future
152 /// changes of hash format.
152 /// changes of hash format.
153 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
153 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
154 let prefix = NodePrefix::from_hex(hex)?;
154 let prefix = NodePrefix::from_hex(hex)?;
155 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
155 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
156 Ok(Self { data: prefix.data })
156 Ok(Self { data: prefix.data })
157 } else {
157 } else {
158 Err(FromHexError)
158 Err(FromHexError)
159 }
159 }
160 }
160 }
161
161
162 /// `from_hex`, but for input from an internal file of the repository such
162 /// `from_hex`, but for input from an internal file of the repository such
163 /// as a changelog or manifest entry.
163 /// as a changelog or manifest entry.
164 ///
164 ///
165 /// An error is treated as repository corruption.
165 /// An error is treated as repository corruption.
166 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
166 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
167 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
167 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
168 HgError::CorruptedRepository(format!(
168 HgError::CorruptedRepository(format!(
169 "Expected a full hexadecimal node ID, found {}",
169 "Expected a full hexadecimal node ID, found {}",
170 String::from_utf8_lossy(hex.as_ref())
170 String::from_utf8_lossy(hex.as_ref())
171 ))
171 ))
172 })
172 })
173 }
173 }
174
174
175 /// Provide access to binary data
175 /// Provide access to binary data
176 ///
176 ///
177 /// This is needed by FFI layers, for instance to return expected
177 /// This is needed by FFI layers, for instance to return expected
178 /// binary values to Python.
178 /// binary values to Python.
179 pub fn as_bytes(&self) -> &[u8] {
179 pub fn as_bytes(&self) -> &[u8] {
180 &self.data
180 &self.data
181 }
181 }
182
182
183 pub fn short(&self) -> NodePrefix {
183 pub fn short(&self) -> NodePrefix {
184 NodePrefix {
184 NodePrefix {
185 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
185 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
186 data: self.data,
186 data: self.data,
187 }
187 }
188 }
188 }
189
189
190 pub fn pad_to_256_bits(&self) -> [u8; 32] {
190 pub fn pad_to_256_bits(&self) -> [u8; 32] {
191 let mut bits = [0; 32];
191 let mut bits = [0; 32];
192 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
192 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
193 bits
193 bits
194 }
194 }
195 }
195 }
196
196
197 /// The beginning of a binary revision SHA.
197 /// The beginning of a binary revision SHA.
198 ///
198 ///
199 /// Since it can potentially come from an hexadecimal representation with
199 /// Since it can potentially come from an hexadecimal representation with
200 /// odd length, it needs to carry around whether the last 4 bits are relevant
200 /// odd length, it needs to carry around whether the last 4 bits are relevant
201 /// or not.
201 /// or not.
202 #[derive(Debug, PartialEq, Copy, Clone)]
202 #[derive(Debug, PartialEq, Copy, Clone)]
203 pub struct NodePrefix {
203 pub struct NodePrefix {
204 /// In `1..=NODE_NYBBLES_LENGTH`
204 /// In `1..=NODE_NYBBLES_LENGTH`
205 nybbles_len: u8,
205 nybbles_len: u8,
206 /// The first `4 * length_in_nybbles` bits are used (considering bits
206 /// The first `4 * length_in_nybbles` bits are used (considering bits
207 /// within a bytes in big-endian: most significant first), the rest
207 /// within a bytes in big-endian: most significant first), the rest
208 /// are zero.
208 /// are zero.
209 data: NodeData,
209 data: NodeData,
210 }
210 }
211
211
212 impl NodePrefix {
212 impl NodePrefix {
213 /// Convert from hexadecimal string representation
213 /// Convert from hexadecimal string representation
214 ///
214 ///
215 /// Similarly to `hex::decode`, can be used with Unicode string types
215 /// Similarly to `hex::decode`, can be used with Unicode string types
216 /// (`String`, `&str`) as well as bytes.
216 /// (`String`, `&str`) as well as bytes.
217 ///
217 ///
218 /// To be used in FFI and I/O only, in order to facilitate future
218 /// To be used in FFI and I/O only, in order to facilitate future
219 /// changes of hash format.
219 /// changes of hash format.
220 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
220 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
221 let hex = hex.as_ref();
221 let hex = hex.as_ref();
222 let len = hex.len();
222 let len = hex.len();
223 if len > NODE_NYBBLES_LENGTH || len == 0 {
223 if len > NODE_NYBBLES_LENGTH || len == 0 {
224 return Err(FromHexError);
224 return Err(FromHexError);
225 }
225 }
226
226
227 let mut data = [0; NODE_BYTES_LENGTH];
227 let mut data = [0; NODE_BYTES_LENGTH];
228 let mut nybbles_len = 0;
228 let mut nybbles_len = 0;
229 for &ascii_byte in hex {
229 for &ascii_byte in hex {
230 let nybble = match char::from(ascii_byte).to_digit(16) {
230 let nybble = match char::from(ascii_byte).to_digit(16) {
231 Some(digit) => digit as u8,
231 Some(digit) => digit as u8,
232 None => return Err(FromHexError),
232 None => return Err(FromHexError),
233 };
233 };
234 // Fill in the upper half of a byte first, then the lower half.
234 // Fill in the upper half of a byte first, then the lower half.
235 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
235 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
236 data[nybbles_len as usize / 2] |= nybble << shift;
236 data[nybbles_len as usize / 2] |= nybble << shift;
237 nybbles_len += 1;
237 nybbles_len += 1;
238 }
238 }
239 Ok(Self { data, nybbles_len })
239 Ok(Self { data, nybbles_len })
240 }
240 }
241
241
242 pub fn nybbles_len(&self) -> usize {
242 pub fn nybbles_len(&self) -> usize {
243 self.nybbles_len as _
243 self.nybbles_len as _
244 }
244 }
245
245
246 pub fn is_prefix_of(&self, node: &Node) -> bool {
246 pub fn is_prefix_of(&self, node: &Node) -> bool {
247 let full_bytes = self.nybbles_len() / 2;
247 let full_bytes = self.nybbles_len() / 2;
248 if self.data[..full_bytes] != node.data[..full_bytes] {
248 if self.data[..full_bytes] != node.data[..full_bytes] {
249 return false;
249 return false;
250 }
250 }
251 if self.nybbles_len() % 2 == 0 {
251 if self.nybbles_len() % 2 == 0 {
252 return true;
252 return true;
253 }
253 }
254 let last = self.nybbles_len() - 1;
254 let last = self.nybbles_len() - 1;
255 self.get_nybble(last) == node.get_nybble(last)
255 self.get_nybble(last) == node.get_nybble(last)
256 }
256 }
257
257
258 /// Retrieve the `i`th half-byte from the prefix.
258 /// Retrieve the `i`th half-byte from the prefix.
259 ///
259 ///
260 /// This is also the `i`th hexadecimal digit in numeric form,
260 /// This is also the `i`th hexadecimal digit in numeric form,
261 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
261 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
262 pub fn get_nybble(&self, i: usize) -> u8 {
262 pub fn get_nybble(&self, i: usize) -> u8 {
263 assert!(i < self.nybbles_len());
263 assert!(i < self.nybbles_len());
264 get_nybble(&self.data, i)
264 get_nybble(&self.data, i)
265 }
265 }
266
266
267 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
267 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
268 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
268 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
269 }
269 }
270
270
271 /// Return the index first nybble that's different from `node`
271 /// Return the index first nybble that's different from `node`
272 ///
272 ///
273 /// If the return value is `None` that means that `self` is
273 /// If the return value is `None` that means that `self` is
274 /// a prefix of `node`, but the current method is a bit slower
274 /// a prefix of `node`, but the current method is a bit slower
275 /// than `is_prefix_of`.
275 /// than `is_prefix_of`.
276 ///
276 ///
277 /// Returned index is as in `get_nybble`, i.e., starting at 0.
277 /// Returned index is as in `get_nybble`, i.e., starting at 0.
278 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
278 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
279 self.iter_nybbles()
279 self.iter_nybbles()
280 .zip(NodePrefix::from(*node).iter_nybbles())
280 .zip(NodePrefix::from(*node).iter_nybbles())
281 .position(|(a, b)| a != b)
281 .position(|(a, b)| a != b)
282 }
282 }
283 }
283 }
284
284
285 impl fmt::LowerHex for NodePrefix {
285 impl fmt::LowerHex for NodePrefix {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
287 let full_bytes = self.nybbles_len() / 2;
287 let full_bytes = self.nybbles_len() / 2;
288 for &byte in &self.data[..full_bytes] {
288 for &byte in &self.data[..full_bytes] {
289 write!(f, "{:02x}", byte)?
289 write!(f, "{:02x}", byte)?
290 }
290 }
291 if self.nybbles_len() % 2 == 1 {
291 if self.nybbles_len() % 2 == 1 {
292 let last = self.nybbles_len() - 1;
292 let last = self.nybbles_len() - 1;
293 write!(f, "{:x}", self.get_nybble(last))?
293 write!(f, "{:x}", self.get_nybble(last))?
294 }
294 }
295 Ok(())
295 Ok(())
296 }
296 }
297 }
297 }
298
298
299 /// A shortcut for full `Node` references
299 /// A shortcut for full `Node` references
300 impl From<&'_ Node> for NodePrefix {
300 impl From<&'_ Node> for NodePrefix {
301 fn from(node: &'_ Node) -> Self {
301 fn from(node: &'_ Node) -> Self {
302 NodePrefix {
302 NodePrefix {
303 nybbles_len: node.nybbles_len() as _,
303 nybbles_len: node.nybbles_len() as _,
304 data: node.data,
304 data: node.data,
305 }
305 }
306 }
306 }
307 }
307 }
308
308
309 /// A shortcut for full `Node` references
309 /// A shortcut for full `Node` references
310 impl From<Node> for NodePrefix {
310 impl From<Node> for NodePrefix {
311 fn from(node: Node) -> Self {
311 fn from(node: Node) -> Self {
312 NodePrefix {
312 NodePrefix {
313 nybbles_len: node.nybbles_len() as _,
313 nybbles_len: node.nybbles_len() as _,
314 data: node.data,
314 data: node.data,
315 }
315 }
316 }
316 }
317 }
317 }
318
318
319 impl PartialEq<Node> for NodePrefix {
319 impl PartialEq<Node> for NodePrefix {
320 fn eq(&self, other: &Node) -> bool {
320 fn eq(&self, other: &Node) -> bool {
321 self.data == other.data && self.nybbles_len() == other.nybbles_len()
321 self.data == other.data && self.nybbles_len() == other.nybbles_len()
322 }
322 }
323 }
323 }
324
324
325 #[cfg(test)]
325 #[cfg(test)]
326 pub use tests::hex_pad_right;
327
328 #[cfg(test)]
326 mod tests {
329 mod tests {
327 use super::*;
330 use super::*;
328
331
329 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
332 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
330 const SAMPLE_NODE: Node = Node {
333 const SAMPLE_NODE: Node = Node {
331 data: [
334 data: [
332 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
335 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
333 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
336 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
334 ],
337 ],
335 };
338 };
336
339
337 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
340 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
338 /// The padding is made with zeros.
341 /// The padding is made with zeros.
339 pub fn hex_pad_right(hex: &str) -> String {
342 pub fn hex_pad_right(hex: &str) -> String {
340 let mut res = hex.to_string();
343 let mut res = hex.to_string();
341 while res.len() < NODE_NYBBLES_LENGTH {
344 while res.len() < NODE_NYBBLES_LENGTH {
342 res.push('0');
345 res.push('0');
343 }
346 }
344 res
347 res
345 }
348 }
346
349
347 #[test]
350 #[test]
348 fn test_node_from_hex() {
351 fn test_node_from_hex() {
349 let not_hex = "012... oops";
352 let not_hex = "012... oops";
350 let too_short = "0123";
353 let too_short = "0123";
351 let too_long = format!("{}0", SAMPLE_NODE_HEX);
354 let too_long = format!("{}0", SAMPLE_NODE_HEX);
352 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
355 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
353 assert!(Node::from_hex(not_hex).is_err());
356 assert!(Node::from_hex(not_hex).is_err());
354 assert!(Node::from_hex(too_short).is_err());
357 assert!(Node::from_hex(too_short).is_err());
355 assert!(Node::from_hex(too_long).is_err());
358 assert!(Node::from_hex(too_long).is_err());
356 }
359 }
357
360
358 #[test]
361 #[test]
359 fn test_node_encode_hex() {
362 fn test_node_encode_hex() {
360 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
363 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
361 }
364 }
362
365
363 #[test]
366 #[test]
364 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
367 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
365 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
368 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
366 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
369 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
367 assert_eq!(
370 assert_eq!(
368 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
371 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
369 SAMPLE_NODE_HEX
372 SAMPLE_NODE_HEX
370 );
373 );
371 Ok(())
374 Ok(())
372 }
375 }
373
376
374 #[test]
377 #[test]
375 fn test_prefix_from_hex_errors() {
378 fn test_prefix_from_hex_errors() {
376 assert!(NodePrefix::from_hex("testgr").is_err());
379 assert!(NodePrefix::from_hex("testgr").is_err());
377 let mut long = format!("{:x}", NULL_NODE);
380 let mut long = format!("{:x}", NULL_NODE);
378 long.push('c');
381 long.push('c');
379 assert!(NodePrefix::from_hex(&long).is_err())
382 assert!(NodePrefix::from_hex(&long).is_err())
380 }
383 }
381
384
382 #[test]
385 #[test]
383 fn test_is_prefix_of() -> Result<(), FromHexError> {
386 fn test_is_prefix_of() -> Result<(), FromHexError> {
384 let mut node_data = [0; NODE_BYTES_LENGTH];
387 let mut node_data = [0; NODE_BYTES_LENGTH];
385 node_data[0] = 0x12;
388 node_data[0] = 0x12;
386 node_data[1] = 0xca;
389 node_data[1] = 0xca;
387 let node = Node::from(node_data);
390 let node = Node::from(node_data);
388 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
391 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
389 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
392 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
390 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
393 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
391 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
394 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
392 Ok(())
395 Ok(())
393 }
396 }
394
397
395 #[test]
398 #[test]
396 fn test_get_nybble() -> Result<(), FromHexError> {
399 fn test_get_nybble() -> Result<(), FromHexError> {
397 let prefix = NodePrefix::from_hex("dead6789cafe")?;
400 let prefix = NodePrefix::from_hex("dead6789cafe")?;
398 assert_eq!(prefix.get_nybble(0), 13);
401 assert_eq!(prefix.get_nybble(0), 13);
399 assert_eq!(prefix.get_nybble(7), 9);
402 assert_eq!(prefix.get_nybble(7), 9);
400 Ok(())
403 Ok(())
401 }
404 }
402
405
403 #[test]
406 #[test]
404 fn test_first_different_nybble_even_prefix() {
407 fn test_first_different_nybble_even_prefix() {
405 let prefix = NodePrefix::from_hex("12ca").unwrap();
408 let prefix = NodePrefix::from_hex("12ca").unwrap();
406 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
409 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
407 assert_eq!(prefix.first_different_nybble(&node), Some(0));
410 assert_eq!(prefix.first_different_nybble(&node), Some(0));
408 node.data[0] = 0x13;
411 node.data[0] = 0x13;
409 assert_eq!(prefix.first_different_nybble(&node), Some(1));
412 assert_eq!(prefix.first_different_nybble(&node), Some(1));
410 node.data[0] = 0x12;
413 node.data[0] = 0x12;
411 assert_eq!(prefix.first_different_nybble(&node), Some(2));
414 assert_eq!(prefix.first_different_nybble(&node), Some(2));
412 node.data[1] = 0xca;
415 node.data[1] = 0xca;
413 // now it is a prefix
416 // now it is a prefix
414 assert_eq!(prefix.first_different_nybble(&node), None);
417 assert_eq!(prefix.first_different_nybble(&node), None);
415 }
418 }
416
419
417 #[test]
420 #[test]
418 fn test_first_different_nybble_odd_prefix() {
421 fn test_first_different_nybble_odd_prefix() {
419 let prefix = NodePrefix::from_hex("12c").unwrap();
422 let prefix = NodePrefix::from_hex("12c").unwrap();
420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
423 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
421 assert_eq!(prefix.first_different_nybble(&node), Some(0));
424 assert_eq!(prefix.first_different_nybble(&node), Some(0));
422 node.data[0] = 0x13;
425 node.data[0] = 0x13;
423 assert_eq!(prefix.first_different_nybble(&node), Some(1));
426 assert_eq!(prefix.first_different_nybble(&node), Some(1));
424 node.data[0] = 0x12;
427 node.data[0] = 0x12;
425 assert_eq!(prefix.first_different_nybble(&node), Some(2));
428 assert_eq!(prefix.first_different_nybble(&node), Some(2));
426 node.data[1] = 0xca;
429 node.data[1] = 0xca;
427 // now it is a prefix
430 // now it is a prefix
428 assert_eq!(prefix.first_different_nybble(&node), None);
431 assert_eq!(prefix.first_different_nybble(&node), None);
429 }
432 }
430 }
433 }
431
432 #[cfg(test)]
433 pub use tests::hex_pad_right;
@@ -1,340 +1,341
1 use hg::testing::VecGraph;
1 use hg::testing::VecGraph;
2 use hg::Revision;
2 use hg::Revision;
3 use hg::*;
3 use hg::*;
4 use rand::distributions::{Distribution, Uniform};
4 use rand::distributions::{Distribution, Uniform};
5 use rand::{thread_rng, Rng, RngCore, SeedableRng};
5 use rand::{thread_rng, Rng, RngCore, SeedableRng};
6 use rand_distr::LogNormal;
6 use rand_distr::LogNormal;
7 use std::cmp::min;
7 use std::cmp::min;
8 use std::collections::HashSet;
8 use std::collections::HashSet;
9 use std::env;
9 use std::env;
10 use std::fmt::Debug;
10 use std::fmt::Debug;
11
11
12 fn build_random_graph(
12 fn build_random_graph(
13 nodes_opt: Option<usize>,
13 nodes_opt: Option<usize>,
14 rootprob_opt: Option<f64>,
14 rootprob_opt: Option<f64>,
15 mergeprob_opt: Option<f64>,
15 mergeprob_opt: Option<f64>,
16 prevprob_opt: Option<f64>,
16 prevprob_opt: Option<f64>,
17 ) -> VecGraph {
17 ) -> VecGraph {
18 let nodes = nodes_opt.unwrap_or(100);
18 let nodes = nodes_opt.unwrap_or(100);
19 let rootprob = rootprob_opt.unwrap_or(0.05);
19 let rootprob = rootprob_opt.unwrap_or(0.05);
20 let mergeprob = mergeprob_opt.unwrap_or(0.2);
20 let mergeprob = mergeprob_opt.unwrap_or(0.2);
21 let prevprob = prevprob_opt.unwrap_or(0.7);
21 let prevprob = prevprob_opt.unwrap_or(0.7);
22
22
23 let mut rng = thread_rng();
23 let mut rng = thread_rng();
24 let mut vg: VecGraph = Vec::with_capacity(nodes);
24 let mut vg: VecGraph = Vec::with_capacity(nodes);
25 for i in 0..nodes {
25 for i in 0..nodes {
26 if i == 0 || rng.gen_bool(rootprob) {
26 if i == 0 || rng.gen_bool(rootprob) {
27 vg.push([NULL_REVISION, NULL_REVISION])
27 vg.push([NULL_REVISION, NULL_REVISION])
28 } else if i == 1 {
28 } else if i == 1 {
29 vg.push([Revision(0), NULL_REVISION])
29 vg.push([Revision(0), NULL_REVISION])
30 } else if rng.gen_bool(mergeprob) {
30 } else if rng.gen_bool(mergeprob) {
31 let p1 = {
31 let p1 = {
32 if i == 2 || rng.gen_bool(prevprob) {
32 if i == 2 || rng.gen_bool(prevprob) {
33 Revision((i - 1) as BaseRevision)
33 Revision((i - 1) as BaseRevision)
34 } else {
34 } else {
35 Revision(rng.gen_range(0..i - 1) as BaseRevision)
35 Revision(rng.gen_range(0..i - 1) as BaseRevision)
36 }
36 }
37 };
37 };
38 // p2 is a random revision lower than i and different from p1
38 // p2 is a random revision lower than i and different from p1
39 let mut p2 = Revision(rng.gen_range(0..i - 1) as BaseRevision);
39 let mut p2 = Revision(rng.gen_range(0..i - 1) as BaseRevision);
40 if p2 >= p1 {
40 if p2 >= p1 {
41 p2.0 += 1;
41 p2.0 += 1;
42 }
42 }
43 vg.push([p1, p2]);
43 vg.push([p1, p2]);
44 } else if rng.gen_bool(prevprob) {
44 } else if rng.gen_bool(prevprob) {
45 vg.push([Revision((i - 1) as BaseRevision), NULL_REVISION])
45 vg.push([Revision((i - 1) as BaseRevision), NULL_REVISION])
46 } else {
46 } else {
47 vg.push([
47 vg.push([
48 Revision(rng.gen_range(0..i - 1) as BaseRevision),
48 Revision(rng.gen_range(0..i - 1) as BaseRevision),
49 NULL_REVISION,
49 NULL_REVISION,
50 ])
50 ])
51 }
51 }
52 }
52 }
53 vg
53 vg
54 }
54 }
55
55
56 /// Compute the ancestors set of all revisions of a VecGraph
56 /// Compute the ancestors set of all revisions of a VecGraph
57 fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> {
57 fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> {
58 let mut ancs: Vec<HashSet<Revision>> = Vec::new();
58 let mut ancs: Vec<HashSet<Revision>> = Vec::new();
59 (0..vg.len()).for_each(|i| {
59 (0..vg.len()).for_each(|i| {
60 let mut ancs_i = HashSet::new();
60 let mut ancs_i = HashSet::new();
61 ancs_i.insert(Revision(i as BaseRevision));
61 ancs_i.insert(Revision(i as BaseRevision));
62 for p in vg[i].iter().cloned() {
62 for p in vg[i].iter().cloned() {
63 if p != NULL_REVISION {
63 if p != NULL_REVISION {
64 ancs_i.extend(&ancs[p.0 as usize]);
64 ancs_i.extend(&ancs[p.0 as usize]);
65 }
65 }
66 }
66 }
67 ancs.push(ancs_i);
67 ancs.push(ancs_i);
68 });
68 });
69 ancs
69 ancs
70 }
70 }
71
71
/// One recorded call on a `NaiveMissingAncestors`, kept in its history
/// so a failing random test can be replayed and debugged.
#[allow(unused)] // Useful when debugging
#[derive(Clone, Debug)]
enum MissingAncestorsAction {
    // The bases the instance was constructed with.
    InitialBases(HashSet<Revision>),
    // Bases added later through `add_bases`.
    AddBases(HashSet<Revision>),
    // The argument of a `remove_ancestors_from` call (pre-mutation).
    RemoveAncestorsFrom(HashSet<Revision>),
    // The argument of a `missing_ancestors` call.
    MissingAncestors(HashSet<Revision>),
}
79
80
/// An instrumented naive yet obviously correct implementation
///
/// It also records all its actions for easy reproduction for replay
/// of problematic cases
struct NaiveMissingAncestors<'a> {
    // Precomputed transitive closure: `ancestors_sets[r]` is the full
    // (inclusive) ancestor set of revision `r`.
    ancestors_sets: &'a Vec<HashSet<Revision>>,
    graph: &'a VecGraph, // used for error reporting only
    // Current set of bases; grows through `add_bases`.
    bases: HashSet<Revision>,
    // Every action performed on this instance, for replaying failures.
    history: Vec<MissingAncestorsAction>,
    // for error reporting, assuming we are in a random test
    random_seed: String,
}
92
93
impl<'a> NaiveMissingAncestors<'a> {
    /// Create an instance over `graph`, with `bases` as initial bases.
    ///
    /// `ancestors_sets` must be the precomputed transitive closure of
    /// `graph` (as produced by `ancestors_sets()`); `random_seed` is
    /// only kept for error reporting.
    fn new(
        graph: &'a VecGraph,
        ancestors_sets: &'a Vec<HashSet<Revision>>,
        bases: &HashSet<Revision>,
        random_seed: &str,
    ) -> Self {
        Self {
            ancestors_sets,
            bases: bases.clone(),
            graph,
            history: vec![MissingAncestorsAction::InitialBases(bases.clone())],
            random_seed: random_seed.into(),
        }
    }

    /// Add `new_bases` to the current bases, recording the action.
    fn add_bases(&mut self, new_bases: HashSet<Revision>) {
        self.bases.extend(&new_bases);
        self.history
            .push(MissingAncestorsAction::AddBases(new_bases))
    }

    /// Remove from `revs` every revision that is an ancestor of one of
    /// the current bases (bases included), using the precomputed sets.
    fn remove_ancestors_from(&mut self, revs: &mut HashSet<Revision>) {
        revs.remove(&NULL_REVISION);
        self.history
            .push(MissingAncestorsAction::RemoveAncestorsFrom(revs.clone()));
        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base.0 as usize] {
                    revs.remove(rev);
                }
            }
        }
    }

    /// Return, sorted, the ancestors of `revs` (inclusive) that are not
    /// ancestors of any of the current bases.
    fn missing_ancestors(
        &mut self,
        revs: impl IntoIterator<Item = Revision>,
    ) -> Vec<Revision> {
        let revs_as_set: HashSet<Revision> = revs.into_iter().collect();

        // Union of the (inclusive) ancestor sets of all wanted revs.
        let mut missing: HashSet<Revision> = HashSet::new();
        for rev in revs_as_set.iter().cloned() {
            if rev != NULL_REVISION {
                missing.extend(&self.ancestors_sets[rev.0 as usize])
            }
        }
        self.history
            .push(MissingAncestorsAction::MissingAncestors(revs_as_set));

        // Subtract everything reachable from the bases.
        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base.0 as usize] {
                    missing.remove(rev);
                }
            }
        }
        let mut res: Vec<Revision> = missing.iter().cloned().collect();
        res.sort_unstable();
        res
    }

    /// Like `assert_eq!`, but the panic message additionally dumps the
    /// graph, bases, recorded history and random seed so the failure
    /// can be reproduced.
    fn assert_eq<T>(&self, left: T, right: T)
    where
        T: PartialEq + Debug,
    {
        if left == right {
            return;
        }
        panic!(
            "Equality assertion failed (left != right)
           left={:?}
           right={:?}
           graph={:?}
           current bases={:?}
           history={:?}
           random seed={}
           ",
            left,
            right,
            self.graph,
            self.bases,
            self.history,
            self.random_seed,
        );
    }
}
180
181
181 /// Choose a set of random revisions
182 /// Choose a set of random revisions
182 ///
183 ///
183 /// The size of the set is taken from a LogNormal distribution
184 /// The size of the set is taken from a LogNormal distribution
184 /// with default mu=1.1 and default sigma=0.8. Quoting the Python
185 /// with default mu=1.1 and default sigma=0.8. Quoting the Python
185 /// test this is taken from:
186 /// test this is taken from:
186 /// the default mu and sigma give us a nice distribution of mostly
187 /// the default mu and sigma give us a nice distribution of mostly
187 /// single-digit counts (including 0) with some higher ones
188 /// single-digit counts (including 0) with some higher ones
188 /// The sample may include NULL_REVISION
189 /// The sample may include NULL_REVISION
189 fn sample_revs<R: RngCore>(
190 fn sample_revs<R: RngCore>(
190 rng: &mut R,
191 rng: &mut R,
191 maxrev: Revision,
192 maxrev: Revision,
192 mu_opt: Option<f64>,
193 mu_opt: Option<f64>,
193 sigma_opt: Option<f64>,
194 sigma_opt: Option<f64>,
194 ) -> HashSet<Revision> {
195 ) -> HashSet<Revision> {
195 let mu = mu_opt.unwrap_or(1.1);
196 let mu = mu_opt.unwrap_or(1.1);
196 let sigma = sigma_opt.unwrap_or(0.8);
197 let sigma = sigma_opt.unwrap_or(0.8);
197
198
198 let log_normal = LogNormal::new(mu, sigma).unwrap();
199 let log_normal = LogNormal::new(mu, sigma).unwrap();
199 let nb = min(maxrev.0 as usize, log_normal.sample(rng).floor() as usize);
200 let nb = min(maxrev.0 as usize, log_normal.sample(rng).floor() as usize);
200
201
201 let dist = Uniform::from(NULL_REVISION.0..maxrev.0);
202 let dist = Uniform::from(NULL_REVISION.0..maxrev.0);
202 rng.sample_iter(&dist).take(nb).map(Revision).collect()
203 rng.sample_iter(&dist).take(nb).map(Revision).collect()
203 }
204 }
204
205
/// Produces the hexadecimal representation of a slice of bytes
///
/// Each byte is rendered as exactly two hex digits, so the output
/// always has `2 * bytes.len()` characters. This is required for
/// round-tripping through `seed_parse_in`, which demands 32 hex
/// characters for a 16-byte seed.
fn hex_bytes(bytes: &[u8]) -> String {
    let mut s = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        // `{:02x}` zero-pads; plain `{:x}` would emit a single digit
        // for bytes < 0x10 and yield a string of the wrong length.
        s.push_str(&format!("{:02x}", b));
    }
    s
}
213
214
/// Fill a random seed from its hexadecimal representation.
///
/// This signature is meant to be consistent with `RngCore::fill_bytes`
///
/// # Panics
///
/// Panics if `hex` is not exactly 32 hexadecimal characters (128 bits).
fn seed_parse_in(hex: &str, seed: &mut [u8]) {
    if hex.len() != 32 {
        panic!("Seed {} is too short for 128 bits hex", hex);
    }
    // Decode all 16 bytes. The previous loop stopped after 8, silently
    // ignoring the second half of TEST_RANDOM_SEED (and any invalid hex
    // in it), which made reported seeds non-reproducible.
    for (i, byte) in seed.iter_mut().enumerate().take(16) {
        *byte = u8::from_str_radix(&hex[2 * i..2 * (i + 1)], 16)
            .unwrap_or_else(|_e| panic!("Seed {} is not 128 bits hex", hex));
    }
}
226
227
/// Parse the parameters for `test_missing_ancestors()`
///
/// Returns (graphs, instances, calls per instance)
///
/// Panics with a usage message if the variable does not hold exactly
/// three comma-separated non-negative integers.
fn parse_test_missing_ancestors_params(var: &str) -> (usize, usize, usize) {
    let err_msg = "TEST_MISSING_ANCESTORS format: GRAPHS,INSTANCES,CALLS";
    let parsed: Vec<usize> = var
        .split(',')
        .map(|field| field.trim().parse().expect(err_msg))
        .collect();
    match parsed.as_slice() {
        [graphs, instances, calls] => (*graphs, *instances, *calls),
        _ => panic!("{}", err_msg),
    }
}
241
242
#[test]
/// This test creates lots of random VecGraphs,
/// and compare a bunch of MissingAncestors for them with
/// NaiveMissingAncestors that rely on precomputed transitive closures of
/// these VecGraphs (ancestors_sets).
///
/// For each generater graph, several instances of `MissingAncestors` are
/// created, whose methods are called and checked a given number of times.
///
/// This test can be parametrized by two environment variables:
///
/// - TEST_RANDOM_SEED: must be 128 bits in hexadecimal
/// - TEST_MISSING_ANCESTORS: "GRAPHS,INSTANCES,CALLS". The default is
///   "100,10,10"
///
/// This is slow: it runs on my workstation in about 5 seconds with the
/// default parameters with a plain `cargo --test`.
///
/// If you want to run it faster, especially if you're changing the
/// parameters, use `cargo test --release`.
/// For me, that gets it down to 0.15 seconds with the default parameters
fn test_missing_ancestors_compare_naive() {
    // Read the test size parameters, falling back to the defaults.
    let (graphcount, testcount, inccount) =
        match env::var("TEST_MISSING_ANCESTORS") {
            Err(env::VarError::NotPresent) => (100, 10, 10),
            Ok(val) => parse_test_missing_ancestors_params(&val),
            Err(env::VarError::NotUnicode(_)) => {
                panic!("TEST_MISSING_ANCESTORS is invalid");
            }
        };
    // Use the seed from the environment when given (to replay a failure),
    // otherwise draw a fresh one and print it for reproducibility.
    let mut seed: [u8; 16] = [0; 16];
    match env::var("TEST_RANDOM_SEED") {
        Ok(val) => {
            seed_parse_in(&val, &mut seed);
        }
        Err(env::VarError::NotPresent) => {
            thread_rng().fill_bytes(&mut seed);
        }
        Err(env::VarError::NotUnicode(_)) => {
            panic!("TEST_RANDOM_SEED must be 128 bits in hex");
        }
    }
    let hex_seed = hex_bytes(&seed);
    eprintln!("Random seed: {}", hex_seed);

    // A seeded PCG so the whole run is reproducible from `hex_seed`.
    let mut rng = rand_pcg::Pcg32::from_seed(seed);

    eprint!("Checking MissingAncestors against brute force implementation ");
    eprint!("for {} random graphs, ", graphcount);
    eprintln!(
        "with {} instances for each and {} calls per instance",
        testcount, inccount,
    );
    for g in 0..graphcount {
        // Progress report for long runs.
        if g != 0 && g % 100 == 0 {
            eprintln!("Tested with {} graphs", g);
        }
        let graph = build_random_graph(None, None, None, None);
        let graph_len = Revision(graph.len() as BaseRevision);
        // Brute-force transitive closure, shared by all instances below.
        let ancestors_sets = ancestors_sets(&graph);
        for _testno in 0..testcount {
            let bases: HashSet<Revision> =
                sample_revs(&mut rng, graph_len, None, None);
            // The implementation under test...
            let mut inc = MissingAncestors::<VecGraph>::new(
                graph.clone(),
                bases.clone(),
            );
            // ...and the naive oracle it is compared against.
            let mut naive = NaiveMissingAncestors::new(
                &graph,
                &ancestors_sets,
                &bases,
                &hex_seed,
            );
            for _m in 0..inccount {
                // Occasionally grow the bases on both sides.
                if rng.gen_bool(0.2) {
                    let new_bases =
                        sample_revs(&mut rng, graph_len, None, None);
                    inc.add_bases(new_bases.iter().cloned());
                    naive.add_bases(new_bases);
                }
                // Exercise either removal or missing-ancestors lookup,
                // and check both implementations agree.
                if rng.gen_bool(0.4) {
                    // larger set so that there are more revs to remove from
                    let mut hrevs =
                        sample_revs(&mut rng, graph_len, Some(1.5), None);
                    let mut rrevs = hrevs.clone();
                    inc.remove_ancestors_from(&mut hrevs).unwrap();
                    naive.remove_ancestors_from(&mut rrevs);
                    naive.assert_eq(hrevs, rrevs);
                } else {
                    let revs = sample_revs(&mut rng, graph_len, None, None);
                    let hm =
                        inc.missing_ancestors(revs.iter().cloned()).unwrap();
                    let rm = naive.missing_ancestors(revs.iter().cloned());
                    naive.assert_eq(hm, rm);
                }
            }
        }
    }
}
General Comments 0
You need to be logged in to leave comments. Login now