dirstate-v2: Store a bitfield on disk instead of v1-like state...
Simon Sapin
r48951:ab5a7fdb default
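A minimal sketch of the idea behind this change, assuming only the bit values defined later in this patch: each dirstate node now stores a small bitfield whose bits can be set and tested independently, instead of a single v1-style state character ('n', 'a', 'r', 'm').

    // Illustrative constants mirroring the `bitflags!` definitions in this patch.
    const WDIR_TRACKED: u8 = 1 << 0; // tracked in the working directory
    const P1_TRACKED: u8 = 1 << 1; // tracked in the first parent
    const P2_INFO: u8 = 1 << 2; // merge-related information from p2
    const HAS_MODE_AND_SIZE: u8 = 1 << 3; // on-disk format only
    const HAS_MTIME: u8 = 1 << 4; // on-disk format only

    // A clean, tracked file with known mode/size and mtime:
    let flags = WDIR_TRACKED | P1_TRACKED | HAS_MODE_AND_SIZE | HAS_MTIME;
    assert_eq!(flags & P2_INFO, 0); // no information from the other parent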
@@ -1,413 +1,429 b''
1 1 use crate::errors::HgError;
2 2 use bitflags::bitflags;
3 3 use std::convert::TryFrom;
4 4
5 5 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
6 6 pub enum EntryState {
7 7 Normal,
8 8 Added,
9 9 Removed,
10 10 Merged,
11 11 }
12 12
13 13 /// The C implementation uses all signed types. This will be an issue
14 14 /// either when 4GB+ source files are commonplace or in 2038, whichever
15 15 /// comes first.
16 16 #[derive(Debug, PartialEq, Copy, Clone)]
17 17 pub struct DirstateEntry {
18 flags: Flags,
18 pub(crate) flags: Flags,
19 19 mode_size: Option<(i32, i32)>,
20 20 mtime: Option<i32>,
21 21 }
22 22
23 23 bitflags! {
24 struct Flags: u8 {
24 pub(crate) struct Flags: u8 {
25 25 const WDIR_TRACKED = 1 << 0;
26 26 const P1_TRACKED = 1 << 1;
27 27 const P2_INFO = 1 << 2;
28 28 }
29 29 }
30 30
31 31 pub const V1_RANGEMASK: i32 = 0x7FFFFFFF;
32 32
33 33 pub const MTIME_UNSET: i32 = -1;
34 34
35 35 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
36 36 /// other parent. This allows revert to pick the right status back during a
37 37 /// merge.
38 38 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
39 39 /// A special value used for the internal representation of a special case
40 40 /// in the dirstate v1 format.
41 41 pub const SIZE_NON_NORMAL: i32 = -1;
42 42
43 43 impl DirstateEntry {
44 pub fn new(
44 pub fn from_v2_data(
45 45 wdir_tracked: bool,
46 46 p1_tracked: bool,
47 47 p2_info: bool,
48 48 mode_size: Option<(i32, i32)>,
49 49 mtime: Option<i32>,
50 50 ) -> Self {
51 51 let mut flags = Flags::empty();
52 52 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
53 53 flags.set(Flags::P1_TRACKED, p1_tracked);
54 54 flags.set(Flags::P2_INFO, p2_info);
55 55 Self {
56 56 flags,
57 57 mode_size,
58 58 mtime,
59 59 }
60 60 }
61 61
62 62 pub fn from_v1_data(
63 63 state: EntryState,
64 64 mode: i32,
65 65 size: i32,
66 66 mtime: i32,
67 67 ) -> Self {
68 68 match state {
69 69 EntryState::Normal => {
70 70 if size == SIZE_FROM_OTHER_PARENT {
71 71 Self::new_from_p2()
72 72 } else if size == SIZE_NON_NORMAL {
73 73 Self::new_possibly_dirty()
74 74 } else if mtime == MTIME_UNSET {
75 75 Self {
76 76 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
77 77 mode_size: Some((mode, size)),
78 78 mtime: None,
79 79 }
80 80 } else {
81 81 Self::new_normal(mode, size, mtime)
82 82 }
83 83 }
84 84 EntryState::Added => Self::new_added(),
85 85 EntryState::Removed => Self {
86 86 flags: if size == SIZE_NON_NORMAL {
87 87 Flags::P1_TRACKED | Flags::P2_INFO
88 88 } else if size == SIZE_FROM_OTHER_PARENT {
89 89 // We don’t know if P1_TRACKED should be set (file history)
90 90 Flags::P2_INFO
91 91 } else {
92 92 Flags::P1_TRACKED
93 93 },
94 94 mode_size: None,
95 95 mtime: None,
96 96 },
97 97 EntryState::Merged => Self::new_merged(),
98 98 }
99 99 }
100 100
101 101 pub fn new_from_p2() -> Self {
102 102 Self {
103 103 // might be missing P1_TRACKED
104 104 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
105 105 mode_size: None,
106 106 mtime: None,
107 107 }
108 108 }
109 109
110 110 pub fn new_possibly_dirty() -> Self {
111 111 Self {
112 112 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
113 113 mode_size: None,
114 114 mtime: None,
115 115 }
116 116 }
117 117
118 118 pub fn new_added() -> Self {
119 119 Self {
120 120 flags: Flags::WDIR_TRACKED,
121 121 mode_size: None,
122 122 mtime: None,
123 123 }
124 124 }
125 125
126 126 pub fn new_merged() -> Self {
127 127 Self {
128 128 flags: Flags::WDIR_TRACKED
129 129 | Flags::P1_TRACKED // might not be true because of rename?
130 130 | Flags::P2_INFO, // might not be true because of rename?
131 131 mode_size: None,
132 132 mtime: None,
133 133 }
134 134 }
135 135
136 136 pub fn new_normal(mode: i32, size: i32, mtime: i32) -> Self {
137 137 Self {
138 138 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
139 139 mode_size: Some((mode, size)),
140 140 mtime: Some(mtime),
141 141 }
142 142 }
143 143
144 144 /// Creates a new entry in "removed" state.
145 145 ///
146 146 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
147 147 /// `SIZE_FROM_OTHER_PARENT`
148 148 pub fn new_removed(size: i32) -> Self {
149 149 Self::from_v1_data(EntryState::Removed, 0, size, 0)
150 150 }
151 151
152 152 pub fn tracked(&self) -> bool {
153 153 self.flags.contains(Flags::WDIR_TRACKED)
154 154 }
155 155
156 156 fn in_either_parent(&self) -> bool {
157 157 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
158 158 }
159 159
160 160 pub fn removed(&self) -> bool {
161 161 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
162 162 }
163 163
164 164 pub fn merged(&self) -> bool {
165 165 self.flags
166 166 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
167 167 }
168 168
169 169 pub fn added(&self) -> bool {
170 170 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
171 171 }
172 172
173 173 pub fn from_p2(&self) -> bool {
174 174 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
175 175 && !self.flags.contains(Flags::P1_TRACKED)
176 176 }
177 177
178 178 pub fn maybe_clean(&self) -> bool {
179 179 if !self.flags.contains(Flags::WDIR_TRACKED) {
180 180 false
181 181 } else if !self.flags.contains(Flags::P1_TRACKED) {
182 182 false
183 183 } else if self.flags.contains(Flags::P2_INFO) {
184 184 false
185 185 } else {
186 186 true
187 187 }
188 188 }
189 189
190 190 pub fn any_tracked(&self) -> bool {
191 191 self.flags.intersects(
192 192 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
193 193 )
194 194 }
195 195
196 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime)`
197 pub(crate) fn v2_data(
198 &self,
199 ) -> (bool, bool, bool, Option<(i32, i32)>, Option<i32>) {
200 if !self.any_tracked() {
201 // TODO: return an Option instead?
202 panic!("Accessing v1_state of an untracked DirstateEntry")
203 }
204 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
205 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
206 let p2_info = self.flags.contains(Flags::P2_INFO);
207 let mode_size = self.mode_size;
208 let mtime = self.mtime;
209 (wdir_tracked, p1_tracked, p2_info, mode_size, mtime)
210 }
211
196 212 fn v1_state(&self) -> EntryState {
197 213 if !self.any_tracked() {
198 214 // TODO: return an Option instead?
199 215 panic!("Accessing v1_state of an untracked DirstateEntry")
200 216 }
201 217 if self.removed() {
202 218 EntryState::Removed
203 219 } else if self.merged() {
204 220 EntryState::Merged
205 221 } else if self.added() {
206 222 EntryState::Added
207 223 } else {
208 224 EntryState::Normal
209 225 }
210 226 }
211 227
212 228 fn v1_mode(&self) -> i32 {
213 229 if let Some((mode, _size)) = self.mode_size {
214 230 mode
215 231 } else {
216 232 0
217 233 }
218 234 }
219 235
220 236 fn v1_size(&self) -> i32 {
221 237 if !self.any_tracked() {
222 238 // TODO: return an Option instead?
223 239 panic!("Accessing v1_size of an untracked DirstateEntry")
224 240 }
225 241 if self.removed()
226 242 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
227 243 {
228 244 SIZE_NON_NORMAL
229 245 } else if self.removed() && self.flags.contains(Flags::P2_INFO) {
230 246 SIZE_FROM_OTHER_PARENT
231 247 } else if self.removed() {
232 248 0
233 249 } else if self.merged() {
234 250 SIZE_FROM_OTHER_PARENT
235 251 } else if self.added() {
236 252 SIZE_NON_NORMAL
237 253 } else if self.from_p2() {
238 254 SIZE_FROM_OTHER_PARENT
239 255 } else if let Some((_mode, size)) = self.mode_size {
240 256 size
241 257 } else {
242 258 SIZE_NON_NORMAL
243 259 }
244 260 }
245 261
246 262 fn v1_mtime(&self) -> i32 {
247 263 if !self.any_tracked() {
248 264 // TODO: return an Option instead?
249 265 panic!("Accessing v1_mtime of an untracked DirstateEntry")
250 266 }
251 267 if self.removed() {
252 268 0
253 269 } else if self.flags.contains(Flags::P2_INFO) {
254 270 MTIME_UNSET
255 271 } else if !self.flags.contains(Flags::P1_TRACKED) {
256 272 MTIME_UNSET
257 273 } else {
258 274 self.mtime.unwrap_or(MTIME_UNSET)
259 275 }
260 276 }
261 277
262 278 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
263 279 pub fn state(&self) -> EntryState {
264 280 self.v1_state()
265 281 }
266 282
267 283 // TODO: return Option?
268 284 pub fn mode(&self) -> i32 {
269 285 self.v1_mode()
270 286 }
271 287
272 288 // TODO: return Option?
273 289 pub fn size(&self) -> i32 {
274 290 self.v1_size()
275 291 }
276 292
277 293 // TODO: return Option?
278 294 pub fn mtime(&self) -> i32 {
279 295 self.v1_mtime()
280 296 }
281 297
282 298 pub fn drop_merge_data(&mut self) {
283 299 if self.flags.contains(Flags::P2_INFO) {
284 300 self.flags.remove(Flags::P2_INFO);
285 301 self.mode_size = None;
286 302 self.mtime = None;
287 303 }
288 304 }
289 305
290 306 pub fn set_possibly_dirty(&mut self) {
291 307 self.mtime = None
292 308 }
293 309
294 310 pub fn set_clean(&mut self, mode: i32, size: i32, mtime: i32) {
295 311 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
296 312 self.mode_size = Some((mode, size));
297 313 self.mtime = Some(mtime);
298 314 }
299 315
300 316 pub fn set_tracked(&mut self) {
301 317 self.flags.insert(Flags::WDIR_TRACKED);
302 318 // `set_tracked` replaces various `normallookup` calls, so we mark
303 319 // the files as needing a lookup.
304 320 //
305 321 // Consider dropping this in the future in favor of something less
306 322 // broad.
307 323 self.mtime = None;
308 324 }
309 325
310 326 pub fn set_untracked(&mut self) {
311 327 self.flags.remove(Flags::WDIR_TRACKED);
312 328 self.mode_size = None;
313 329 self.mtime = None;
314 330 }
315 331
316 332 /// Returns `(state, mode, size, mtime)` for the purpose of serialization
317 333 /// in the dirstate-v1 format.
318 334 ///
319 335 /// This includes marker values such as `mtime == -1`. In the future we may
320 336 /// want to not represent these cases that way in memory, but serialization
321 337 /// will need to keep the same format.
322 338 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
323 339 (
324 340 self.v1_state().into(),
325 341 self.v1_mode(),
326 342 self.v1_size(),
327 343 self.v1_mtime(),
328 344 )
329 345 }
330 346
331 347 pub(crate) fn is_from_other_parent(&self) -> bool {
332 348 self.state() == EntryState::Normal
333 349 && self.size() == SIZE_FROM_OTHER_PARENT
334 350 }
335 351
336 352 // TODO: other platforms
337 353 #[cfg(unix)]
338 354 pub fn mode_changed(
339 355 &self,
340 356 filesystem_metadata: &std::fs::Metadata,
341 357 ) -> bool {
342 358 use std::os::unix::fs::MetadataExt;
343 359 const EXEC_BIT_MASK: u32 = 0o100;
344 360 let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK;
345 361 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
346 362 dirstate_exec_bit != fs_exec_bit
347 363 }
348 364
349 365 /// Returns a `(state, mode, size, mtime)` tuple as for
350 366 /// `DirstateMapMethods::debug_iter`.
351 367 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
352 368 (self.state().into(), self.mode(), self.size(), self.mtime())
353 369 }
354 370
355 371 pub fn mtime_is_ambiguous(&self, now: i32) -> bool {
356 372 self.state() == EntryState::Normal && self.mtime() == now
357 373 }
358 374
359 375 pub fn clear_ambiguous_mtime(&mut self, now: i32) -> bool {
360 376 let ambiguous = self.mtime_is_ambiguous(now);
361 377 if ambiguous {
362 378 // The file was last modified "simultaneously" with the current
363 379 // write to dirstate (i.e. within the same second for file-
364 380 // systems with a granularity of 1 sec). This commonly happens
365 381 // for at least a couple of files on 'update'.
366 382 // The user could change the file without changing its size
367 383 // within the same second. Invalidate the file's mtime in
368 384 // dirstate, forcing future 'status' calls to compare the
369 385 // contents of the file if the size is the same. This prevents
370 386 // mistakenly treating such files as clean.
371 387 self.set_possibly_dirty()
372 388 }
373 389 ambiguous
374 390 }
375 391 }
376 392
377 393 impl EntryState {
378 394 pub fn is_tracked(self) -> bool {
379 395 use EntryState::*;
380 396 match self {
381 397 Normal | Added | Merged => true,
382 398 Removed => false,
383 399 }
384 400 }
385 401 }
386 402
387 403 impl TryFrom<u8> for EntryState {
388 404 type Error = HgError;
389 405
390 406 fn try_from(value: u8) -> Result<Self, Self::Error> {
391 407 match value {
392 408 b'n' => Ok(EntryState::Normal),
393 409 b'a' => Ok(EntryState::Added),
394 410 b'r' => Ok(EntryState::Removed),
395 411 b'm' => Ok(EntryState::Merged),
396 412 _ => Err(HgError::CorruptedRepository(format!(
397 413 "Incorrect dirstate entry state {}",
398 414 value
399 415 ))),
400 416 }
401 417 }
402 418 }
403 419
404 420 impl Into<u8> for EntryState {
405 421 fn into(self) -> u8 {
406 422 match self {
407 423 EntryState::Normal => b'n',
408 424 EntryState::Added => b'a',
409 425 EntryState::Removed => b'r',
410 426 EntryState::Merged => b'm',
411 427 }
412 428 }
413 429 }
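A hedged round-trip sketch (not part of the patch) showing how a plain v1 "normal" entry maps onto the new flag-based representation and back, using only the functions defined above:

    // A clean file in v1 terms: state 'n' with known mode, size and mtime.
    let entry = DirstateEntry::from_v1_data(EntryState::Normal, 0o644, 42, 1_600_000_000);
    // Internally this becomes WDIR_TRACKED | P1_TRACKED plus mode/size/mtime.
    assert!(entry.tracked());
    assert!(entry.maybe_clean());
    // Serializing back to the v1 format reproduces the original tuple.
    assert_eq!(entry.v1_data(), (b'n', 0o644, 42, 1_600_000_000));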
@@ -1,1193 +1,1188 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::on_disk::DirstateV2ParseError;
9 9 use super::owning::OwningDirstateMap;
10 10 use super::path_with_basename::WithBasename;
11 11 use crate::dirstate::parsers::pack_entry;
12 12 use crate::dirstate::parsers::packed_entry_size;
13 13 use crate::dirstate::parsers::parse_dirstate_entries;
14 14 use crate::dirstate::parsers::Timestamp;
15 15 use crate::dirstate::CopyMapIter;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
18 18 use crate::dirstate::SIZE_NON_NORMAL;
19 19 use crate::matchers::Matcher;
20 20 use crate::utils::hg_path::{HgPath, HgPathBuf};
21 21 use crate::DirstateEntry;
22 22 use crate::DirstateError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::EntryState;
26 26 use crate::FastHashMap;
27 27 use crate::PatternFileWarning;
28 28 use crate::StatusError;
29 29 use crate::StatusOptions;
30 30
31 31 /// Append to an existing data file if the amount of unreachable data (not used
32 32 /// anymore) is less than this fraction of the total amount of existing data.
33 33 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
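For example, a 1000-byte data file with 300 unreachable bytes has a ratio of 0.3, below the 0.5 threshold, so the next write appends; once 600 bytes are unreachable (ratio 0.6), the next write creates a fresh data file instead.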
34 34
35 35 pub struct DirstateMap<'on_disk> {
36 36 /// Contents of the `.hg/dirstate` file
37 37 pub(super) on_disk: &'on_disk [u8],
38 38
39 39 pub(super) root: ChildNodes<'on_disk>,
40 40
41 41 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
42 42 pub(super) nodes_with_entry_count: u32,
43 43
44 44 /// Number of nodes anywhere in the tree that have
45 45 /// `.copy_source.is_some()`.
46 46 pub(super) nodes_with_copy_source_count: u32,
47 47
48 48 /// See on_disk::Header
49 49 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
50 50
51 51 /// How many bytes of `on_disk` are not used anymore
52 52 pub(super) unreachable_bytes: u32,
53 53 }
54 54
55 55 /// Using a plain `HgPathBuf` of the full path from the repository root as a
56 56 /// map key would also work: all paths in a given map have the same parent
57 57 /// path, so comparing full paths gives the same result as comparing base
58 58 /// names. However `HashMap` would waste time always re-hashing the same
59 59 /// string prefix.
60 60 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
61 61
62 62 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
63 63 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
64 64 pub(super) enum BorrowedPath<'tree, 'on_disk> {
65 65 InMemory(&'tree HgPathBuf),
66 66 OnDisk(&'on_disk HgPath),
67 67 }
68 68
69 69 pub(super) enum ChildNodes<'on_disk> {
70 70 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
71 71 OnDisk(&'on_disk [on_disk::Node]),
72 72 }
73 73
74 74 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
75 75 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
76 76 OnDisk(&'on_disk [on_disk::Node]),
77 77 }
78 78
79 79 pub(super) enum NodeRef<'tree, 'on_disk> {
80 80 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
81 81 OnDisk(&'on_disk on_disk::Node),
82 82 }
83 83
84 84 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
85 85 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
86 86 match *self {
87 87 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
88 88 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
89 89 }
90 90 }
91 91 }
92 92
93 93 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
94 94 type Target = HgPath;
95 95
96 96 fn deref(&self) -> &HgPath {
97 97 match *self {
98 98 BorrowedPath::InMemory(in_memory) => in_memory,
99 99 BorrowedPath::OnDisk(on_disk) => on_disk,
100 100 }
101 101 }
102 102 }
103 103
104 104 impl Default for ChildNodes<'_> {
105 105 fn default() -> Self {
106 106 ChildNodes::InMemory(Default::default())
107 107 }
108 108 }
109 109
110 110 impl<'on_disk> ChildNodes<'on_disk> {
111 111 pub(super) fn as_ref<'tree>(
112 112 &'tree self,
113 113 ) -> ChildNodesRef<'tree, 'on_disk> {
114 114 match self {
115 115 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
116 116 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
117 117 }
118 118 }
119 119
120 120 pub(super) fn is_empty(&self) -> bool {
121 121 match self {
122 122 ChildNodes::InMemory(nodes) => nodes.is_empty(),
123 123 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
124 124 }
125 125 }
126 126
127 127 fn make_mut(
128 128 &mut self,
129 129 on_disk: &'on_disk [u8],
130 130 unreachable_bytes: &mut u32,
131 131 ) -> Result<
132 132 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
133 133 DirstateV2ParseError,
134 134 > {
135 135 match self {
136 136 ChildNodes::InMemory(nodes) => Ok(nodes),
137 137 ChildNodes::OnDisk(nodes) => {
138 138 *unreachable_bytes +=
139 139 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
140 140 let nodes = nodes
141 141 .iter()
142 142 .map(|node| {
143 143 Ok((
144 144 node.path(on_disk)?,
145 145 node.to_in_memory_node(on_disk)?,
146 146 ))
147 147 })
148 148 .collect::<Result<_, _>>()?;
149 149 *self = ChildNodes::InMemory(nodes);
150 150 match self {
151 151 ChildNodes::InMemory(nodes) => Ok(nodes),
152 152 ChildNodes::OnDisk(_) => unreachable!(),
153 153 }
154 154 }
155 155 }
156 156 }
157 157 }
158 158
159 159 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
160 160 pub(super) fn get(
161 161 &self,
162 162 base_name: &HgPath,
163 163 on_disk: &'on_disk [u8],
164 164 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
165 165 match self {
166 166 ChildNodesRef::InMemory(nodes) => Ok(nodes
167 167 .get_key_value(base_name)
168 168 .map(|(k, v)| NodeRef::InMemory(k, v))),
169 169 ChildNodesRef::OnDisk(nodes) => {
170 170 let mut parse_result = Ok(());
171 171 let search_result = nodes.binary_search_by(|node| {
172 172 match node.base_name(on_disk) {
173 173 Ok(node_base_name) => node_base_name.cmp(base_name),
174 174 Err(e) => {
175 175 parse_result = Err(e);
176 176 // Dummy comparison result, `search_result` won’t
177 177 // be used since `parse_result` is an error
178 178 std::cmp::Ordering::Equal
179 179 }
180 180 }
181 181 });
182 182 parse_result.map(|()| {
183 183 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
184 184 })
185 185 }
186 186 }
187 187 }
188 188
189 189 /// Iterate in undefined order
190 190 pub(super) fn iter(
191 191 &self,
192 192 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
193 193 match self {
194 194 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
195 195 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
196 196 ),
197 197 ChildNodesRef::OnDisk(nodes) => {
198 198 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
199 199 }
200 200 }
201 201 }
202 202
203 203 /// Iterate in parallel in undefined order
204 204 pub(super) fn par_iter(
205 205 &self,
206 206 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
207 207 {
208 208 use rayon::prelude::*;
209 209 match self {
210 210 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
211 211 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
212 212 ),
213 213 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
214 214 nodes.par_iter().map(NodeRef::OnDisk),
215 215 ),
216 216 }
217 217 }
218 218
219 219 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
220 220 match self {
221 221 ChildNodesRef::InMemory(nodes) => {
222 222 let mut vec: Vec<_> = nodes
223 223 .iter()
224 224 .map(|(k, v)| NodeRef::InMemory(k, v))
225 225 .collect();
226 226 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
227 227 match node {
228 228 NodeRef::InMemory(path, _node) => path.base_name(),
229 229 NodeRef::OnDisk(_) => unreachable!(),
230 230 }
231 231 }
232 232 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
233 233 // value: https://github.com/rust-lang/rust/issues/34162
234 234 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
235 235 vec
236 236 }
237 237 ChildNodesRef::OnDisk(nodes) => {
238 238 // Nodes on disk are already sorted
239 239 nodes.iter().map(NodeRef::OnDisk).collect()
240 240 }
241 241 }
242 242 }
243 243 }
244 244
245 245 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
246 246 pub(super) fn full_path(
247 247 &self,
248 248 on_disk: &'on_disk [u8],
249 249 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
250 250 match self {
251 251 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
252 252 NodeRef::OnDisk(node) => node.full_path(on_disk),
253 253 }
254 254 }
255 255
256 256 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
257 257 /// HgPath>` detached from `'tree`
258 258 pub(super) fn full_path_borrowed(
259 259 &self,
260 260 on_disk: &'on_disk [u8],
261 261 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
262 262 match self {
263 263 NodeRef::InMemory(path, _node) => match path.full_path() {
264 264 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
265 265 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
266 266 },
267 267 NodeRef::OnDisk(node) => {
268 268 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
269 269 }
270 270 }
271 271 }
272 272
273 273 pub(super) fn base_name(
274 274 &self,
275 275 on_disk: &'on_disk [u8],
276 276 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
277 277 match self {
278 278 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
279 279 NodeRef::OnDisk(node) => node.base_name(on_disk),
280 280 }
281 281 }
282 282
283 283 pub(super) fn children(
284 284 &self,
285 285 on_disk: &'on_disk [u8],
286 286 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
287 287 match self {
288 288 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
289 289 NodeRef::OnDisk(node) => {
290 290 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
291 291 }
292 292 }
293 293 }
294 294
295 295 pub(super) fn has_copy_source(&self) -> bool {
296 296 match self {
297 297 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
298 298 NodeRef::OnDisk(node) => node.has_copy_source(),
299 299 }
300 300 }
301 301
302 302 pub(super) fn copy_source(
303 303 &self,
304 304 on_disk: &'on_disk [u8],
305 305 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
306 306 match self {
307 307 NodeRef::InMemory(_path, node) => {
308 308 Ok(node.copy_source.as_ref().map(|s| &**s))
309 309 }
310 310 NodeRef::OnDisk(node) => node.copy_source(on_disk),
311 311 }
312 312 }
313 313
314 314 pub(super) fn entry(
315 315 &self,
316 316 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
317 317 match self {
318 318 NodeRef::InMemory(_path, node) => {
319 319 Ok(node.data.as_entry().copied())
320 320 }
321 321 NodeRef::OnDisk(node) => node.entry(),
322 322 }
323 323 }
324 324
325 325 pub(super) fn state(
326 326 &self,
327 327 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
328 match self {
329 NodeRef::InMemory(_path, node) => {
330 Ok(node.data.as_entry().map(|entry| entry.state()))
331 }
332 NodeRef::OnDisk(node) => node.state(),
333 }
328 Ok(self.entry()?.map(|e| e.state()))
334 329 }
335 330
336 331 pub(super) fn cached_directory_mtime(
337 332 &self,
338 333 ) -> Option<&'tree on_disk::Timestamp> {
339 334 match self {
340 335 NodeRef::InMemory(_path, node) => match &node.data {
341 336 NodeData::CachedDirectory { mtime } => Some(mtime),
342 337 _ => None,
343 338 },
344 339 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
345 340 }
346 341 }
347 342
348 343 pub(super) fn descendants_with_entry_count(&self) -> u32 {
349 344 match self {
350 345 NodeRef::InMemory(_path, node) => {
351 346 node.descendants_with_entry_count
352 347 }
353 348 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
354 349 }
355 350 }
356 351
357 352 pub(super) fn tracked_descendants_count(&self) -> u32 {
358 353 match self {
359 354 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
360 355 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
361 356 }
362 357 }
363 358 }
364 359
365 360 /// Represents a file or a directory
366 361 #[derive(Default)]
367 362 pub(super) struct Node<'on_disk> {
368 363 pub(super) data: NodeData,
369 364
370 365 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
371 366
372 367 pub(super) children: ChildNodes<'on_disk>,
373 368
374 369 /// How many (non-inclusive) descendants of this node have an entry.
375 370 pub(super) descendants_with_entry_count: u32,
376 371
377 372 /// How many (non-inclusive) descendants of this node have an entry whose
378 373 /// state is "tracked".
379 374 pub(super) tracked_descendants_count: u32,
380 375 }
381 376
382 377 pub(super) enum NodeData {
383 378 Entry(DirstateEntry),
384 379 CachedDirectory { mtime: on_disk::Timestamp },
385 380 None,
386 381 }
387 382
388 383 impl Default for NodeData {
389 384 fn default() -> Self {
390 385 NodeData::None
391 386 }
392 387 }
393 388
394 389 impl NodeData {
395 390 fn has_entry(&self) -> bool {
396 391 match self {
397 392 NodeData::Entry(_) => true,
398 393 _ => false,
399 394 }
400 395 }
401 396
402 397 fn as_entry(&self) -> Option<&DirstateEntry> {
403 398 match self {
404 399 NodeData::Entry(entry) => Some(entry),
405 400 _ => None,
406 401 }
407 402 }
408 403 }
409 404
410 405 impl<'on_disk> DirstateMap<'on_disk> {
411 406 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
412 407 Self {
413 408 on_disk,
414 409 root: ChildNodes::default(),
415 410 nodes_with_entry_count: 0,
416 411 nodes_with_copy_source_count: 0,
417 412 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
418 413 unreachable_bytes: 0,
419 414 }
420 415 }
421 416
422 417 #[timed]
423 418 pub fn new_v2(
424 419 on_disk: &'on_disk [u8],
425 420 data_size: usize,
426 421 metadata: &[u8],
427 422 ) -> Result<Self, DirstateError> {
428 423 if let Some(data) = on_disk.get(..data_size) {
429 424 Ok(on_disk::read(data, metadata)?)
430 425 } else {
431 426 Err(DirstateV2ParseError.into())
432 427 }
433 428 }
434 429
435 430 #[timed]
436 431 pub fn new_v1(
437 432 on_disk: &'on_disk [u8],
438 433 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
439 434 let mut map = Self::empty(on_disk);
440 435 if map.on_disk.is_empty() {
441 436 return Ok((map, None));
442 437 }
443 438
444 439 let parents = parse_dirstate_entries(
445 440 map.on_disk,
446 441 |path, entry, copy_source| {
447 442 let tracked = entry.state().is_tracked();
448 443 let node = Self::get_or_insert_node(
449 444 map.on_disk,
450 445 &mut map.unreachable_bytes,
451 446 &mut map.root,
452 447 path,
453 448 WithBasename::to_cow_borrowed,
454 449 |ancestor| {
455 450 if tracked {
456 451 ancestor.tracked_descendants_count += 1
457 452 }
458 453 ancestor.descendants_with_entry_count += 1
459 454 },
460 455 )?;
461 456 assert!(
462 457 !node.data.has_entry(),
463 458 "duplicate dirstate entry in read"
464 459 );
465 460 assert!(
466 461 node.copy_source.is_none(),
467 462 "duplicate dirstate entry in read"
468 463 );
469 464 node.data = NodeData::Entry(*entry);
470 465 node.copy_source = copy_source.map(Cow::Borrowed);
471 466 map.nodes_with_entry_count += 1;
472 467 if copy_source.is_some() {
473 468 map.nodes_with_copy_source_count += 1
474 469 }
475 470 Ok(())
476 471 },
477 472 )?;
478 473 let parents = Some(parents.clone());
479 474
480 475 Ok((map, parents))
481 476 }
482 477
483 478 /// Assuming dirstate-v2 format, returns whether the next write should
484 479 /// append to the existing data file that contains `self.on_disk` (true),
485 480 /// or create a new data file from scratch (false).
486 481 pub(super) fn write_should_append(&self) -> bool {
487 482 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
488 483 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
489 484 }
490 485
491 486 fn get_node<'tree>(
492 487 &'tree self,
493 488 path: &HgPath,
494 489 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
495 490 let mut children = self.root.as_ref();
496 491 let mut components = path.components();
497 492 let mut component =
498 493 components.next().expect("expected at least one component");
499 494 loop {
500 495 if let Some(child) = children.get(component, self.on_disk)? {
501 496 if let Some(next_component) = components.next() {
502 497 component = next_component;
503 498 children = child.children(self.on_disk)?;
504 499 } else {
505 500 return Ok(Some(child));
506 501 }
507 502 } else {
508 503 return Ok(None);
509 504 }
510 505 }
511 506 }
512 507
513 508 /// Returns a mutable reference to the node at `path` if it exists
514 509 ///
515 510 /// This takes `root` instead of `&mut self` so that callers can mutate
516 511 /// other fields while the returned borrow is still valid
517 512 fn get_node_mut<'tree>(
518 513 on_disk: &'on_disk [u8],
519 514 unreachable_bytes: &mut u32,
520 515 root: &'tree mut ChildNodes<'on_disk>,
521 516 path: &HgPath,
522 517 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
523 518 let mut children = root;
524 519 let mut components = path.components();
525 520 let mut component =
526 521 components.next().expect("expected at least one component");
527 522 loop {
528 523 if let Some(child) = children
529 524 .make_mut(on_disk, unreachable_bytes)?
530 525 .get_mut(component)
531 526 {
532 527 if let Some(next_component) = components.next() {
533 528 component = next_component;
534 529 children = &mut child.children;
535 530 } else {
536 531 return Ok(Some(child));
537 532 }
538 533 } else {
539 534 return Ok(None);
540 535 }
541 536 }
542 537 }
543 538
544 539 pub(super) fn get_or_insert<'tree, 'path>(
545 540 &'tree mut self,
546 541 path: &HgPath,
547 542 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
548 543 Self::get_or_insert_node(
549 544 self.on_disk,
550 545 &mut self.unreachable_bytes,
551 546 &mut self.root,
552 547 path,
553 548 WithBasename::to_cow_owned,
554 549 |_| {},
555 550 )
556 551 }
557 552
558 553 fn get_or_insert_node<'tree, 'path>(
559 554 on_disk: &'on_disk [u8],
560 555 unreachable_bytes: &mut u32,
561 556 root: &'tree mut ChildNodes<'on_disk>,
562 557 path: &'path HgPath,
563 558 to_cow: impl Fn(
564 559 WithBasename<&'path HgPath>,
565 560 ) -> WithBasename<Cow<'on_disk, HgPath>>,
566 561 mut each_ancestor: impl FnMut(&mut Node),
567 562 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
568 563 let mut child_nodes = root;
569 564 let mut inclusive_ancestor_paths =
570 565 WithBasename::inclusive_ancestors_of(path);
571 566 let mut ancestor_path = inclusive_ancestor_paths
572 567 .next()
573 568 .expect("expected at least one inclusive ancestor");
574 569 loop {
575 570 // TODO: can we avoid allocating an owned key in cases where the
576 571 // map already contains that key, without introducing double
577 572 // lookup?
578 573 let child_node = child_nodes
579 574 .make_mut(on_disk, unreachable_bytes)?
580 575 .entry(to_cow(ancestor_path))
581 576 .or_default();
582 577 if let Some(next) = inclusive_ancestor_paths.next() {
583 578 each_ancestor(child_node);
584 579 ancestor_path = next;
585 580 child_nodes = &mut child_node.children;
586 581 } else {
587 582 return Ok(child_node);
588 583 }
589 584 }
590 585 }
591 586
592 587 fn add_or_remove_file(
593 588 &mut self,
594 589 path: &HgPath,
595 590 old_state: Option<EntryState>,
596 591 new_entry: DirstateEntry,
597 592 ) -> Result<(), DirstateV2ParseError> {
598 593 let had_entry = old_state.is_some();
599 594 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
600 595 let tracked_count_increment =
601 596 match (was_tracked, new_entry.state().is_tracked()) {
602 597 (false, true) => 1,
603 598 (true, false) => -1,
604 599 _ => 0,
605 600 };
606 601
607 602 let node = Self::get_or_insert_node(
608 603 self.on_disk,
609 604 &mut self.unreachable_bytes,
610 605 &mut self.root,
611 606 path,
612 607 WithBasename::to_cow_owned,
613 608 |ancestor| {
614 609 if !had_entry {
615 610 ancestor.descendants_with_entry_count += 1;
616 611 }
617 612
618 613 // We can’t use `+= increment` because the counter is unsigned,
619 614 // and we want debug builds to detect accidental underflow
620 615 // through zero
621 616 match tracked_count_increment {
622 617 1 => ancestor.tracked_descendants_count += 1,
623 618 -1 => ancestor.tracked_descendants_count -= 1,
624 619 _ => {}
625 620 }
626 621 },
627 622 )?;
628 623 if !had_entry {
629 624 self.nodes_with_entry_count += 1
630 625 }
631 626 node.data = NodeData::Entry(new_entry);
632 627 Ok(())
633 628 }
634 629
635 630 fn iter_nodes<'tree>(
636 631 &'tree self,
637 632 ) -> impl Iterator<
638 633 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
639 634 > + 'tree {
640 635 // Depth first tree traversal.
641 636 //
642 637 // If we could afford internal iteration and recursion,
643 638 // this would look like:
644 639 //
645 640 // ```
646 641 // fn traverse_children(
647 642 // children: &ChildNodes,
648 643 // each: &mut impl FnMut(&Node),
649 644 // ) {
650 645 // for child in children.values() {
651 646 // traverse_children(&child.children, each);
652 647 // each(child);
653 648 // }
654 649 // }
655 650 // ```
656 651 //
657 652 // However we want an external iterator and therefore can’t use the
658 653 // call stack. Use an explicit stack instead:
659 654 let mut stack = Vec::new();
660 655 let mut iter = self.root.as_ref().iter();
661 656 std::iter::from_fn(move || {
662 657 while let Some(child_node) = iter.next() {
663 658 let children = match child_node.children(self.on_disk) {
664 659 Ok(children) => children,
665 660 Err(error) => return Some(Err(error)),
666 661 };
667 662 // Pseudo-recursion
668 663 let new_iter = children.iter();
669 664 let old_iter = std::mem::replace(&mut iter, new_iter);
670 665 stack.push((child_node, old_iter));
671 666 }
672 667 // Found the end of a `children.iter()` iterator.
673 668 if let Some((child_node, next_iter)) = stack.pop() {
674 669 // "Return" from pseudo-recursion by restoring state from the
675 670 // explicit stack
676 671 iter = next_iter;
677 672
678 673 Some(Ok(child_node))
679 674 } else {
680 675 // Reached the bottom of the stack, we’re done
681 676 None
682 677 }
683 678 })
684 679 }
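The pseudo-recursion above is easier to see in isolation. A minimal sketch of the same technique on a hypothetical `Tree` type (the type and function names are illustrative, not part of this code):

    struct Tree {
        value: u32,
        children: Vec<Tree>,
    }

    // Post-order depth-first traversal exposed as an external iterator,
    // keeping the would-be call stack in an explicit `Vec`.
    fn iter_post_order(root: &Tree) -> impl Iterator<Item = u32> + '_ {
        let mut stack = Vec::new();
        let mut iter = root.children.iter();
        std::iter::from_fn(move || {
            while let Some(child) = iter.next() {
                // Pseudo-recursion: remember where we were, then descend.
                let old_iter = std::mem::replace(&mut iter, child.children.iter());
                stack.push((child, old_iter));
            }
            // "Return" from pseudo-recursion by restoring the parent iterator.
            if let Some((node, parent_iter)) = stack.pop() {
                iter = parent_iter;
                Some(node.value)
            } else {
                None
            }
        })
    }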
685 680
686 681 fn clear_known_ambiguous_mtimes(
687 682 &mut self,
688 683 paths: &[impl AsRef<HgPath>],
689 684 ) -> Result<(), DirstateV2ParseError> {
690 685 for path in paths {
691 686 if let Some(node) = Self::get_node_mut(
692 687 self.on_disk,
693 688 &mut self.unreachable_bytes,
694 689 &mut self.root,
695 690 path.as_ref(),
696 691 )? {
697 692 if let NodeData::Entry(entry) = &mut node.data {
698 693 entry.set_possibly_dirty();
699 694 }
700 695 }
701 696 }
702 697 Ok(())
703 698 }
704 699
705 700 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
706 701 if let Cow::Borrowed(path) = path {
707 702 *unreachable_bytes += path.len() as u32
708 703 }
709 704 }
710 705 }
711 706
712 707 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
713 708 ///
714 709 /// The callback is only called for incoming `Ok` values. Errors are passed
715 710 /// through as-is. In order to let it use the `?` operator the callback is
716 711 /// expected to return a `Result` of `Option`, instead of an `Option` of
717 712 /// `Result`.
718 713 fn filter_map_results<'a, I, F, A, B, E>(
719 714 iter: I,
720 715 f: F,
721 716 ) -> impl Iterator<Item = Result<B, E>> + 'a
722 717 where
723 718 I: Iterator<Item = Result<A, E>> + 'a,
724 719 F: Fn(A) -> Result<Option<B>, E> + 'a,
725 720 {
726 721 iter.filter_map(move |result| match result {
727 722 Ok(node) => f(node).transpose(),
728 723 Err(e) => Some(Err(e)),
729 724 })
730 725 }
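A usage sketch with hypothetical values (not from the patch) of the behaviour described above: `Ok` items are filtered and mapped, errors pass through unchanged.

    let input: Vec<Result<u32, &str>> = vec![Ok(1), Ok(2), Err("oops"), Ok(4)];
    let output: Vec<_> = filter_map_results(input.into_iter(), |n| {
        // Keep even numbers only, scaled by 10; the callback returns a
        // `Result`, so the `?` operator could be used here as well.
        Ok(if n % 2 == 0 { Some(n * 10) } else { None })
    })
    .collect();
    assert_eq!(output, vec![Ok(20), Err("oops"), Ok(40)]);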
731 726
732 727 impl OwningDirstateMap {
733 728 pub fn clear(&mut self) {
734 729 let map = self.get_map_mut();
735 730 map.root = Default::default();
736 731 map.nodes_with_entry_count = 0;
737 732 map.nodes_with_copy_source_count = 0;
738 733 }
739 734
740 735 pub fn set_entry(
741 736 &mut self,
742 737 filename: &HgPath,
743 738 entry: DirstateEntry,
744 739 ) -> Result<(), DirstateV2ParseError> {
745 740 let map = self.get_map_mut();
746 741 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
747 742 Ok(())
748 743 }
749 744
750 745 pub fn add_file(
751 746 &mut self,
752 747 filename: &HgPath,
753 748 entry: DirstateEntry,
754 749 ) -> Result<(), DirstateError> {
755 750 let old_state = self.get(filename)?.map(|e| e.state());
756 751 let map = self.get_map_mut();
757 752 Ok(map.add_or_remove_file(filename, old_state, entry)?)
758 753 }
759 754
760 755 pub fn remove_file(
761 756 &mut self,
762 757 filename: &HgPath,
763 758 in_merge: bool,
764 759 ) -> Result<(), DirstateError> {
765 760 let old_entry_opt = self.get(filename)?;
766 761 let old_state = old_entry_opt.map(|e| e.state());
767 762 let mut size = 0;
768 763 if in_merge {
769 764 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
770 765 // during a merge. So I (marmoute) am not sure we need the
771 766 // conditional at all. Double-checking this with an assert
772 767 // would be nice.
773 768 if let Some(old_entry) = old_entry_opt {
774 769 // backup the previous state
775 770 if old_entry.state() == EntryState::Merged {
776 771 size = SIZE_NON_NORMAL;
777 772 } else if old_entry.state() == EntryState::Normal
778 773 && old_entry.size() == SIZE_FROM_OTHER_PARENT
779 774 {
780 775 // other parent
781 776 size = SIZE_FROM_OTHER_PARENT;
782 777 }
783 778 }
784 779 }
785 780 if size == 0 {
786 781 self.copy_map_remove(filename)?;
787 782 }
788 783 let map = self.get_map_mut();
789 784 let entry = DirstateEntry::new_removed(size);
790 785 Ok(map.add_or_remove_file(filename, old_state, entry)?)
791 786 }
792 787
793 788 pub fn drop_entry_and_copy_source(
794 789 &mut self,
795 790 filename: &HgPath,
796 791 ) -> Result<(), DirstateError> {
797 792 let was_tracked = self
798 793 .get(filename)?
799 794 .map_or(false, |e| e.state().is_tracked());
800 795 let map = self.get_map_mut();
801 796 struct Dropped {
802 797 was_tracked: bool,
803 798 had_entry: bool,
804 799 had_copy_source: bool,
805 800 }
806 801
807 802 /// If this returns `Ok(Some((dropped, removed)))`, then
808 803 ///
809 804 /// * `dropped` is about the leaf node that was at `filename`
810 805 /// * `removed` is whether this particular level of recursion just
811 806 /// removed a node in `nodes`.
812 807 fn recur<'on_disk>(
813 808 on_disk: &'on_disk [u8],
814 809 unreachable_bytes: &mut u32,
815 810 nodes: &mut ChildNodes<'on_disk>,
816 811 path: &HgPath,
817 812 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
818 813 let (first_path_component, rest_of_path) =
819 814 path.split_first_component();
820 815 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
821 816 let node = if let Some(node) = nodes.get_mut(first_path_component)
822 817 {
823 818 node
824 819 } else {
825 820 return Ok(None);
826 821 };
827 822 let dropped;
828 823 if let Some(rest) = rest_of_path {
829 824 if let Some((d, removed)) = recur(
830 825 on_disk,
831 826 unreachable_bytes,
832 827 &mut node.children,
833 828 rest,
834 829 )? {
835 830 dropped = d;
836 831 if dropped.had_entry {
837 832 node.descendants_with_entry_count -= 1;
838 833 }
839 834 if dropped.was_tracked {
840 835 node.tracked_descendants_count -= 1;
841 836 }
842 837
843 838 // Directory caches must be invalidated when removing a
844 839 // child node
845 840 if removed {
846 841 if let NodeData::CachedDirectory { .. } = &node.data {
847 842 node.data = NodeData::None
848 843 }
849 844 }
850 845 } else {
851 846 return Ok(None);
852 847 }
853 848 } else {
854 849 let had_entry = node.data.has_entry();
855 850 if had_entry {
856 851 node.data = NodeData::None
857 852 }
858 853 if let Some(source) = &node.copy_source {
859 854 DirstateMap::count_dropped_path(unreachable_bytes, source);
860 855 node.copy_source = None
861 856 }
862 857 dropped = Dropped {
863 858 was_tracked: node
864 859 .data
865 860 .as_entry()
866 861 .map_or(false, |entry| entry.state().is_tracked()),
867 862 had_entry,
868 863 had_copy_source: node.copy_source.take().is_some(),
869 864 };
870 865 }
871 866 // After recursion, for both leaf (rest_of_path is None) nodes and
872 867 // parent nodes, remove a node if it just became empty.
873 868 let remove = !node.data.has_entry()
874 869 && node.copy_source.is_none()
875 870 && node.children.is_empty();
876 871 if remove {
877 872 let (key, _) =
878 873 nodes.remove_entry(first_path_component).unwrap();
879 874 DirstateMap::count_dropped_path(
880 875 unreachable_bytes,
881 876 key.full_path(),
882 877 )
883 878 }
884 879 Ok(Some((dropped, remove)))
885 880 }
886 881
887 882 if let Some((dropped, _removed)) = recur(
888 883 map.on_disk,
889 884 &mut map.unreachable_bytes,
890 885 &mut map.root,
891 886 filename,
892 887 )? {
893 888 if dropped.had_entry {
894 889 map.nodes_with_entry_count -= 1
895 890 }
896 891 if dropped.had_copy_source {
897 892 map.nodes_with_copy_source_count -= 1
898 893 }
899 894 } else {
900 895 debug_assert!(!was_tracked);
901 896 }
902 897 Ok(())
903 898 }
904 899
905 900 pub fn has_tracked_dir(
906 901 &mut self,
907 902 directory: &HgPath,
908 903 ) -> Result<bool, DirstateError> {
909 904 let map = self.get_map_mut();
910 905 if let Some(node) = map.get_node(directory)? {
911 906 // A node without a `DirstateEntry` was created to hold child
912 907 // nodes, and is therefore a directory.
913 908 let state = node.state()?;
914 909 Ok(state.is_none() && node.tracked_descendants_count() > 0)
915 910 } else {
916 911 Ok(false)
917 912 }
918 913 }
919 914
920 915 pub fn has_dir(
921 916 &mut self,
922 917 directory: &HgPath,
923 918 ) -> Result<bool, DirstateError> {
924 919 let map = self.get_map_mut();
925 920 if let Some(node) = map.get_node(directory)? {
926 921 // A node without a `DirstateEntry` was created to hold child
927 922 // nodes, and is therefore a directory.
928 923 let state = node.state()?;
929 924 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
930 925 } else {
931 926 Ok(false)
932 927 }
933 928 }
934 929
935 930 #[timed]
936 931 pub fn pack_v1(
937 932 &mut self,
938 933 parents: DirstateParents,
939 934 now: Timestamp,
940 935 ) -> Result<Vec<u8>, DirstateError> {
941 936 let map = self.get_map_mut();
942 937 let now: i32 = now.0.try_into().expect("time overflow");
943 938 let mut ambiguous_mtimes = Vec::new();
944 939 // Optimization (to be measured?): pre-compute size to avoid `Vec`
945 940 // reallocations
946 941 let mut size = parents.as_bytes().len();
947 942 for node in map.iter_nodes() {
948 943 let node = node?;
949 944 if let Some(entry) = node.entry()? {
950 945 size += packed_entry_size(
951 946 node.full_path(map.on_disk)?,
952 947 node.copy_source(map.on_disk)?,
953 948 );
954 949 if entry.mtime_is_ambiguous(now) {
955 950 ambiguous_mtimes.push(
956 951 node.full_path_borrowed(map.on_disk)?
957 952 .detach_from_tree(),
958 953 )
959 954 }
960 955 }
961 956 }
962 957 map.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
963 958
964 959 let mut packed = Vec::with_capacity(size);
965 960 packed.extend(parents.as_bytes());
966 961
967 962 for node in map.iter_nodes() {
968 963 let node = node?;
969 964 if let Some(entry) = node.entry()? {
970 965 pack_entry(
971 966 node.full_path(map.on_disk)?,
972 967 &entry,
973 968 node.copy_source(map.on_disk)?,
974 969 &mut packed,
975 970 );
976 971 }
977 972 }
978 973 Ok(packed)
979 974 }
980 975
981 976 /// Returns new data and metadata together with whether that data should be
982 977 /// appended to the existing data file whose content is at
983 978 /// `map.on_disk` (true), instead of written to a new data file
984 979 /// (false).
985 980 #[timed]
986 981 pub fn pack_v2(
987 982 &mut self,
988 983 now: Timestamp,
989 984 can_append: bool,
990 985 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
991 986 let map = self.get_map_mut();
992 987 // TODO: how do we want to handle this in 2038?
993 988 let now: i32 = now.0.try_into().expect("time overflow");
994 989 let mut paths = Vec::new();
995 990 for node in map.iter_nodes() {
996 991 let node = node?;
997 992 if let Some(entry) = node.entry()? {
998 993 if entry.mtime_is_ambiguous(now) {
999 994 paths.push(
1000 995 node.full_path_borrowed(map.on_disk)?
1001 996 .detach_from_tree(),
1002 997 )
1003 998 }
1004 999 }
1005 1000 }
1006 1001 // Borrow of `self` ends here since we collect cloned paths
1007 1002
1008 1003 map.clear_known_ambiguous_mtimes(&paths)?;
1009 1004
1010 1005 on_disk::write(map, can_append)
1011 1006 }
1012 1007
1013 1008 pub fn status<'a>(
1014 1009 &'a mut self,
1015 1010 matcher: &'a (dyn Matcher + Sync),
1016 1011 root_dir: PathBuf,
1017 1012 ignore_files: Vec<PathBuf>,
1018 1013 options: StatusOptions,
1019 1014 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
1020 1015 {
1021 1016 let map = self.get_map_mut();
1022 1017 super::status::status(map, matcher, root_dir, ignore_files, options)
1023 1018 }
1024 1019
1025 1020 pub fn copy_map_len(&self) -> usize {
1026 1021 let map = self.get_map();
1027 1022 map.nodes_with_copy_source_count as usize
1028 1023 }
1029 1024
1030 1025 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1031 1026 let map = self.get_map();
1032 1027 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1033 1028 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1034 1029 Some((node.full_path(map.on_disk)?, source))
1035 1030 } else {
1036 1031 None
1037 1032 })
1038 1033 }))
1039 1034 }
1040 1035
1041 1036 pub fn copy_map_contains_key(
1042 1037 &self,
1043 1038 key: &HgPath,
1044 1039 ) -> Result<bool, DirstateV2ParseError> {
1045 1040 let map = self.get_map();
1046 1041 Ok(if let Some(node) = map.get_node(key)? {
1047 1042 node.has_copy_source()
1048 1043 } else {
1049 1044 false
1050 1045 })
1051 1046 }
1052 1047
1053 1048 pub fn copy_map_get(
1054 1049 &self,
1055 1050 key: &HgPath,
1056 1051 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1057 1052 let map = self.get_map();
1058 1053 if let Some(node) = map.get_node(key)? {
1059 1054 if let Some(source) = node.copy_source(map.on_disk)? {
1060 1055 return Ok(Some(source));
1061 1056 }
1062 1057 }
1063 1058 Ok(None)
1064 1059 }
1065 1060
1066 1061 pub fn copy_map_remove(
1067 1062 &mut self,
1068 1063 key: &HgPath,
1069 1064 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1070 1065 let map = self.get_map_mut();
1071 1066 let count = &mut map.nodes_with_copy_source_count;
1072 1067 let unreachable_bytes = &mut map.unreachable_bytes;
1073 1068 Ok(DirstateMap::get_node_mut(
1074 1069 map.on_disk,
1075 1070 unreachable_bytes,
1076 1071 &mut map.root,
1077 1072 key,
1078 1073 )?
1079 1074 .and_then(|node| {
1080 1075 if let Some(source) = &node.copy_source {
1081 1076 *count -= 1;
1082 1077 DirstateMap::count_dropped_path(unreachable_bytes, source);
1083 1078 }
1084 1079 node.copy_source.take().map(Cow::into_owned)
1085 1080 }))
1086 1081 }
1087 1082
1088 1083 pub fn copy_map_insert(
1089 1084 &mut self,
1090 1085 key: HgPathBuf,
1091 1086 value: HgPathBuf,
1092 1087 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1093 1088 let map = self.get_map_mut();
1094 1089 let node = DirstateMap::get_or_insert_node(
1095 1090 map.on_disk,
1096 1091 &mut map.unreachable_bytes,
1097 1092 &mut map.root,
1098 1093 &key,
1099 1094 WithBasename::to_cow_owned,
1100 1095 |_ancestor| {},
1101 1096 )?;
1102 1097 if node.copy_source.is_none() {
1103 1098 map.nodes_with_copy_source_count += 1
1104 1099 }
1105 1100 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1106 1101 }
1107 1102
1108 1103 pub fn len(&self) -> usize {
1109 1104 let map = self.get_map();
1110 1105 map.nodes_with_entry_count as usize
1111 1106 }
1112 1107
1113 1108 pub fn contains_key(
1114 1109 &self,
1115 1110 key: &HgPath,
1116 1111 ) -> Result<bool, DirstateV2ParseError> {
1117 1112 Ok(self.get(key)?.is_some())
1118 1113 }
1119 1114
1120 1115 pub fn get(
1121 1116 &self,
1122 1117 key: &HgPath,
1123 1118 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1124 1119 let map = self.get_map();
1125 1120 Ok(if let Some(node) = map.get_node(key)? {
1126 1121 node.entry()?
1127 1122 } else {
1128 1123 None
1129 1124 })
1130 1125 }
1131 1126
1132 1127 pub fn iter(&self) -> StateMapIter<'_> {
1133 1128 let map = self.get_map();
1134 1129 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1135 1130 Ok(if let Some(entry) = node.entry()? {
1136 1131 Some((node.full_path(map.on_disk)?, entry))
1137 1132 } else {
1138 1133 None
1139 1134 })
1140 1135 }))
1141 1136 }
1142 1137
1143 1138 pub fn iter_tracked_dirs(
1144 1139 &mut self,
1145 1140 ) -> Result<
1146 1141 Box<
1147 1142 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1148 1143 + Send
1149 1144 + '_,
1150 1145 >,
1151 1146 DirstateError,
1152 1147 > {
1153 1148 let map = self.get_map_mut();
1154 1149 let on_disk = map.on_disk;
1155 1150 Ok(Box::new(filter_map_results(
1156 1151 map.iter_nodes(),
1157 1152 move |node| {
1158 1153 Ok(if node.tracked_descendants_count() > 0 {
1159 1154 Some(node.full_path(on_disk)?)
1160 1155 } else {
1161 1156 None
1162 1157 })
1163 1158 },
1164 1159 )))
1165 1160 }
1166 1161
1167 1162 pub fn debug_iter(
1168 1163 &self,
1169 1164 all: bool,
1170 1165 ) -> Box<
1171 1166 dyn Iterator<
1172 1167 Item = Result<
1173 1168 (&HgPath, (u8, i32, i32, i32)),
1174 1169 DirstateV2ParseError,
1175 1170 >,
1176 1171 > + Send
1177 1172 + '_,
1178 1173 > {
1179 1174 let map = self.get_map();
1180 1175 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1181 1176 let debug_tuple = if let Some(entry) = node.entry()? {
1182 1177 entry.debug_tuple()
1183 1178 } else if !all {
1184 1179 return Ok(None);
1185 1180 } else if let Some(mtime) = node.cached_directory_mtime() {
1186 1181 (b' ', 0, -1, mtime.seconds() as i32)
1187 1182 } else {
1188 1183 (b' ', 0, -1, -1)
1189 1184 };
1190 1185 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1191 1186 }))
1192 1187 }
1193 1188 }
@@ -1,763 +1,808 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
6 6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 7 //! struct, followed by the data file identifier.
8 8 //!
9 9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 10 //! file may be longer than the size given in the docket, but not shorter. Only
11 11 //! the start of the data file up to the given size is considered. The
12 12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 13 //! `Root` struct is found at the end of that slice of data.
14 14 //!
15 15 //! Its `root_nodes` field contains the slice (offset and length) to
16 16 //! the nodes representing the files and directories at the root of the
17 17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 19 //! nodes (if any) for nested files and directories.
20 20
21 21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 23 use crate::errors::HgError;
24 24 use crate::utils::hg_path::HgPath;
25 25 use crate::DirstateEntry;
26 26 use crate::DirstateError;
27 27 use crate::DirstateParents;
28 use crate::EntryState;
28 use bitflags::bitflags;
29 29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 30 use bytes_cast::BytesCast;
31 31 use format_bytes::format_bytes;
32 32 use std::borrow::Cow;
33 33 use std::convert::{TryFrom, TryInto};
34 34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35 35
36 36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 37 /// This is a redundant sanity check more than an actual "magic number" since
38 38 /// `.hg/requires` already governs which format should be used.
39 39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40 40
41 41 /// Keep space for 256-bit hashes
42 42 const STORED_NODE_ID_BYTES: usize = 32;
43 43
44 44 /// … even though only 160 bits are used for now, with SHA-1
45 45 const USED_NODE_ID_BYTES: usize = 20;
46 46
47 47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49 49
50 50 /// Must match the constant of the same name in
51 51 /// `mercurial/dirstateutils/docket.py`
52 52 const TREE_METADATA_SIZE: usize = 44;
53 53
54 54 /// Make sure that size-affecting changes are made knowingly
55 55 #[allow(unused)]
56 56 fn static_assert_size_of() {
57 57 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
58 58 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
59 59 let _ = std::mem::transmute::<Node, [u8; 43]>;
60 60 }
61 61
62 62 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
63 63 #[derive(BytesCast)]
64 64 #[repr(C)]
65 65 struct DocketHeader {
66 66 marker: [u8; V2_FORMAT_MARKER.len()],
67 67 parent_1: [u8; STORED_NODE_ID_BYTES],
68 68 parent_2: [u8; STORED_NODE_ID_BYTES],
69 69
70 70 /// Counted in bytes
71 71 data_size: Size,
72 72
73 73 metadata: TreeMetadata,
74 74
75 75 uuid_size: u8,
76 76 }
77 77
78 78 pub struct Docket<'on_disk> {
79 79 header: &'on_disk DocketHeader,
80 80 uuid: &'on_disk [u8],
81 81 }
82 82
83 83 #[derive(BytesCast)]
84 84 #[repr(C)]
85 85 struct TreeMetadata {
86 86 root_nodes: ChildNodes,
87 87 nodes_with_entry_count: Size,
88 88 nodes_with_copy_source_count: Size,
89 89
90 90 /// How many bytes of this data file are not used anymore
91 91 unreachable_bytes: Size,
92 92
93 93 /// Current version always sets these bytes to zero when creating or
94 94 /// updating a dirstate. Future versions could assign some bits to signal
95 95 /// for example "the version that last wrote/updated this dirstate did so
96 96 /// in such and such way that can be relied on by versions that know to."
97 97 unused: [u8; 4],
98 98
99 99 /// If non-zero, a hash of ignore files that were used for some previous
100 100 /// run of the `status` algorithm.
101 101 ///
102 102 /// We define:
103 103 ///
104 104 /// * "Root" ignore files are `.hgignore` at the root of the repository if
105 105 /// it exists, and files from `ui.ignore.*` config. This set of files is
106 106 /// then sorted by the string representation of their path.
107 107 /// * The "expanded contents" of an ignore files is the byte string made
108 108 /// by concatenating its contents with the "expanded contents" of other
109 109 /// files included with `include:` or `subinclude:` files, in inclusion
110 110 /// order. This definition is recursive, as included files can
111 111 /// themselves include more files.
112 112 ///
113 113 /// This hash is defined as the SHA-1 of the concatenation (in sorted
114 114 /// order) of the "expanded contents" of each "root" ignore file.
115 115 /// (Note that computing this does not require actually concatenating byte
116 116 /// strings into contiguous memory, instead SHA-1 hashing can be done
117 117 /// incrementally.)
118 118 ignore_patterns_hash: IgnorePatternsHash,
119 119 }
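The incremental hashing mentioned above can be sketched as follows, assuming the RustCrypto `sha1` crate (the crate choice and the helper name are assumptions, not taken from this patch):

    use sha1::{Digest, Sha1};

    /// `expanded` holds the "expanded contents" of each root ignore file,
    /// already in sorted order; nothing is concatenated into one buffer.
    fn ignore_patterns_hash(expanded: &[Vec<u8>]) -> [u8; 20] {
        let mut hasher = Sha1::new();
        for contents in expanded {
            hasher.update(contents);
        }
        hasher.finalize().into()
    }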
120 120
121 121 #[derive(BytesCast)]
122 122 #[repr(C)]
123 123 pub(super) struct Node {
124 124 full_path: PathSlice,
125 125
126 126 /// In bytes from `self.full_path.start`
127 127 base_name_start: PathSize,
128 128
129 129 copy_source: OptPathSlice,
130 130 children: ChildNodes,
131 131 pub(super) descendants_with_entry_count: Size,
132 132 pub(super) tracked_descendants_count: Size,
133 133
134 /// Depending on the value of `state`:
134 /// Depending on the bits in `flags`:
135 ///
136 /// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the
137 /// node has an entry.
135 138 ///
136 /// * A null byte: `data` is not used.
139 /// - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are
140 /// meaningful. Otherwise they are set to zero.
141 /// - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is
142 /// set to zero.
137 143 ///
138 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
139 /// represent a dirstate entry like in the v1 format.
144 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`
145 /// are set, the node does not have an entry and `data` is set to all
146 /// zeros.
140 147 ///
141 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
142 /// as the `Timestamp` for the mtime of a cached directory.
148 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but
149 /// `HAS_MTIME` is set, the bytes of `data` should instead be
150 /// interpreted as the `Timestamp` for the mtime of a cached directory.
143 151 ///
144 /// The presence of this state means that at some point, this path in
145 /// the working directory was observed:
152 /// The presence of this combination of flags means that at some point,
153 /// this path in the working directory was observed:
146 154 ///
147 155 /// - To be a directory
148 156 /// - With the modification time as given by `Timestamp`
149 157 /// - That timestamp was already strictly in the past when observed,
150 158 /// meaning that later changes cannot happen in the same clock tick
151 159 /// and must cause a different modification time (unless the system
152 160 /// clock jumps back and we get unlucky, which is not impossible but
153 161 /// deemed unlikely enough).
154 162 /// - All direct children of this directory (as returned by
155 163 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
156 164 /// are ignored by ignore patterns whose hash is in
157 165 /// `TreeMetadata::ignore_patterns_hash`.
158 166 ///
159 167 /// This means that if `std::fs::symlink_metadata` later reports the
160 168 /// same modification time and ignored patterns haven’t changed, a run
161 169 /// of status that is not listing ignored files can skip calling
162 170 /// `std::fs::read_dir` again for this directory, and iterate child
163 171 /// dirstate nodes instead.
164 state: u8,
172 flags: Flags,
165 173 data: Entry,
166 174 }
167 175
168 #[derive(BytesCast, Copy, Clone)]
176 bitflags! {
177 #[derive(BytesCast)]
178 #[repr(C)]
179 struct Flags: u8 {
180 const WDIR_TRACKED = 1 << 0;
181 const P1_TRACKED = 1 << 1;
182 const P2_INFO = 1 << 2;
183 const HAS_MODE_AND_SIZE = 1 << 3;
184 const HAS_MTIME = 1 << 4;
185 }
186 }
187
188 #[derive(BytesCast, Copy, Clone, Debug)]
169 189 #[repr(C)]
170 190 struct Entry {
171 191 mode: I32Be,
172 192 mtime: I32Be,
173 193 size: I32Be,
174 194 }
175 195
176 196 /// Duration since the Unix epoch
177 197 #[derive(BytesCast, Copy, Clone, PartialEq)]
178 198 #[repr(C)]
179 199 pub(super) struct Timestamp {
180 200 seconds: I64Be,
181 201
182 202 /// In `0 .. 1_000_000_000`.
183 203 ///
184 204 /// This timestamp is later or earlier than `(seconds, 0)` by this many
185 205 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
186 206 nanoseconds: U32Be,
187 207 }
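A small worked example of that sign convention (not part of the patch): the nanoseconds always push the timestamp further away from `(seconds, 0)` in the direction given by the sign of `seconds`, which is what the `From` conversions below rely on.

```rust
// seconds = -1, nanoseconds = 500_000_000 denotes 1.5 s *before* the
// Unix epoch, not 0.5 s before it.
use std::time::{Duration, UNIX_EPOCH};

fn main() {
    let (seconds, nanoseconds): (i64, u32) = (-1, 500_000_000);
    let t = UNIX_EPOCH - Duration::new((-seconds) as u64, nanoseconds);
    assert_eq!(
        UNIX_EPOCH.duration_since(t).unwrap(),
        Duration::new(1, 500_000_000)
    );
}
```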
188 208
189 209 /// Counted in bytes from the start of the file
190 210 ///
191 211 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
192 212 type Offset = U32Be;
193 213
194 214 /// Counted in number of items
195 215 ///
196 216 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
197 217 type Size = U32Be;
198 218
199 219 /// Counted in bytes
200 220 ///
201 221 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
202 222 type PathSize = U16Be;
203 223
204 224 /// A contiguous sequence of `len` times `Node`, representing the child nodes
205 225 /// of either some other node or of the repository root.
206 226 ///
207 227 /// Always sorted by ascending `full_path`, to allow binary search.
208 228 /// Since nodes with the same parent node also have the same parent path,
209 229 /// only the `base_name`s need to be compared during binary search.
210 230 #[derive(BytesCast, Copy, Clone)]
211 231 #[repr(C)]
212 232 struct ChildNodes {
213 233 start: Offset,
214 234 len: Size,
215 235 }
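Because siblings share their parent's path prefix, a lookup over these child nodes only needs to compare base names. A hedged sketch of such a lookup (the `find_child` helper below does not exist in this module; the real search lives in the tree code that consumes these nodes):

```rust
// Hypothetical sketch relying on the documented sort order.
fn find_child<'on_disk>(
    on_disk: &'on_disk [u8],
    children: &'on_disk [Node],
    base_name: &HgPath,
) -> Result<Option<&'on_disk Node>, DirstateV2ParseError> {
    let (mut lo, mut hi) = (0, children.len());
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        // Siblings share their parent path, so comparing base names is
        // equivalent to comparing full paths here.
        let candidate = children[mid].base_name(on_disk)?;
        match candidate.as_bytes().cmp(base_name.as_bytes()) {
            std::cmp::Ordering::Equal => return Ok(Some(&children[mid])),
            std::cmp::Ordering::Less => lo = mid + 1,
            std::cmp::Ordering::Greater => hi = mid,
        }
    }
    Ok(None)
}
```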
216 236
217 237 /// A `HgPath` of `len` bytes
218 238 #[derive(BytesCast, Copy, Clone)]
219 239 #[repr(C)]
220 240 struct PathSlice {
221 241 start: Offset,
222 242 len: PathSize,
223 243 }
224 244
225 245 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
226 246 type OptPathSlice = PathSlice;
227 247
228 248 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
229 249 ///
230 250 /// This should only happen if Mercurial is buggy or a repository is corrupted.
231 251 #[derive(Debug)]
232 252 pub struct DirstateV2ParseError;
233 253
234 254 impl From<DirstateV2ParseError> for HgError {
235 255 fn from(_: DirstateV2ParseError) -> Self {
236 256 HgError::corrupted("dirstate-v2 parse error")
237 257 }
238 258 }
239 259
240 260 impl From<DirstateV2ParseError> for crate::DirstateError {
241 261 fn from(error: DirstateV2ParseError) -> Self {
242 262 HgError::from(error).into()
243 263 }
244 264 }
245 265
246 266 impl<'on_disk> Docket<'on_disk> {
247 267 pub fn parents(&self) -> DirstateParents {
248 268 use crate::Node;
249 269 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 270 .unwrap()
251 271 .clone();
252 272 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 273 .unwrap()
254 274 .clone();
255 275 DirstateParents { p1, p2 }
256 276 }
257 277
258 278 pub fn tree_metadata(&self) -> &[u8] {
259 279 self.header.metadata.as_bytes()
260 280 }
261 281
262 282 pub fn data_size(&self) -> usize {
263 283 // This `unwrap` could only panic on a 16-bit CPU
264 284 self.header.data_size.get().try_into().unwrap()
265 285 }
266 286
267 287 pub fn data_filename(&self) -> String {
268 288 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
269 289 }
270 290 }
271 291
272 292 pub fn read_docket(
273 293 on_disk: &[u8],
274 294 ) -> Result<Docket<'_>, DirstateV2ParseError> {
275 295 let (header, uuid) =
276 296 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
277 297 let uuid_size = header.uuid_size as usize;
278 298 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 299 Ok(Docket { header, uuid })
280 300 } else {
281 301 Err(DirstateV2ParseError)
282 302 }
283 303 }
284 304
285 305 pub(super) fn read<'on_disk>(
286 306 on_disk: &'on_disk [u8],
287 307 metadata: &[u8],
288 308 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
289 309 if on_disk.is_empty() {
290 310 return Ok(DirstateMap::empty(on_disk));
291 311 }
292 312 let (meta, _) = TreeMetadata::from_bytes(metadata)
293 313 .map_err(|_| DirstateV2ParseError)?;
294 314 let dirstate_map = DirstateMap {
295 315 on_disk,
296 316 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
297 317 on_disk,
298 318 meta.root_nodes,
299 319 )?),
300 320 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
301 321 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
302 322 ignore_patterns_hash: meta.ignore_patterns_hash,
303 323 unreachable_bytes: meta.unreachable_bytes.get(),
304 324 };
305 325 Ok(dirstate_map)
306 326 }
307 327
308 328 impl Node {
309 329 pub(super) fn full_path<'on_disk>(
310 330 &self,
311 331 on_disk: &'on_disk [u8],
312 332 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
313 333 read_hg_path(on_disk, self.full_path)
314 334 }
315 335
316 336 pub(super) fn base_name_start<'on_disk>(
317 337 &self,
318 338 ) -> Result<usize, DirstateV2ParseError> {
319 339 let start = self.base_name_start.get();
320 340 if start < self.full_path.len.get() {
321 341 let start = usize::try_from(start)
322 342 // u32 -> usize, could only panic on a 16-bit CPU
323 343 .expect("dirstate-v2 base_name_start out of bounds");
324 344 Ok(start)
325 345 } else {
326 346 Err(DirstateV2ParseError)
327 347 }
328 348 }
329 349
330 350 pub(super) fn base_name<'on_disk>(
331 351 &self,
332 352 on_disk: &'on_disk [u8],
333 353 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
334 354 let full_path = self.full_path(on_disk)?;
335 355 let base_name_start = self.base_name_start()?;
336 356 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
337 357 }
338 358
339 359 pub(super) fn path<'on_disk>(
340 360 &self,
341 361 on_disk: &'on_disk [u8],
342 362 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
343 363 Ok(WithBasename::from_raw_parts(
344 364 Cow::Borrowed(self.full_path(on_disk)?),
345 365 self.base_name_start()?,
346 366 ))
347 367 }
348 368
349 369 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
350 370 self.copy_source.start.get() != 0
351 371 }
352 372
353 373 pub(super) fn copy_source<'on_disk>(
354 374 &self,
355 375 on_disk: &'on_disk [u8],
356 376 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
357 377 Ok(if self.has_copy_source() {
358 378 Some(read_hg_path(on_disk, self.copy_source)?)
359 379 } else {
360 380 None
361 381 })
362 382 }
363 383
384 fn has_entry(&self) -> bool {
385 self.flags.intersects(
386 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
387 )
388 }
389
364 390 pub(super) fn node_data(
365 391 &self,
366 392 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
367 let entry = |state| {
368 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
369 };
370
371 match self.state {
372 b'\0' => Ok(dirstate_map::NodeData::None),
373 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
374 mtime: *self.data.as_timestamp(),
375 }),
376 b'n' => Ok(entry(EntryState::Normal)),
377 b'a' => Ok(entry(EntryState::Added)),
378 b'r' => Ok(entry(EntryState::Removed)),
379 b'm' => Ok(entry(EntryState::Merged)),
380 _ => Err(DirstateV2ParseError),
393 if self.has_entry() {
394 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
395 } else if let Some(&mtime) = self.cached_directory_mtime() {
396 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
397 } else {
398 Ok(dirstate_map::NodeData::None)
381 399 }
382 400 }
383 401
384 402 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
385 if self.state == b'd' {
403 if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
386 404 Some(self.data.as_timestamp())
387 405 } else {
388 406 None
389 407 }
390 408 }
391 409
392 pub(super) fn state(
393 &self,
394 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
395 match self.state {
396 b'\0' | b'd' => Ok(None),
397 b'n' => Ok(Some(EntryState::Normal)),
398 b'a' => Ok(Some(EntryState::Added)),
399 b'r' => Ok(Some(EntryState::Removed)),
400 b'm' => Ok(Some(EntryState::Merged)),
401 _ => Err(DirstateV2ParseError),
402 }
403 }
404
405 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
406 // For now, the on-disk representation of DirstateEntry in dirstate-v2
407 // format is equivalent to that of dirstate-v1. When that changes, add
408 // a new constructor.
409 DirstateEntry::from_v1_data(
410 state,
411 self.data.mode.get(),
412 self.data.size.get(),
413 self.data.mtime.get(),
410 fn assume_entry(&self) -> DirstateEntry {
411 // TODO: convert through raw bits instead?
412 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
413 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
414 let p2_info = self.flags.contains(Flags::P2_INFO);
415 let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
416 Some((self.data.mode.into(), self.data.size.into()))
417 } else {
418 None
419 };
420 let mtime = if self.flags.contains(Flags::HAS_MTIME) {
421 Some(self.data.mtime.into())
422 } else {
423 None
424 };
425 DirstateEntry::from_v2_data(
426 wdir_tracked,
427 p1_tracked,
428 p2_info,
429 mode_size,
430 mtime,
414 431 )
415 432 }
416 433
417 434 pub(super) fn entry(
418 435 &self,
419 436 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
420 Ok(self
421 .state()?
422 .map(|state| self.entry_with_given_state(state)))
437 if self.has_entry() {
438 Ok(Some(self.assume_entry()))
439 } else {
440 Ok(None)
441 }
423 442 }
424 443
425 444 pub(super) fn children<'on_disk>(
426 445 &self,
427 446 on_disk: &'on_disk [u8],
428 447 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
429 448 read_nodes(on_disk, self.children)
430 449 }
431 450
432 451 pub(super) fn to_in_memory_node<'on_disk>(
433 452 &self,
434 453 on_disk: &'on_disk [u8],
435 454 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
436 455 Ok(dirstate_map::Node {
437 456 children: dirstate_map::ChildNodes::OnDisk(
438 457 self.children(on_disk)?,
439 458 ),
440 459 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
441 460 data: self.node_data()?,
442 461 descendants_with_entry_count: self
443 462 .descendants_with_entry_count
444 463 .get(),
445 464 tracked_descendants_count: self.tracked_descendants_count.get(),
446 465 })
447 466 }
448 467 }
449 468
450 469 impl Entry {
470 fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
471 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
472 entry.v2_data();
473 // TODO: convert through raw flag bits instead?
474 let mut flags = Flags::empty();
475 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
476 flags.set(Flags::P1_TRACKED, p1_tracked);
477 flags.set(Flags::P2_INFO, p2_info);
478 let (mode, size, mtime);
479 if let Some((m, s)) = mode_size_opt {
480 mode = m;
481 size = s;
482 flags.insert(Flags::HAS_MODE_AND_SIZE)
483 } else {
484 mode = 0;
485 size = 0;
486 }
487 if let Some(m) = mtime_opt {
488 mtime = m;
489 flags.insert(Flags::HAS_MTIME);
490 } else {
491 mtime = 0;
492 }
493 let raw_entry = Entry {
494 mode: mode.into(),
495 size: size.into(),
496 mtime: mtime.into(),
497 };
498 (flags, raw_entry)
499 }
500
451 501 fn from_timestamp(timestamp: Timestamp) -> Self {
452 502 // Safety: both types implement the `ByteCast` trait, so we could
453 503 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
454 504 // `transmute` instead makes the compiler check that the two types
455 505 // have the same size, which eliminates the error case of
456 506 // `from_bytes`.
457 507 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
458 508 }
459 509
460 510 fn as_timestamp(&self) -> &Timestamp {
461 511 // Safety: same as above in `from_timestamp`
462 512 unsafe { &*(self as *const Entry as *const Timestamp) }
463 513 }
464 514 }
465 515
466 516 impl Timestamp {
467 517 pub fn seconds(&self) -> i64 {
468 518 self.seconds.get()
469 519 }
470 520 }
471 521
472 522 impl From<SystemTime> for Timestamp {
473 523 fn from(system_time: SystemTime) -> Self {
474 524 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
475 525 Ok(duration) => {
476 526 (duration.as_secs() as i64, duration.subsec_nanos())
477 527 }
478 528 Err(error) => {
479 529 let negative = error.duration();
480 530 (-(negative.as_secs() as i64), negative.subsec_nanos())
481 531 }
482 532 };
483 533 Timestamp {
484 534 seconds: secs.into(),
485 535 nanoseconds: nanos.into(),
486 536 }
487 537 }
488 538 }
489 539
490 540 impl From<&'_ Timestamp> for SystemTime {
491 541 fn from(timestamp: &'_ Timestamp) -> Self {
492 542 let secs = timestamp.seconds.get();
493 543 let nanos = timestamp.nanoseconds.get();
494 544 if secs >= 0 {
495 545 UNIX_EPOCH + Duration::new(secs as u64, nanos)
496 546 } else {
497 547 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
498 548 }
499 549 }
500 550 }
501 551
502 552 fn read_hg_path(
503 553 on_disk: &[u8],
504 554 slice: PathSlice,
505 555 ) -> Result<&HgPath, DirstateV2ParseError> {
506 556 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
507 557 }
508 558
509 559 fn read_nodes(
510 560 on_disk: &[u8],
511 561 slice: ChildNodes,
512 562 ) -> Result<&[Node], DirstateV2ParseError> {
513 563 read_slice(on_disk, slice.start, slice.len.get())
514 564 }
515 565
516 566 fn read_slice<T, Len>(
517 567 on_disk: &[u8],
518 568 start: Offset,
519 569 len: Len,
520 570 ) -> Result<&[T], DirstateV2ParseError>
521 571 where
522 572 T: BytesCast,
523 573 Len: TryInto<usize>,
524 574 {
525 575 // Either `usize::MAX` fallback would result in an "out of bounds" error,
526 576 // since a single `&[u8]` cannot occupy the entire address space.
527 577 let start = start.get().try_into().unwrap_or(std::usize::MAX);
528 578 let len = len.try_into().unwrap_or(std::usize::MAX);
529 579 on_disk
530 580 .get(start..)
531 581 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
532 582 .map(|(slice, _rest)| slice)
533 583 .ok_or_else(|| DirstateV2ParseError)
534 584 }
535 585
536 586 pub(crate) fn for_each_tracked_path<'on_disk>(
537 587 on_disk: &'on_disk [u8],
538 588 metadata: &[u8],
539 589 mut f: impl FnMut(&'on_disk HgPath),
540 590 ) -> Result<(), DirstateV2ParseError> {
541 591 let (meta, _) = TreeMetadata::from_bytes(metadata)
542 592 .map_err(|_| DirstateV2ParseError)?;
543 593 fn recur<'on_disk>(
544 594 on_disk: &'on_disk [u8],
545 595 nodes: ChildNodes,
546 596 f: &mut impl FnMut(&'on_disk HgPath),
547 597 ) -> Result<(), DirstateV2ParseError> {
548 598 for node in read_nodes(on_disk, nodes)? {
549 if let Some(state) = node.state()? {
550 if state.is_tracked() {
599 if let Some(entry) = node.entry()? {
600 if entry.state().is_tracked() {
551 601 f(node.full_path(on_disk)?)
552 602 }
553 603 }
554 604 recur(on_disk, node.children, f)?
555 605 }
556 606 Ok(())
557 607 }
558 608 recur(on_disk, meta.root_nodes, &mut f)
559 609 }
560 610
561 611 /// Returns new data and metadata, together with whether that data should be
562 612 /// appended to the existing data file whose content is at
563 613 /// `dirstate_map.on_disk` (true), instead of written to a new data file
564 614 /// (false).
565 615 pub(super) fn write(
566 616 dirstate_map: &mut DirstateMap,
567 617 can_append: bool,
568 618 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
569 619 let append = can_append && dirstate_map.write_should_append();
570 620
571 621 // This ignores the space for paths, and for nodes without an entry.
572 622 // TODO: better estimate? Skip the `Vec` and write to a file directly?
573 623 let size_guess = std::mem::size_of::<Node>()
574 624 * dirstate_map.nodes_with_entry_count as usize;
575 625
576 626 let mut writer = Writer {
577 627 dirstate_map,
578 628 append,
579 629 out: Vec::with_capacity(size_guess),
580 630 };
581 631
582 632 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
583 633
584 634 let meta = TreeMetadata {
585 635 root_nodes,
586 636 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
587 637 nodes_with_copy_source_count: dirstate_map
588 638 .nodes_with_copy_source_count
589 639 .into(),
590 640 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
591 641 unused: [0; 4],
592 642 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
593 643 };
594 644 Ok((writer.out, meta.as_bytes().to_vec(), append))
595 645 }
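For context, a hedged sketch of what a caller could do with the returned triple; the real logic (uuid generation, docket rewriting, transaction integration) lives in higher layers and is elided here, and the `persist` helper below is purely illustrative:

```rust
// Hypothetical sketch only: append to or rewrite the data file, then
// return the new tree metadata and total size so the docket can be
// updated to point at them.
use std::io::Write;

fn persist(
    dirstate_map: &mut DirstateMap,
    data_path: &std::path::Path,
    can_append: bool,
) -> Result<(Vec<u8>, u64), DirstateError> {
    let (data, tree_metadata, append) = write(dirstate_map, can_append)?;
    let mut file = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        .append(append)
        .truncate(!append)
        .open(data_path)
        .expect("cannot open data file");
    file.write_all(&data).expect("cannot write data file");
    let total_size = file.metadata().expect("cannot stat data file").len();
    Ok((tree_metadata, total_size))
}
```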
596 646
597 647 struct Writer<'dmap, 'on_disk> {
598 648 dirstate_map: &'dmap DirstateMap<'on_disk>,
599 649 append: bool,
600 650 out: Vec<u8>,
601 651 }
602 652
603 653 impl Writer<'_, '_> {
604 654 fn write_nodes(
605 655 &mut self,
606 656 nodes: dirstate_map::ChildNodesRef,
607 657 ) -> Result<ChildNodes, DirstateError> {
608 658 // Reuse already-written nodes if possible
609 659 if self.append {
610 660 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
611 661 let start = self.on_disk_offset_of(nodes_slice).expect(
612 662 "dirstate-v2 OnDisk nodes not found within on_disk",
613 663 );
614 664 let len = child_nodes_len_from_usize(nodes_slice.len());
615 665 return Ok(ChildNodes { start, len });
616 666 }
617 667 }
618 668
619 669 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
620 670 // undefined iteration order. Sort to enable binary search in the
621 671 // written file.
622 672 let nodes = nodes.sorted();
623 673 let nodes_len = nodes.len();
624 674
625 675 // First accumulate serialized nodes in a `Vec`
626 676 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
627 677 for node in nodes {
628 678 let children =
629 679 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
630 680 let full_path = node.full_path(self.dirstate_map.on_disk)?;
631 681 let full_path = self.write_path(full_path.as_bytes());
632 682 let copy_source = if let Some(source) =
633 683 node.copy_source(self.dirstate_map.on_disk)?
634 684 {
635 685 self.write_path(source.as_bytes())
636 686 } else {
637 687 PathSlice {
638 688 start: 0.into(),
639 689 len: 0.into(),
640 690 }
641 691 };
642 692 on_disk_nodes.push(match node {
643 693 NodeRef::InMemory(path, node) => {
644 let (state, data) = match &node.data {
645 dirstate_map::NodeData::Entry(entry) => (
646 entry.state().into(),
647 Entry {
648 mode: entry.mode().into(),
649 mtime: entry.mtime().into(),
650 size: entry.size().into(),
651 },
652 ),
694 let (flags, data) = match &node.data {
695 dirstate_map::NodeData::Entry(entry) => {
696 Entry::from_dirstate_entry(entry)
697 }
653 698 dirstate_map::NodeData::CachedDirectory { mtime } => {
654 (b'd', Entry::from_timestamp(*mtime))
699 (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
655 700 }
656 701 dirstate_map::NodeData::None => (
657 b'\0',
702 Flags::empty(),
658 703 Entry {
659 704 mode: 0.into(),
705 size: 0.into(),
660 706 mtime: 0.into(),
661 size: 0.into(),
662 707 },
663 708 ),
664 709 };
665 710 Node {
666 711 children,
667 712 copy_source,
668 713 full_path,
669 714 base_name_start: u16::try_from(path.base_name_start())
670 715 // Could only panic for paths over 64 KiB
671 716 .expect("dirstate-v2 path length overflow")
672 717 .into(),
673 718 descendants_with_entry_count: node
674 719 .descendants_with_entry_count
675 720 .into(),
676 721 tracked_descendants_count: node
677 722 .tracked_descendants_count
678 723 .into(),
679 state,
724 flags,
680 725 data,
681 726 }
682 727 }
683 728 NodeRef::OnDisk(node) => Node {
684 729 children,
685 730 copy_source,
686 731 full_path,
687 732 ..*node
688 733 },
689 734 })
690 735 }
691 736 // … so we can write them contiguously, after writing everything else
692 737 // they refer to.
693 738 let start = self.current_offset();
694 739 let len = child_nodes_len_from_usize(nodes_len);
695 740 self.out.extend(on_disk_nodes.as_bytes());
696 741 Ok(ChildNodes { start, len })
697 742 }
698 743
699 744 /// If the given slice of items is within `on_disk`, returns its offset
700 745 /// from the start of `on_disk`.
701 746 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
702 747 where
703 748 T: BytesCast,
704 749 {
705 750 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
706 751 let start = slice.as_ptr() as usize;
707 752 let end = start + slice.len();
708 753 start..=end
709 754 }
710 755 let slice_addresses = address_range(slice.as_bytes());
711 756 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
712 757 if on_disk_addresses.contains(slice_addresses.start())
713 758 && on_disk_addresses.contains(slice_addresses.end())
714 759 {
715 760 let offset = slice_addresses.start() - on_disk_addresses.start();
716 761 Some(offset_from_usize(offset))
717 762 } else {
718 763 None
719 764 }
720 765 }
721 766
722 767 fn current_offset(&mut self) -> Offset {
723 768 let mut offset = self.out.len();
724 769 if self.append {
725 770 offset += self.dirstate_map.on_disk.len()
726 771 }
727 772 offset_from_usize(offset)
728 773 }
729 774
730 775 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
731 776 let len = path_len_from_usize(slice.len());
732 777 // Reuse an already-written path if possible
733 778 if self.append {
734 779 if let Some(start) = self.on_disk_offset_of(slice) {
735 780 return PathSlice { start, len };
736 781 }
737 782 }
738 783 let start = self.current_offset();
739 784 self.out.extend(slice.as_bytes());
740 785 PathSlice { start, len }
741 786 }
742 787 }
743 788
744 789 fn offset_from_usize(x: usize) -> Offset {
745 790 u32::try_from(x)
746 791 // Could only panic for a dirstate file larger than 4 GiB
747 792 .expect("dirstate-v2 offset overflow")
748 793 .into()
749 794 }
750 795
751 796 fn child_nodes_len_from_usize(x: usize) -> Size {
752 797 u32::try_from(x)
753 798 // Could only panic with over 4 billion nodes
754 799 .expect("dirstate-v2 slice length overflow")
755 800 .into()
756 801 }
757 802
758 803 fn path_len_from_usize(x: usize) -> PathSize {
759 804 u16::try_from(x)
760 805 // Could only panic for paths over 64 KiB
761 806 .expect("dirstate-v2 path length overflow")
762 807 .into()
763 808 }
@@ -1,219 +1,219 b''
1 1 use cpython::exc;
2 2 use cpython::PyBytes;
3 3 use cpython::PyErr;
4 4 use cpython::PyNone;
5 5 use cpython::PyObject;
6 6 use cpython::PyResult;
7 7 use cpython::Python;
8 8 use cpython::PythonObject;
9 9 use hg::dirstate::DirstateEntry;
10 10 use hg::dirstate::EntryState;
11 11 use std::cell::Cell;
12 12 use std::convert::TryFrom;
13 13
14 14 py_class!(pub class DirstateItem |py| {
15 15 data entry: Cell<DirstateEntry>;
16 16
17 17 def __new__(
18 18 _cls,
19 19 wc_tracked: bool = false,
20 20 p1_tracked: bool = false,
21 21 p2_info: bool = false,
22 22 has_meaningful_data: bool = true,
23 23 has_meaningful_mtime: bool = true,
24 24 parentfiledata: Option<(i32, i32, i32)> = None,
25 25
26 26 ) -> PyResult<DirstateItem> {
27 27 let mut mode_size_opt = None;
28 28 let mut mtime_opt = None;
29 29 if let Some((mode, size, mtime)) = parentfiledata {
30 30 if has_meaningful_data {
31 31 mode_size_opt = Some((mode, size))
32 32 }
33 33 if has_meaningful_mtime {
34 34 mtime_opt = Some(mtime)
35 35 }
36 36 }
37 let entry = DirstateEntry::new(
37 let entry = DirstateEntry::from_v2_data(
38 38 wc_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt,
39 39 );
40 40 DirstateItem::create_instance(py, Cell::new(entry))
41 41 }
42 42
43 43 @property
44 44 def state(&self) -> PyResult<PyBytes> {
45 45 let state_byte: u8 = self.entry(py).get().state().into();
46 46 Ok(PyBytes::new(py, &[state_byte]))
47 47 }
48 48
49 49 @property
50 50 def mode(&self) -> PyResult<i32> {
51 51 Ok(self.entry(py).get().mode())
52 52 }
53 53
54 54 @property
55 55 def size(&self) -> PyResult<i32> {
56 56 Ok(self.entry(py).get().size())
57 57 }
58 58
59 59 @property
60 60 def mtime(&self) -> PyResult<i32> {
61 61 Ok(self.entry(py).get().mtime())
62 62 }
63 63
64 64 @property
65 65 def tracked(&self) -> PyResult<bool> {
66 66 Ok(self.entry(py).get().tracked())
67 67 }
68 68
69 69 @property
70 70 def added(&self) -> PyResult<bool> {
71 71 Ok(self.entry(py).get().added())
72 72 }
73 73
74 74 @property
75 75 def merged(&self) -> PyResult<bool> {
76 76 Ok(self.entry(py).get().merged())
77 77 }
78 78
79 79 @property
80 80 def removed(&self) -> PyResult<bool> {
81 81 Ok(self.entry(py).get().removed())
82 82 }
83 83
84 84 @property
85 85 def from_p2(&self) -> PyResult<bool> {
86 86 Ok(self.entry(py).get().from_p2())
87 87 }
88 88
89 89 @property
90 90 def maybe_clean(&self) -> PyResult<bool> {
91 91 Ok(self.entry(py).get().maybe_clean())
92 92 }
93 93
94 94 @property
95 95 def any_tracked(&self) -> PyResult<bool> {
96 96 Ok(self.entry(py).get().any_tracked())
97 97 }
98 98
99 99 def v1_state(&self) -> PyResult<PyBytes> {
100 100 let (state, _mode, _size, _mtime) = self.entry(py).get().v1_data();
101 101 let state_byte: u8 = state.into();
102 102 Ok(PyBytes::new(py, &[state_byte]))
103 103 }
104 104
105 105 def v1_mode(&self) -> PyResult<i32> {
106 106 let (_state, mode, _size, _mtime) = self.entry(py).get().v1_data();
107 107 Ok(mode)
108 108 }
109 109
110 110 def v1_size(&self) -> PyResult<i32> {
111 111 let (_state, _mode, size, _mtime) = self.entry(py).get().v1_data();
112 112 Ok(size)
113 113 }
114 114
115 115 def v1_mtime(&self) -> PyResult<i32> {
116 116 let (_state, _mode, _size, mtime) = self.entry(py).get().v1_data();
117 117 Ok(mtime)
118 118 }
119 119
120 120 def need_delay(&self, now: i32) -> PyResult<bool> {
121 121 Ok(self.entry(py).get().mtime_is_ambiguous(now))
122 122 }
123 123
124 124 @classmethod
125 125 def from_v1_data(
126 126 _cls,
127 127 state: PyBytes,
128 128 mode: i32,
129 129 size: i32,
130 130 mtime: i32,
131 131 ) -> PyResult<Self> {
132 132 let state = <[u8; 1]>::try_from(state.data(py))
133 133 .ok()
134 134 .and_then(|state| EntryState::try_from(state[0]).ok())
135 135 .ok_or_else(|| PyErr::new::<exc::ValueError, _>(py, "invalid state"))?;
136 136 let entry = DirstateEntry::from_v1_data(state, mode, size, mtime);
137 137 DirstateItem::create_instance(py, Cell::new(entry))
138 138 }
139 139
140 140 @classmethod
141 141 def new_added(_cls) -> PyResult<Self> {
142 142 let entry = DirstateEntry::new_added();
143 143 DirstateItem::create_instance(py, Cell::new(entry))
144 144 }
145 145
146 146 @classmethod
147 147 def new_merged(_cls) -> PyResult<Self> {
148 148 let entry = DirstateEntry::new_merged();
149 149 DirstateItem::create_instance(py, Cell::new(entry))
150 150 }
151 151
152 152 @classmethod
153 153 def new_from_p2(_cls) -> PyResult<Self> {
154 154 let entry = DirstateEntry::new_from_p2();
155 155 DirstateItem::create_instance(py, Cell::new(entry))
156 156 }
157 157
158 158 @classmethod
159 159 def new_possibly_dirty(_cls) -> PyResult<Self> {
160 160 let entry = DirstateEntry::new_possibly_dirty();
161 161 DirstateItem::create_instance(py, Cell::new(entry))
162 162 }
163 163
164 164 @classmethod
165 165 def new_normal(_cls, mode: i32, size: i32, mtime: i32) -> PyResult<Self> {
166 166 let entry = DirstateEntry::new_normal(mode, size, mtime);
167 167 DirstateItem::create_instance(py, Cell::new(entry))
168 168 }
169 169
170 170 def drop_merge_data(&self) -> PyResult<PyNone> {
171 171 self.update(py, |entry| entry.drop_merge_data());
172 172 Ok(PyNone)
173 173 }
174 174
175 175 def set_clean(
176 176 &self,
177 177 mode: i32,
178 178 size: i32,
179 179 mtime: i32,
180 180 ) -> PyResult<PyNone> {
181 181 self.update(py, |entry| entry.set_clean(mode, size, mtime));
182 182 Ok(PyNone)
183 183 }
184 184
185 185 def set_possibly_dirty(&self) -> PyResult<PyNone> {
186 186 self.update(py, |entry| entry.set_possibly_dirty());
187 187 Ok(PyNone)
188 188 }
189 189
190 190 def set_tracked(&self) -> PyResult<PyNone> {
191 191 self.update(py, |entry| entry.set_tracked());
192 192 Ok(PyNone)
193 193 }
194 194
195 195 def set_untracked(&self) -> PyResult<PyNone> {
196 196 self.update(py, |entry| entry.set_untracked());
197 197 Ok(PyNone)
198 198 }
199 199 });
200 200
201 201 impl DirstateItem {
202 202 pub fn new_as_pyobject(
203 203 py: Python<'_>,
204 204 entry: DirstateEntry,
205 205 ) -> PyResult<PyObject> {
206 206 Ok(DirstateItem::create_instance(py, Cell::new(entry))?.into_object())
207 207 }
208 208
209 209 pub fn get_entry(&self, py: Python<'_>) -> DirstateEntry {
210 210 self.entry(py).get()
211 211 }
212 212
213 213 // TODO: Use https://doc.rust-lang.org/std/cell/struct.Cell.html#method.update instead when it’s stable
214 214 pub fn update(&self, py: Python<'_>, f: impl FnOnce(&mut DirstateEntry)) {
215 215 let mut entry = self.entry(py).get();
216 216 f(&mut entry);
217 217 self.entry(py).set(entry)
218 218 }
219 219 }