##// END OF EJS Templates
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
Simon Sapin -
r48128:0654b3b3 default
parent child Browse files
Show More
@@ -48,14 +48,17 b" pub(super) type NodeKey<'on_disk> = With"
48 48
49 49 pub(super) enum ChildNodes<'on_disk> {
50 50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 OnDisk(&'on_disk [on_disk::Node]),
51 52 }
52 53
53 54 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 55 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
56 OnDisk(&'on_disk [on_disk::Node]),
55 57 }
56 58
57 59 pub(super) enum NodeRef<'tree, 'on_disk> {
58 60 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
61 OnDisk(&'on_disk on_disk::Node),
59 62 }
60 63
61 64 impl Default for ChildNodes<'_> {
@@ -70,24 +73,42 b" impl<'on_disk> ChildNodes<'on_disk> {"
70 73 ) -> ChildNodesRef<'tree, 'on_disk> {
71 74 match self {
72 75 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
76 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
73 77 }
74 78 }
75 79
76 80 pub(super) fn is_empty(&self) -> bool {
77 81 match self {
78 82 ChildNodes::InMemory(nodes) => nodes.is_empty(),
83 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
79 84 }
80 85 }
81 86
82 87 pub(super) fn make_mut(
83 88 &mut self,
84 _on_disk: &'on_disk [u8],
89 on_disk: &'on_disk [u8],
85 90 ) -> Result<
86 91 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
87 92 DirstateV2ParseError,
88 93 > {
89 94 match self {
90 95 ChildNodes::InMemory(nodes) => Ok(nodes),
96 ChildNodes::OnDisk(nodes) => {
97 let nodes = nodes
98 .iter()
99 .map(|node| {
100 Ok((
101 node.path(on_disk)?,
102 node.to_in_memory_node(on_disk)?,
103 ))
104 })
105 .collect::<Result<_, _>>()?;
106 *self = ChildNodes::InMemory(nodes);
107 match self {
108 ChildNodes::InMemory(nodes) => Ok(nodes),
109 ChildNodes::OnDisk(_) => unreachable!(),
110 }
111 }
91 112 }
92 113 }
93 114 }
@@ -96,12 +117,29 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
96 117 pub(super) fn get(
97 118 &self,
98 119 base_name: &HgPath,
99 _on_disk: &'on_disk [u8],
120 on_disk: &'on_disk [u8],
100 121 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
101 122 match self {
102 123 ChildNodesRef::InMemory(nodes) => Ok(nodes
103 124 .get_key_value(base_name)
104 125 .map(|(k, v)| NodeRef::InMemory(k, v))),
126 ChildNodesRef::OnDisk(nodes) => {
127 let mut parse_result = Ok(());
128 let search_result = nodes.binary_search_by(|node| {
129 match node.base_name(on_disk) {
130 Ok(node_base_name) => node_base_name.cmp(base_name),
131 Err(e) => {
132 parse_result = Err(e);
133 // Dummy comparison result, `search_result` won’t
134 // be used since `parse_result` is an error
135 std::cmp::Ordering::Equal
136 }
137 }
138 });
139 parse_result.map(|()| {
140 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
141 })
142 }
105 143 }
106 144 }
107 145
@@ -110,8 +148,11 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
110 148 &self,
111 149 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
112 150 match self {
113 ChildNodesRef::InMemory(nodes) => {
114 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
151 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
152 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
153 ),
154 ChildNodesRef::OnDisk(nodes) => {
155 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
115 156 }
116 157 }
117 158 }
@@ -123,9 +164,12 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
123 164 {
124 165 use rayon::prelude::*;
125 166 match self {
126 ChildNodesRef::InMemory(nodes) => {
127 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
128 }
167 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
168 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
169 ),
170 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
171 nodes.par_iter().map(NodeRef::OnDisk),
172 ),
129 173 }
130 174 }
131 175
@@ -139,6 +183,7 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
139 183 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
140 184 match node {
141 185 NodeRef::InMemory(path, _node) => path.base_name(),
186 NodeRef::OnDisk(_) => unreachable!(),
142 187 }
143 188 }
144 189 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
@@ -146,6 +191,10 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
146 191 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
147 192 vec
148 193 }
194 ChildNodesRef::OnDisk(nodes) => {
195 // Nodes on disk are already sorted
196 nodes.iter().map(NodeRef::OnDisk).collect()
197 }
149 198 }
150 199 }
151 200 }
@@ -153,61 +202,72 b" impl<'tree, 'on_disk> ChildNodesRef<'tre"
153 202 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
154 203 pub(super) fn full_path(
155 204 &self,
156 _on_disk: &'on_disk [u8],
205 on_disk: &'on_disk [u8],
157 206 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
158 207 match self {
159 208 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
209 NodeRef::OnDisk(node) => node.full_path(on_disk),
160 210 }
161 211 }
162 212
163 213 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
164 214 pub(super) fn full_path_cow(
165 215 &self,
166 _on_disk: &'on_disk [u8],
216 on_disk: &'on_disk [u8],
167 217 ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
168 218 match self {
169 219 NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
220 NodeRef::OnDisk(node) => {
221 Ok(Cow::Borrowed(node.full_path(on_disk)?))
222 }
170 223 }
171 224 }
172 225
173 226 pub(super) fn base_name(
174 227 &self,
175 _on_disk: &'on_disk [u8],
228 on_disk: &'on_disk [u8],
176 229 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
177 230 match self {
178 231 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
232 NodeRef::OnDisk(node) => node.base_name(on_disk),
179 233 }
180 234 }
181 235
182 236 pub(super) fn children(
183 237 &self,
184 _on_disk: &'on_disk [u8],
238 on_disk: &'on_disk [u8],
185 239 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
186 240 match self {
187 241 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
242 NodeRef::OnDisk(node) => {
243 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
244 }
188 245 }
189 246 }
190 247
191 248 pub(super) fn has_copy_source(&self) -> bool {
192 249 match self {
193 250 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
251 NodeRef::OnDisk(node) => node.has_copy_source(),
194 252 }
195 253 }
196 254
197 255 pub(super) fn copy_source(
198 256 &self,
199 _on_disk: &'on_disk [u8],
257 on_disk: &'on_disk [u8],
200 258 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
201 259 match self {
202 260 NodeRef::InMemory(_path, node) => {
203 261 Ok(node.copy_source.as_ref().map(|s| &**s))
204 262 }
263 NodeRef::OnDisk(node) => node.copy_source(on_disk),
205 264 }
206 265 }
207 266
208 267 pub(super) fn has_entry(&self) -> bool {
209 268 match self {
210 269 NodeRef::InMemory(_path, node) => node.entry.is_some(),
270 NodeRef::OnDisk(node) => node.has_entry(),
211 271 }
212 272 }
213 273
@@ -216,6 +276,7 b" impl<'tree, 'on_disk> NodeRef<'tree, 'on"
216 276 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
217 277 match self {
218 278 NodeRef::InMemory(_path, node) => Ok(node.entry),
279 NodeRef::OnDisk(node) => node.entry(),
219 280 }
220 281 }
221 282
@@ -226,12 +287,14 b" impl<'tree, 'on_disk> NodeRef<'tree, 'on"
226 287 NodeRef::InMemory(_path, node) => {
227 288 Ok(node.entry.as_ref().map(|entry| entry.state))
228 289 }
290 NodeRef::OnDisk(node) => node.state(),
229 291 }
230 292 }
231 293
232 294 pub(super) fn tracked_descendants_count(&self) -> u32 {
233 295 match self {
234 296 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
297 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
235 298 }
236 299 }
237 300 }
@@ -16,6 +16,7 b' use crate::utils::hg_path::HgPath;'
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 use crate::EntryState;
19 20 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 21 use bytes_cast::BytesCast;
21 22 use std::borrow::Cow;
@@ -42,7 +43,7 b' struct Header {'
42 43
43 44 #[derive(BytesCast)]
44 45 #[repr(C)]
45 struct Node {
46 pub(super) struct Node {
46 47 full_path: PathSlice,
47 48
48 49 /// In bytes from `self.full_path.start`
@@ -51,12 +52,12 b' struct Node {'
51 52 copy_source: OptPathSlice,
52 53 entry: OptEntry,
53 54 children: ChildNodes,
54 tracked_descendants_count: Size,
55 pub(super) tracked_descendants_count: Size,
55 56 }
56 57
57 58 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 59 /// format
59 #[derive(BytesCast)]
60 #[derive(BytesCast, Copy, Clone)]
60 61 #[repr(C)]
61 62 struct OptEntry {
62 63 state: u8,
@@ -149,7 +150,9 b" pub(super) fn read<'on_disk>("
149 150 }
150 151 let dirstate_map = DirstateMap {
151 152 on_disk,
152 root: read_nodes(on_disk, *root)?,
153 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
154 on_disk, *root,
155 )?),
153 156 nodes_with_entry_count: nodes_with_entry_count.get(),
154 157 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
155 158 };
@@ -158,49 +161,96 b" pub(super) fn read<'on_disk>("
158 161 }
159 162
160 163 impl Node {
161 pub(super) fn path<'on_disk>(
164 pub(super) fn full_path<'on_disk>(
162 165 &self,
163 166 on_disk: &'on_disk [u8],
164 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
165 let full_path = read_hg_path(on_disk, self.full_path)?;
166 let base_name_start = usize::try_from(self.base_name_start.get())
167 // u32 -> usize, could only panic on a 16-bit CPU
168 .expect("dirstate-v2 base_name_start out of bounds");
169 if base_name_start < full_path.len() {
170 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
167 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
168 read_hg_path(on_disk, self.full_path)
169 }
170
171 pub(super) fn base_name_start<'on_disk>(
172 &self,
173 ) -> Result<usize, DirstateV2ParseError> {
174 let start = self.base_name_start.get();
175 if start < self.full_path.len.get() {
176 let start = usize::try_from(start)
177 // u32 -> usize, could only panic on a 16-bit CPU
178 .expect("dirstate-v2 base_name_start out of bounds");
179 Ok(start)
171 180 } else {
172 181 Err(DirstateV2ParseError)
173 182 }
174 183 }
175 184
185 pub(super) fn base_name<'on_disk>(
186 &self,
187 on_disk: &'on_disk [u8],
188 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
189 let full_path = self.full_path(on_disk)?;
190 let base_name_start = self.base_name_start()?;
191 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
192 }
193
194 pub(super) fn path<'on_disk>(
195 &self,
196 on_disk: &'on_disk [u8],
197 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
198 Ok(WithBasename::from_raw_parts(
199 Cow::Borrowed(self.full_path(on_disk)?),
200 self.base_name_start()?,
201 ))
202 }
203
204 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
205 self.copy_source.start.get() != 0
206 }
207
176 208 pub(super) fn copy_source<'on_disk>(
177 209 &self,
178 210 on_disk: &'on_disk [u8],
179 ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> {
180 Ok(if self.copy_source.start.get() != 0 {
211 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
212 Ok(if self.has_copy_source() {
181 213 Some(read_hg_path(on_disk, self.copy_source)?)
182 214 } else {
183 215 None
184 216 })
185 217 }
186 218
219 pub(super) fn has_entry(&self) -> bool {
220 self.entry.state != b'\0'
221 }
222
223 pub(super) fn state(
224 &self,
225 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
226 Ok(if self.has_entry() {
227 Some(
228 self.entry
229 .state
230 .try_into()
231 .map_err(|_| DirstateV2ParseError)?,
232 )
233 } else {
234 None
235 })
236 }
237
187 238 pub(super) fn entry(
188 239 &self,
189 240 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
190 Ok(if self.entry.state != b'\0' {
191 Some(DirstateEntry {
192 state: self
193 .entry
194 .state
195 .try_into()
196 .map_err(|_| DirstateV2ParseError)?,
197 mode: self.entry.mode.get(),
198 mtime: self.entry.mtime.get(),
199 size: self.entry.size.get(),
200 })
201 } else {
202 None
203 })
241 Ok(self.state()?.map(|state| DirstateEntry {
242 state,
243 mode: self.entry.mode.get(),
244 mtime: self.entry.mtime.get(),
245 size: self.entry.size.get(),
246 }))
247 }
248
249 pub(super) fn children<'on_disk>(
250 &self,
251 on_disk: &'on_disk [u8],
252 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
253 read_slice::<Node>(on_disk, self.children)
204 254 }
205 255
206 256 pub(super) fn to_in_memory_node<'on_disk>(
@@ -208,33 +258,22 b' impl Node {'
208 258 on_disk: &'on_disk [u8],
209 259 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
210 260 Ok(dirstate_map::Node {
211 children: read_nodes(on_disk, self.children)?,
212 copy_source: self.copy_source(on_disk)?,
261 children: dirstate_map::ChildNodes::OnDisk(
262 self.children(on_disk)?,
263 ),
264 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
213 265 entry: self.entry()?,
214 266 tracked_descendants_count: self.tracked_descendants_count.get(),
215 267 })
216 268 }
217 269 }
218 270
219 fn read_nodes(
220 on_disk: &[u8],
221 slice: ChildNodes,
222 ) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> {
223 read_slice::<Node>(on_disk, slice)?
224 .iter()
225 .map(|node| {
226 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
227 })
228 .collect::<Result<_, _>>()
229 .map(dirstate_map::ChildNodes::InMemory)
230 }
231
232 271 fn read_hg_path(
233 272 on_disk: &[u8],
234 273 slice: Slice,
235 ) -> Result<Cow<HgPath>, DirstateV2ParseError> {
274 ) -> Result<&HgPath, DirstateV2ParseError> {
236 275 let bytes = read_slice::<u8>(on_disk, slice)?;
237 Ok(Cow::Borrowed(HgPath::new(bytes)))
276 Ok(HgPath::new(bytes))
238 277 }
239 278
240 279 fn read_slice<T>(
@@ -300,8 +339,11 b' fn write_nodes('
300 339 // First accumulate serialized nodes in a `Vec`
301 340 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
302 341 for node in nodes {
303 let children = node.children(dirstate_map.on_disk)?;
304 let children = write_nodes(dirstate_map, children, out)?;
342 let children = write_nodes(
343 dirstate_map,
344 node.children(dirstate_map.on_disk)?,
345 out,
346 )?;
305 347 let full_path = node.full_path(dirstate_map.on_disk)?;
306 348 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
307 349 let copy_source =
@@ -341,6 +383,12 b' fn write_nodes('
341 383 }
342 384 },
343 385 },
386 NodeRef::OnDisk(node) => Node {
387 children,
388 copy_source,
389 full_path,
390 ..*node
391 },
344 392 })
345 393 }
346 394 // … so we can write them contiguously
General Comments 0
You need to be logged in to leave comments. Login now