##// END OF EJS Templates
dirstate-v2: no longer register the data-file during transaction...
marmoute -
r50364:b5c85248 6.2.2 stable
parent child Browse files
Show More
@@ -1,690 +1,688 b''
1 1 # dirstatemap.py
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6
7 7 from .i18n import _
8 8
9 9 from . import (
10 10 error,
11 11 pathutil,
12 12 policy,
13 13 txnutil,
14 14 util,
15 15 )
16 16
17 17 from .dirstateutils import (
18 18 docket as docketmod,
19 19 v2,
20 20 )
21 21
# Implementation modules selected through the `policy` module. `rustmod` is
# compared against None below, so it may be absent.
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')

propertycache = util.propertycache

# Use the Rust `DirstateItem` whenever the Rust module was imported,
# otherwise the one exposed by `parsers`.
DirstateItem = (
    parsers.DirstateItem if rustmod is None else rustmod.DirstateItem
)

# Bit mask applied to file sizes in `set_clean` (keeps the low 31 bits).
rangemask = 0x7FFFFFFF
33 33
34 34
35 35 class _dirstatemapcommon:
36 36 """
37 37 Methods that are identical for both implementations of the dirstatemap
38 38 class, with and without Rust extensions enabled.
39 39 """
40 40
41 41 # please pytype
42 42
43 43 _map = None
44 44 copymap = None
45 45
46 46 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
47 47 self._use_dirstate_v2 = use_dirstate_v2
48 48 self._nodeconstants = nodeconstants
49 49 self._ui = ui
50 50 self._opener = opener
51 51 self._root = root
52 52 self._filename = b'dirstate'
53 53 self._nodelen = 20 # Also update Rust code when changing this!
54 54 self._parents = None
55 55 self._dirtyparents = False
56 56 self._docket = None
57 57
58 58 # for consistent view between _pl() and _read() invocations
59 59 self._pendingmode = None
60 60
61 61 def preload(self):
62 62 """Loads the underlying data, if it's not already loaded"""
63 63 self._map
64 64
65 65 def get(self, key, default=None):
66 66 return self._map.get(key, default)
67 67
68 68 def __len__(self):
69 69 return len(self._map)
70 70
71 71 def __iter__(self):
72 72 return iter(self._map)
73 73
74 74 def __contains__(self, key):
75 75 return key in self._map
76 76
77 77 def __getitem__(self, item):
78 78 return self._map[item]
79 79
80 80 ### disk interaction
81 81
82 82 def _opendirstatefile(self):
83 83 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
84 84 if self._pendingmode is not None and self._pendingmode != mode:
85 85 fp.close()
86 86 raise error.Abort(
87 87 _(b'working directory state may be changed parallelly')
88 88 )
89 89 self._pendingmode = mode
90 90 return fp
91 91
92 92 def _readdirstatefile(self, size=-1):
93 93 try:
94 94 with self._opendirstatefile() as fp:
95 95 return fp.read(size)
96 96 except FileNotFoundError:
97 97 # File doesn't exist, so the current state is empty
98 98 return b''
99 99
100 100 @property
101 101 def docket(self):
102 102 if not self._docket:
103 103 if not self._use_dirstate_v2:
104 104 raise error.ProgrammingError(
105 105 b'dirstate only has a docket in v2 format'
106 106 )
107 107 self._docket = docketmod.DirstateDocket.parse(
108 108 self._readdirstatefile(), self._nodeconstants
109 109 )
110 110 return self._docket
111 111
112 112 def write_v2_no_append(self, tr, st, meta, packed):
113 113 old_docket = self.docket
114 114 new_docket = docketmod.DirstateDocket.with_new_uuid(
115 115 self.parents(), len(packed), meta
116 116 )
117 117 data_filename = new_docket.data_filename()
118 if tr:
119 tr.add(data_filename, 0)
120 118 self._opener.write(data_filename, packed)
121 119 # Write the new docket after the new data file has been
122 120 # written. Because `st` was opened with `atomictemp=True`,
123 121 # the actual `.hg/dirstate` file is only affected on close.
124 122 st.write(new_docket.serialize())
125 123 st.close()
126 124 # Remove the old data file after the new docket pointing to
127 125 # the new data file was written.
128 126 if old_docket.uuid:
129 127 data_filename = old_docket.data_filename()
130 128 unlink = lambda _tr=None: self._opener.unlink(data_filename)
131 129 if tr:
132 130 category = b"dirstate-v2-clean-" + old_docket.uuid
133 131 tr.addpostclose(category, unlink)
134 132 else:
135 133 unlink()
136 134 self._docket = new_docket
137 135
138 136 ### reading/setting parents
139 137
140 138 def parents(self):
141 139 if not self._parents:
142 140 if self._use_dirstate_v2:
143 141 self._parents = self.docket.parents
144 142 else:
145 143 read_len = self._nodelen * 2
146 144 st = self._readdirstatefile(read_len)
147 145 l = len(st)
148 146 if l == read_len:
149 147 self._parents = (
150 148 st[: self._nodelen],
151 149 st[self._nodelen : 2 * self._nodelen],
152 150 )
153 151 elif l == 0:
154 152 self._parents = (
155 153 self._nodeconstants.nullid,
156 154 self._nodeconstants.nullid,
157 155 )
158 156 else:
159 157 raise error.Abort(
160 158 _(b'working directory state appears damaged!')
161 159 )
162 160
163 161 return self._parents
164 162
165 163
class dirstatemap(_dirstatemapcommon):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict.  File state is updated by calling various methods (see each
      documentation for details):

      - `reset_state`,
      - `set_tracked`
      - `set_untracked`
      - `set_clean`
      - `set_possibly_dirty`

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """

    ### Core data storage and access

    @propertycache
    def _map(self):
        # Install the empty dict on the instance first: `read()` fills (or
        # replaces) `self._map`, so the attribute must exist before it runs.
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        # Same pattern as `_map`: publish the dict, then trigger the load of
        # `_map`, which is what populates it.
        self.copymap = {}
        self._map
        return self.copymap

    def clear(self):
        """Empty the map and copy map, reset both parents to the null id
        and drop every cached derived view."""
        self._map.clear()
        self.copymap.clear()
        self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
        for cached in (b"_dirs", b"_alldirs", b"filefoldmap", b"dirfoldmap"):
            util.clearcachedproperty(self, cached)

    def items(self):
        return self._map.items()

    # forward for python2,3 compat
    iteritems = items

    def debug_iter(self, all):
        """
        Return an iterator of (filename, state, mode, size, mtime) tuples

        `all` is unused when Rust is not enabled
        """
        for path, entry in self.items():
            yield (path, entry.state, entry.mode, entry.size, entry.mtime)

    def keys(self):
        return self._map.keys()

    ### reading/setting parents

    def setparents(self, p1, p2, fold_p2=False):
        """Record `(p1, p2)` as the parents and flag them as dirty.

        With `fold_p2`, every entry carrying `p2_info` has its merge data
        dropped and its copy source removed from `copymap`. The removed
        copy sources are returned as a `{destination: source}` dict, which
        is empty when `fold_p2` is false.
        """
        self._parents = (p1, p2)
        self._dirtyparents = True
        copies = {}
        if not fold_p2:
            return copies
        for path, entry in self._map.items():
            if not entry.p2_info:
                continue
            # Discard "merged" markers when moving away from a merge state
            source = self.copymap.pop(path, None)
            if source:
                copies[path] = source
            entry.drop_merge_data()
        return copies

    ### disk interaction

    def read(self):
        """Load the on-disk dirstate into `_map` and `copymap`."""
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(
            self._opener.join(self._filename)
        )

        if self._use_dirstate_v2:
            if not self.docket.uuid:
                return
            data = self._opener.read(self.docket.data_filename())
        else:
            data = self._readdirstatefile()

        if not data:
            return

        # TODO: adjust this estimate for dirstate-v2
        if util.safehasattr(parsers, b'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. This trades wasting some memory for avoiding costly
            # resizes. Each entry have a prefix of 17 bytes followed by one or
            # two path names. Studies on various large-scale real-world
            # repositories found 54 bytes a reasonable upper limit for the
            # average path names. Copy entries are ignored for the sake of
            # this estimate.
            self._map = parsers.dict_new_presized(len(data) // 71)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        if self._use_dirstate_v2:
            parents = self.docket.parents
            meta = self.docket.tree_metadata
            parse = util.nogc(v2.parse_dirstate)
            parse(self._map, self.copymap, data, meta)
        else:
            parse = util.nogc(parsers.parse_dirstate)
            parents = parse(self._map, self.copymap, data)
        if not self._dirtyparents:
            self.setparents(*parents)

        # Avoid excess attribute lookups by fast pathing certain checks
        loaded = self._map
        self.__contains__ = loaded.__contains__
        self.__getitem__ = loaded.__getitem__
        self.get = loaded.get

    def write(self, tr, st):
        """Serialize the map to `st` (closing it) and mark parents clean."""
        if self._use_dirstate_v2:
            packed, meta = v2.pack_dirstate(self._map, self.copymap)
            self.write_v2_no_append(tr, st, meta, packed)
        else:
            v1_data = parsers.pack_dirstate(
                self._map, self.copymap, self.parents()
            )
            st.write(v1_data)
            st.close()
        self._dirtyparents = False

    @propertycache
    def identity(self):
        # Loading `_map` runs `read()`, which assigns `self.identity`.
        self._map
        return self.identity

    ### code related to maintaining and accessing "extra" property
    # (e.g. "has_dir")

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable"""
        cached = self.__dict__
        if (old_entry is None or old_entry.removed) and "_dirs" in cached:
            self._dirs.addpath(filename)
        if old_entry is None and "_alldirs" in cached:
            self._alldirs.addpath(filename)

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable"""
        cached = self.__dict__
        if old_entry is None:
            if remove_variant and "_alldirs" in cached:
                self._alldirs.addpath(filename)
        else:
            if "_dirs" in cached and not old_entry.removed:
                self._dirs.delpath(filename)
            if "_alldirs" in cached and not remove_variant:
                self._alldirs.delpath(filename)
        if "filefoldmap" in cached:
            self.filefoldmap.pop(util.normcase(filename), None)

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
        try:
            native_builder = parsers.make_file_foldmap
        except AttributeError:
            # No native helper available, build the mapping in Python below.
            pass
        else:
            return native_builder(
                self._map, util.normcasespec, util.normcasefallback
            )

        normcase = util.normcase
        foldmap = {
            normcase(name): name
            for name, entry in self._map.items()
            if not entry.removed
        }
        foldmap[b'.'] = b'.'  # prevents useless util.fspath() invocation
        return foldmap

    @propertycache
    def dirfoldmap(self):
        """Dictionary mapping the case-normalized form of every directory in
        `_dirs` to the form stored there."""
        normcase = util.normcase
        return {normcase(name): name for name in self._dirs}

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        return pathutil.dirs(self._map, only_tracked=True)

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self._map)

    ### code related to manipulation of entries and copy-sources

    def reset_state(
        self,
        filename,
        wc_tracked=False,
        p1_tracked=False,
        p2_info=False,
        has_meaningful_mtime=True,
        parentfiledata=None,
    ):
        """Set a entry to a given state, disregarding all previous state

        This is to be used by the part of the dirstate API dedicated to
        adjusting the dirstate after a update/merge.

        note: calling this might result to no entry existing at all if the
        dirstate map does not see any point at having one for this file
        anymore.
        """
        # copy information are now outdated
        # (maybe new information should be in directly passed to this function)
        self.copymap.pop(filename, None)

        previous = self._map.get(filename)

        if not any((p1_tracked, p2_info, wc_tracked)):
            # Nothing left to remember about this file: forget it entirely.
            self._drop_entry(filename)
            self._dirs_decr(filename, old_entry=previous)
            return

        self._dirs_incr(filename, previous)
        self._map[filename] = DirstateItem(
            wc_tracked=wc_tracked,
            p1_tracked=p1_tracked,
            p2_info=p2_info,
            has_meaningful_mtime=has_meaningful_mtime,
            parentfiledata=parentfiledata,
        )

    def set_tracked(self, filename):
        """Mark `filename` as tracked.

        Returns True when the file was not tracked before this call, False
        when it already was (in which case it is flagged possibly dirty).
        """
        entry = self.get(filename)

        if entry is None:
            self._dirs_incr(filename)
            self._map[filename] = DirstateItem(
                wc_tracked=True,
            )
            return True

        if not entry.tracked:
            self._dirs_incr(filename, entry)
            entry.set_tracked()
            self._refresh_entry(filename, entry)
            return True

        # XXX This is probably overkill for more case, but we need this to
        # fully replace the `normallookup` call with `set_tracked` one.
        # Consider smoothing this in the future.
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)
        return False

    def set_untracked(self, f):
        """Mark a file as no longer tracked in the dirstate map"""
        entry = self.get(f)
        if entry is None:
            return False
        self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
        if not entry.p2_info:
            self.copymap.pop(f, None)
        entry.set_untracked()
        self._refresh_entry(f, entry)
        return True

    def set_clean(self, filename, mode, size, mtime):
        """mark a file as back to a clean state"""
        entry = self[filename]
        entry.set_clean(mode, size & rangemask, mtime)
        self._refresh_entry(filename, entry)
        self.copymap.pop(filename, None)

    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown"""
        entry = self[filename]
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)

    def _refresh_entry(self, f, entry):
        """record updated state of an entry"""
        if entry.any_tracked:
            return
        self._map.pop(f, None)

    def _drop_entry(self, f):
        """remove any entry for file f

        This should also drop associated copy information

        The fact we actually need to drop it is the responsibility of the
        caller"""
        self._map.pop(f, None)
        self.copymap.pop(f, None)
512 510
513 511
if rustmod is not None:

    class dirstatemap(_dirstatemapcommon):
        """Dirstate map backed by `rustmod.DirstateMap`.

        Defined only when the Rust module was imported, in which case it
        replaces the pure-Python `dirstatemap` class defined above. Most
        methods simply forward to the Rust object stored in `_map`.
        """

        ### Core data storage and access

        @propertycache
        def _map(self):
            """
            Fills the Dirstatemap when called.
            """
            # ignore HG_PENDING because identity is used only for writing
            self.identity = util.filestat.frompath(
                self._opener.join(self._filename)
            )

            if self._use_dirstate_v2:
                if self.docket.uuid:
                    # TODO: use mmap when possible
                    data = self._opener.read(self.docket.data_filename())
                else:
                    # No data file has been written yet.
                    data = b''
                self._map = rustmod.DirstateMap.new_v2(
                    data, self.docket.data_size, self.docket.tree_metadata
                )
                parents = self.docket.parents
            else:
                self._map, parents = rustmod.DirstateMap.new_v1(
                    self._readdirstatefile()
                )

            # Do not clobber parents that were set but not written yet.
            if parents and not self._dirtyparents:
                self.setparents(*parents)

            # Bypass the forwarding methods of the base class.
            self.__contains__ = self._map.__contains__
            self.__getitem__ = self._map.__getitem__
            self.get = self._map.get
            return self._map

        @property
        def copymap(self):
            return self._map.copymap()

        def debug_iter(self, all):
            """
            Return an iterator of (filename, state, mode, size, mtime) tuples

            `all`: also include with `state == b' '` dirstate tree nodes that
            don't have an associated `DirstateItem`.

            """
            return self._map.debug_iter(all)

        def clear(self):
            """Empty the map, reset both parents to the null id and drop the
            cached derived views."""
            self._map.clear()
            self.setparents(
                self._nodeconstants.nullid, self._nodeconstants.nullid
            )
            util.clearcachedproperty(self, b"_dirs")
            util.clearcachedproperty(self, b"_alldirs")
            # `filefoldmap` is cached with `propertycache` in this class as
            # well, so it has to be dropped here like in the pure-Python
            # implementation, otherwise it keeps describing the old content.
            util.clearcachedproperty(self, b"filefoldmap")
            util.clearcachedproperty(self, b"dirfoldmap")

        def items(self):
            return self._map.items()

        # forward for python2,3 compat
        iteritems = items

        def keys(self):
            return iter(self._map)

        ### reading/setting parents

        def setparents(self, p1, p2, fold_p2=False):
            """Record `(p1, p2)` as the parents and flag them as dirty.

            With `fold_p2`, the result of the Rust map's
            `setparents_fixup()` is returned; otherwise an empty dict.
            """
            self._parents = (p1, p2)
            self._dirtyparents = True
            copies = {}
            if fold_p2:
                copies = self._map.setparents_fixup()
            return copies

        ### disk interaction

        @propertycache
        def identity(self):
            # Loading `_map` assigns `self.identity` as a side effect.
            self._map
            return self.identity

        def write(self, tr, st):
            """Serialize the map, writing the dirstate file through `st`.

            In v1 format everything goes to `st`. In v2 format the data is
            either appended to the existing data file or written to a new
            one, as decided by the Rust `write_v2()`, and `st` receives the
            docket.
            """
            if not self._use_dirstate_v2:
                p1, p2 = self.parents()
                packed = self._map.write_v1(p1, p2)
                st.write(packed)
                st.close()
                self._dirtyparents = False
                return

            # We can only append to an existing data file if there is one
            can_append = self.docket.uuid is not None
            packed, meta, append = self._map.write_v2(can_append)
            if append:
                docket = self.docket
                data_filename = docket.data_filename()
                if tr:
                    # Register the current size of the data file with the
                    # transaction before appending to it.
                    tr.add(data_filename, docket.data_size)
                with self._opener(data_filename, b'r+b') as fp:
                    fp.seek(docket.data_size)
                    assert fp.tell() == docket.data_size
                    written = fp.write(packed)
                    if written is not None:  # py2 may return None
                        assert written == len(packed), (written, len(packed))
                docket.data_size += len(packed)
                docket.parents = self.parents()
                docket.tree_metadata = meta
                st.write(docket.serialize())
                st.close()
            else:
                self.write_v2_no_append(tr, st, meta, packed)
            # Reload from the newly-written file
            util.clearcachedproperty(self, b"_map")
            self._dirtyparents = False

        ### code related to maintaining and accessing "extra" property
        # (e.g. "has_dir")

        @propertycache
        def filefoldmap(self):
            """Returns a dictionary mapping normalized case paths to their
            non-normalized versions.
            """
            return self._map.filefoldmapasdict()

        def hastrackeddir(self, d):
            return self._map.hastrackeddir(d)

        def hasdir(self, d):
            return self._map.hasdir(d)

        @propertycache
        def dirfoldmap(self):
            """Dictionary mapping the case-normalized form of every directory
            returned by the Rust map's `tracked_dirs()` to its stored form."""
            f = {}
            normcase = util.normcase
            for name in self._map.tracked_dirs():
                f[normcase(name)] = name
            return f

        ### code related to manipulation of entries and copy-sources

        def set_tracked(self, f):
            return self._map.set_tracked(f)

        def set_untracked(self, f):
            return self._map.set_untracked(f)

        def set_clean(self, filename, mode, size, mtime):
            self._map.set_clean(filename, mode, size, mtime)

        def set_possibly_dirty(self, f):
            self._map.set_possibly_dirty(f)

        def reset_state(
            self,
            filename,
            wc_tracked=False,
            p1_tracked=False,
            p2_info=False,
            has_meaningful_mtime=True,
            parentfiledata=None,
        ):
            """Forward to the Rust map's `reset_state`, passing every
            argument positionally in the order of this signature."""
            return self._map.reset_state(
                filename,
                wc_tracked,
                p1_tracked,
                p2_info,
                has_meaningful_mtime,
                parentfiledata,
            )
General Comments 0
You need to be logged in to leave comments. Login now