##// END OF EJS Templates
dirstate: explicitly backup the datafile...
marmoute -
r50976:9a0778bb default
parent child Browse files
Show More
@@ -1,686 +1,699 b''
1 1 # dirstatemap.py
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6
7 7 from .i18n import _
8 8
9 9 from . import (
10 10 error,
11 11 pathutil,
12 12 policy,
13 13 txnutil,
14 14 util,
15 15 )
16 16
17 17 from .dirstateutils import (
18 18 docket as docketmod,
19 19 v2,
20 20 )
21 21
# Load the `parsers` module and the optional Rust `dirstate` module through
# the module policy; `rustmod` is compared against None below to decide which
# implementation provides the dirstate item type.
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')

propertycache = util.propertycache

# Use the Rust `DirstateItem` whenever the Rust module was imported.
DirstateItem = (parsers if rustmod is None else rustmod).DirstateItem

# Mask applied to file sizes before they are recorded (keeps the low 31 bits).
rangemask = 0x7FFFFFFF
33 33
34 34
35 35 class _dirstatemapcommon:
36 36 """
37 37 Methods that are identical for both implementations of the dirstatemap
38 38 class, with and without Rust extensions enabled.
39 39 """
40 40
41 41 # please pytype
42 42
43 43 _map = None
44 44 copymap = None
45 45
46 46 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
47 47 self._use_dirstate_v2 = use_dirstate_v2
48 48 self._nodeconstants = nodeconstants
49 49 self._ui = ui
50 50 self._opener = opener
51 51 self._root = root
52 52 self._filename = b'dirstate'
53 53 self._nodelen = 20 # Also update Rust code when changing this!
54 54 self._parents = None
55 55 self._dirtyparents = False
56 56 self._docket = None
57 57
58 58 # for consistent view between _pl() and _read() invocations
59 59 self._pendingmode = None
60 60
61 61 def preload(self):
62 62 """Loads the underlying data, if it's not already loaded"""
63 63 self._map
64 64
65 65 def get(self, key, default=None):
66 66 return self._map.get(key, default)
67 67
68 68 def __len__(self):
69 69 return len(self._map)
70 70
71 71 def __iter__(self):
72 72 return iter(self._map)
73 73
74 74 def __contains__(self, key):
75 75 return key in self._map
76 76
77 77 def __getitem__(self, item):
78 78 return self._map[item]
79 79
80 80 ### disk interaction
81 81
82 82 def _opendirstatefile(self):
83 83 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
84 84 if self._pendingmode is not None and self._pendingmode != mode:
85 85 fp.close()
86 86 raise error.Abort(
87 87 _(b'working directory state may be changed parallelly')
88 88 )
89 89 self._pendingmode = mode
90 90 return fp
91 91
92 92 def _readdirstatefile(self, size=-1):
93 93 try:
94 94 with self._opendirstatefile() as fp:
95 95 return fp.read(size)
96 96 except FileNotFoundError:
97 97 # File doesn't exist, so the current state is empty
98 98 return b''
99 99
100 100 @property
101 101 def docket(self):
102 102 if not self._docket:
103 103 if not self._use_dirstate_v2:
104 104 raise error.ProgrammingError(
105 105 b'dirstate only has a docket in v2 format'
106 106 )
107 107 self._docket = docketmod.DirstateDocket.parse(
108 108 self._readdirstatefile(), self._nodeconstants
109 109 )
110 110 return self._docket
111 111
112 112 def write_v2_no_append(self, tr, st, meta, packed):
113 113 old_docket = self.docket
114 114 new_docket = docketmod.DirstateDocket.with_new_uuid(
115 115 self.parents(), len(packed), meta
116 116 )
117 117 data_filename = new_docket.data_filename()
118 118 self._opener.write(data_filename, packed)
119 # tell the transaction that we are adding a new file
120 if tr is not None:
121 tr.addbackup(data_filename, location=b'plain')
119 122 # Write the new docket after the new data file has been
120 123 # written. Because `st` was opened with `atomictemp=True`,
121 124 # the actual `.hg/dirstate` file is only affected on close.
122 125 st.write(new_docket.serialize())
123 126 st.close()
124 127 # Remove the old data file after the new docket pointing to
125 128 # the new data file was written.
126 129 if old_docket.uuid:
127 130 data_filename = old_docket.data_filename()
131 if tr is not None:
132 tr.addbackup(data_filename, location=b'plain')
128 133 unlink = lambda _tr=None: self._opener.unlink(data_filename)
129 134 if tr:
130 135 category = b"dirstate-v2-clean-" + old_docket.uuid
131 136 tr.addpostclose(category, unlink)
132 137 else:
133 138 unlink()
134 139 self._docket = new_docket
135 140
136 141 ### reading/setting parents
137 142
138 143 def parents(self):
139 144 if not self._parents:
140 145 if self._use_dirstate_v2:
141 146 self._parents = self.docket.parents
142 147 else:
143 148 read_len = self._nodelen * 2
144 149 st = self._readdirstatefile(read_len)
145 150 l = len(st)
146 151 if l == read_len:
147 152 self._parents = (
148 153 st[: self._nodelen],
149 154 st[self._nodelen : 2 * self._nodelen],
150 155 )
151 156 elif l == 0:
152 157 self._parents = (
153 158 self._nodeconstants.nullid,
154 159 self._nodeconstants.nullid,
155 160 )
156 161 else:
157 162 raise error.Abort(
158 163 _(b'working directory state appears damaged!')
159 164 )
160 165
161 166 return self._parents
162 167
163 168
class dirstatemap(_dirstatemapcommon):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict.  File state is updated by calling various methods (see each
      documentation for details):

      - `reset_state`,
      - `set_tracked`
      - `set_untracked`
      - `set_clean`
      - `set_possibly_dirty`

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """

    ### Core data storage and access

    @propertycache
    def _map(self):
        """The filename -> entry dict, filled from disk on first access."""
        # The empty dict must be in place before `read()` runs, since
        # `read()` populates `self._map` (or replaces it with a presized one).
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        """The destination -> source dict, filled while `_map` is loaded."""
        self.copymap = {}
        self._map
        return self.copymap

    def clear(self):
        """Drop every entry, reset the parents and forget derived caches."""
        self._map.clear()
        self.copymap.clear()
        nullid = self._nodeconstants.nullid
        self.setparents(nullid, nullid)
        for cached in (b"_dirs", b"_alldirs", b"filefoldmap", b"dirfoldmap"):
            util.clearcachedproperty(self, cached)

    def items(self):
        """Return the (filename, entry) pairs of the map."""
        return self._map.items()

    # forward for python2,3 compat
    iteritems = items

    def debug_iter(self, all):
        """
        Return an iterator of (filename, state, mode, size, mtime) tuples

        `all` is unused when Rust is not enabled
        """
        for filename, item in self.items():
            yield (filename, item.state, item.mode, item.size, item.mtime)

    def keys(self):
        """Return the filenames known to the map."""
        return self._map.keys()

    ### reading/setting parents

    def setparents(self, p1, p2, fold_p2=False):
        """Record `(p1, p2)` as parents and flag them as needing a write.

        With `fold_p2`, merge related data is dropped from every entry that
        carries some; the copy sources removed that way are returned as a
        `{destination: source}` dict (empty otherwise).
        """
        self._parents = (p1, p2)
        self._dirtyparents = True
        copies = {}
        if not fold_p2:
            return copies
        for f, s in self._map.items():
            if not s.p2_info:
                continue
            # Discard "merged" markers when moving away from a merge state
            source = self.copymap.pop(f, None)
            if source:
                copies[f] = source
            s.drop_merge_data()
        return copies

    ### disk interaction

    def read(self):
        """Load the entries (and the parents, unless dirty) from disk."""
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(
            self._opener.join(self._filename)
        )

        if self._use_dirstate_v2:
            if not self.docket.uuid:
                # The docket references no data file: nothing to load.
                return
            data = self._opener.read(self.docket.data_filename())
        else:
            data = self._readdirstatefile()

        if not data:
            return

        # TODO: adjust this estimate for dirstate-v2
        if util.safehasattr(parsers, b'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. This trades wasting some memory for avoiding costly
            # resizes. Each entry has a prefix of 17 bytes followed by one or
            # two path names. Studies on various large-scale real-world
            # repositories found 54 bytes a reasonable upper limit for the
            # average path names. Copy entries are ignored for the sake of
            # this estimate.
            self._map = parsers.dict_new_presized(len(data) // 71)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        if self._use_dirstate_v2:
            parents = self.docket.parents
            meta = self.docket.tree_metadata
            parse = util.nogc(v2.parse_dirstate)
            parse(self._map, self.copymap, data, meta)
        else:
            parse = util.nogc(parsers.parse_dirstate)
            parents = parse(self._map, self.copymap, data)
        if not self._dirtyparents:
            self.setparents(*parents)

        # Avoid excess attribute lookups by fast pathing certain checks
        self.__contains__ = self._map.__contains__
        self.__getitem__ = self._map.__getitem__
        self.get = self._map.get

    def write(self, tr, st):
        """Serialize the map to `st` (and to a new data file in v2 format)."""
        if self._use_dirstate_v2:
            packed, meta = v2.pack_dirstate(self._map, self.copymap)
            self.write_v2_no_append(tr, st, meta, packed)
        else:
            packed = parsers.pack_dirstate(
                self._map, self.copymap, self.parents()
            )
            st.write(packed)
            st.close()
        self._dirtyparents = False

    @propertycache
    def identity(self):
        """Identity of the dirstate file, recorded by `read()`."""
        # Loading the map runs `read()`, which stores `identity` on the
        # instance; the lookup below then finds that stored value.
        self._map
        return self.identity

    ### code related to maintaining and accessing "extra" property
    # (e.g. "has_dir")

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable"""
        cached = self.__dict__
        is_new = old_entry is None
        if (is_new or old_entry.removed) and "_dirs" in cached:
            self._dirs.addpath(filename)
        if is_new and "_alldirs" in cached:
            self._alldirs.addpath(filename)

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable"""
        cached = self.__dict__
        if old_entry is None:
            if remove_variant and "_alldirs" in cached:
                self._alldirs.addpath(filename)
        else:
            if "_dirs" in cached and not old_entry.removed:
                self._dirs.delpath(filename)
            if "_alldirs" in cached and not remove_variant:
                self._alldirs.delpath(filename)
        if "filefoldmap" in cached:
            self.filefoldmap.pop(util.normcase(filename), None)

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
        try:
            native_foldmap = parsers.make_file_foldmap
        except AttributeError:
            pass
        else:
            return native_foldmap(
                self._map, util.normcasespec, util.normcasefallback
            )

        # Fallback used when `parsers` has no `make_file_foldmap`.
        normcase = util.normcase
        folded = {
            normcase(name): name
            for name, entry in self._map.items()
            if not entry.removed
        }
        folded[b'.'] = b'.'  # prevents useless util.fspath() invocation
        return folded

    @propertycache
    def dirfoldmap(self):
        """Dict mapping normalized directory names to their stored form."""
        normcase = util.normcase
        return {normcase(name): name for name in self._dirs}

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        """Directory multiset built from the tracked entries only."""
        return pathutil.dirs(self._map, only_tracked=True)

    @propertycache
    def _alldirs(self):
        """Directory multiset built from every entry of the map."""
        return pathutil.dirs(self._map)

    ### code related to manipulation of entries and copy-sources

    def reset_state(
        self,
        filename,
        wc_tracked=False,
        p1_tracked=False,
        p2_info=False,
        has_meaningful_mtime=True,
        parentfiledata=None,
    ):
        """Set an entry to a given state, disregarding all previous state

        This is to be used by the part of the dirstate API dedicated to
        adjusting the dirstate after an update/merge.

        note: calling this might result in no entry existing at all if the
        dirstate map does not see any point in having one for this file
        anymore.
        """
        # copy information is now outdated
        # (maybe new information should be passed directly to this function)
        self.copymap.pop(filename, None)

        old_entry = self._map.get(filename)

        if not (p1_tracked or p2_info or wc_tracked):
            # Nothing is known about the file anymore: forget it entirely.
            self._drop_entry(filename)
            self._dirs_decr(filename, old_entry=old_entry)
            return

        self._dirs_incr(filename, old_entry)
        self._map[filename] = DirstateItem(
            wc_tracked=wc_tracked,
            p1_tracked=p1_tracked,
            p2_info=p2_info,
            has_meaningful_mtime=has_meaningful_mtime,
            parentfiledata=parentfiledata,
        )

    def set_tracked(self, filename):
        """Mark `filename` as tracked.

        Returns True when the file was not tracked before the call and
        False when it already was.
        """
        entry = self.get(filename)
        if entry is None:
            self._dirs_incr(filename)
            self._map[filename] = DirstateItem(
                wc_tracked=True,
            )
            return True

        if not entry.tracked:
            self._dirs_incr(filename, entry)
            entry.set_tracked()
            self._refresh_entry(filename, entry)
            return True

        # XXX This is probably overkill for more case, but we need this to
        # fully replace the `normallookup` call with `set_tracked` one.
        # Consider smoothing this in the future.
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)
        return False

    def set_untracked(self, f):
        """Mark a file as no longer tracked in the dirstate map"""
        entry = self.get(f)
        if entry is None:
            return False

        self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
        if not entry.p2_info:
            self.copymap.pop(f, None)
        entry.set_untracked()
        self._refresh_entry(f, entry)
        return True

    def set_clean(self, filename, mode, size, mtime):
        """mark a file as back to a clean state"""
        entry = self[filename]
        entry.set_clean(mode, size & rangemask, mtime)
        self._refresh_entry(filename, entry)
        self.copymap.pop(filename, None)

    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown"""
        entry = self[filename]
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)

    def _refresh_entry(self, f, entry):
        """record updated state of an entry"""
        # An entry that is tracked nowhere carries no information: drop it.
        if not entry.any_tracked:
            self._map.pop(f, None)

    def _drop_entry(self, f):
        """remove any entry for file f

        This should also drop associated copy information

        Deciding that the entry actually needs to be dropped is the
        responsibility of the caller"""
        self._map.pop(f, None)
        self.copymap.pop(f, None)
510 515
511 516
if rustmod is not None:

    class dirstatemap(_dirstatemapcommon):
        """Dirstate map backed by `rustmod.DirstateMap`.

        Defined only when the Rust module could be imported, in which case
        it replaces the pure-Python class of the same name defined above.
        """

        ### Core data storage and access

        @propertycache
        def _map(self):
            """
            Fills the Dirstatemap when called.
            """
            # ignore HG_PENDING because identity is used only for writing
            self.identity = util.filestat.frompath(
                self._opener.join(self._filename)
            )

            if self._use_dirstate_v2:
                if self.docket.uuid:
                    # TODO: use mmap when possible
                    data = self._opener.read(self.docket.data_filename())
                else:
                    # The docket references no data file yet.
                    data = b''
                self._map = rustmod.DirstateMap.new_v2(
                    data, self.docket.data_size, self.docket.tree_metadata
                )
                parents = self.docket.parents
            else:
                self._map, parents = rustmod.DirstateMap.new_v1(
                    self._readdirstatefile()
                )

            # Parents set explicitly since the last write win over the ones
            # found on disk.
            if parents and not self._dirtyparents:
                self.setparents(*parents)

            self.__contains__ = self._map.__contains__
            self.__getitem__ = self._map.__getitem__
            self.get = self._map.get
            return self._map

        @property
        def copymap(self):
            """The copy map exposed by the Rust map (queried on each access)."""
            return self._map.copymap()

        def debug_iter(self, all):
            """
            Return an iterator of (filename, state, mode, size, mtime) tuples

            `all`: also include with `state == b' '` dirstate tree nodes that
            don't have an associated `DirstateItem`.

            """
            return self._map.debug_iter(all)

        def clear(self):
            """Drop every entry, reset the parents and forget derived caches."""
            self._map.clear()
            self.setparents(
                self._nodeconstants.nullid, self._nodeconstants.nullid
            )
            util.clearcachedproperty(self, b"_dirs")
            util.clearcachedproperty(self, b"_alldirs")
            # `filefoldmap` is a cached snapshot computed from `_map`; it has
            # to be invalidated too (as the pure-Python implementation does),
            # otherwise it keeps describing files that were just cleared.
            util.clearcachedproperty(self, b"filefoldmap")
            util.clearcachedproperty(self, b"dirfoldmap")

        def items(self):
            """Return the (filename, entry) pairs of the map."""
            return self._map.items()

        # forward for python2,3 compat
        iteritems = items

        def keys(self):
            """Return an iterator over the filenames known to the map."""
            return iter(self._map)

        ### reading/setting parents

        def setparents(self, p1, p2, fold_p2=False):
            """Record `(p1, p2)` as parents and flag them as needing a write.

            With `fold_p2`, the result of the Rust map's
            `setparents_fixup()` is returned; otherwise an empty dict is.
            """
            self._parents = (p1, p2)
            self._dirtyparents = True
            copies = {}
            if fold_p2:
                copies = self._map.setparents_fixup()
            return copies

        ### disk interaction

        @propertycache
        def identity(self):
            """Identity of the dirstate file, recorded while loading `_map`."""
            # Loading the map stores `identity` on the instance; the lookup
            # below then finds that stored value.
            self._map
            return self.identity

        def write(self, tr, st):
            """Serialize the map to `st`.

            In v2 format the data is either appended to the existing data
            file or written to a new one, depending on what the Rust map's
            `write_v2` reports; the docket is written to `st` in both cases.
            `tr` is the current transaction, or None.
            """
            if not self._use_dirstate_v2:
                p1, p2 = self.parents()
                packed = self._map.write_v1(p1, p2)
                st.write(packed)
                st.close()
                self._dirtyparents = False
                return

            # We can only append to an existing data file if there is one
            can_append = self.docket.uuid is not None
            packed, meta, append = self._map.write_v2(can_append)
            if append:
                docket = self.docket
                data_filename = docket.data_filename()
                # We mark it for backup to make sure a future `hg rollback`
                # (or `hg recover`?) call finds the data it needs to restore
                # a working repository.
                #
                # The backup can use a hardlink because the format is
                # resistant to trailing "dead" data.
                if tr is not None:
                    tr.addbackup(data_filename, location=b'plain')
                with self._opener(data_filename, b'r+b') as fp:
                    # New data goes right after the part of the file the
                    # docket currently considers valid.
                    fp.seek(docket.data_size)
                    assert fp.tell() == docket.data_size
                    written = fp.write(packed)
                    if written is not None:  # py2 may return None
                        assert written == len(packed), (written, len(packed))
                docket.data_size += len(packed)
                docket.parents = self.parents()
                docket.tree_metadata = meta
                st.write(docket.serialize())
                st.close()
            else:
                self.write_v2_no_append(tr, st, meta, packed)
            # Reload from the newly-written file
            util.clearcachedproperty(self, b"_map")
            self._dirtyparents = False

        ### code related to maintaining and accessing "extra" property
        # (e.g. "has_dir")

        @propertycache
        def filefoldmap(self):
            """Returns a dictionary mapping normalized case paths to their
            non-normalized versions.
            """
            return self._map.filefoldmapasdict()

        def hastrackeddir(self, d):
            """Delegate the "has tracked dir" query to the Rust map."""
            return self._map.hastrackeddir(d)

        def hasdir(self, d):
            """Delegate the "has dir" query to the Rust map."""
            return self._map.hasdir(d)

        @propertycache
        def dirfoldmap(self):
            """Dict mapping normalized tracked directory names to their
            stored form.
            """
            f = {}
            normcase = util.normcase
            for name in self._map.tracked_dirs():
                f[normcase(name)] = name
            return f

        ### code related to manipulation of entries and copy-sources

        def set_tracked(self, f):
            """Delegate to the Rust map's `set_tracked` and return its result."""
            return self._map.set_tracked(f)

        def set_untracked(self, f):
            """Delegate to the Rust map's `set_untracked` and return its
            result.
            """
            return self._map.set_untracked(f)

        def set_clean(self, filename, mode, size, mtime):
            """Delegate to the Rust map's `set_clean`."""
            self._map.set_clean(filename, mode, size, mtime)

        def set_possibly_dirty(self, f):
            """Delegate to the Rust map's `set_possibly_dirty`."""
            self._map.set_possibly_dirty(f)

        def reset_state(
            self,
            filename,
            wc_tracked=False,
            p1_tracked=False,
            p2_info=False,
            has_meaningful_mtime=True,
            parentfiledata=None,
        ):
            """Delegate to the Rust map's `reset_state`, passing every
            argument positionally, and return its result.
            """
            return self._map.reset_state(
                filename,
                wc_tracked,
                p1_tracked,
                p2_info,
                has_meaningful_mtime,
                parentfiledata,
            )
General Comments 0
You need to be logged in to leave comments. Login now